


/*

2019年网约车&巡游车原始数据介绍：
driver_id: 驾驶员的唯一标识，示例："AAAFSD"
deptime: 行程开始时间，示例："2019/6/17 7:18:51"
desttime: 行程结束时间, 示例："2019/6/17 7:48:43"
fare: 行程金额（元）
distance: 行程里程（公里）


结构化处理步骤如下：
step1: 根据开始、结束时间生成行程时长、行程间隔时长
step2: 根据行程间隔时长的标准划分班次
step3: 构造班次信息

*/




program trip_info
    // Build per-trip timing variables.
    //
    // Reads : driver_id, deptime, desttime
    //         (deptime/desttime are strings parsed with the "YMDhms" mask,
    //          e.g. "2019/6/17 7:18:51")
    // Writes: start_datetime, end_datetime  (%tc datetimes, stored as double)
    //         trip_duration                 (hours)
    //         trip_gap                      (hours from the end of this trip to
    //                                        the start of the same driver's next
    //                                        trip; missing for a driver's last trip)
    // Leaves the data sorted by driver_id start_datetime.

    * Parse the timestamp strings. %tc values are milliseconds since 1960 and
    * do not fit in a float, so they must be stored as double; otherwise the
    * timestamps are silently rounded.
    gen double start_datetime = clock(deptime, "YMDhms")
    gen double end_datetime   = clock(desttime, "YMDhms")
    format start_datetime end_datetime %tc

    * Trip duration in hours (3,600,000 ms per hour).
    gen double trip_duration = (end_datetime - start_datetime)/3600000

    * Gap to the next trip of the same driver. Under the by-prefix the
    * subscript [_n+1] is evaluated within the driver, so the last trip of
    * each driver gets a missing gap without an explicit row loop.
    bysort driver_id (start_datetime): ///
        gen double trip_gap = (start_datetime[_n+1] - end_datetime)/3600000
end




program divide_shift
    args shift_gap

    // Split each driver's trips into shifts.
    //
    // Argument:
    //   shift_gap  threshold compared against trip_gap (same unit as trip_gap);
    //              a gap >= shift_gap ends the current shift.
    // Reads : driver_id, start_datetime, trip_gap
    // Writes: whether_stop  1 if the trip is the last one of its shift, else 0
    //         shift_id      running shift number over the whole dataset
    //                       (starts at 1, increases at every new driver or
    //                       every gap >= shift_gap)

    if "`shift_gap'" == "" {
        display as error "divide_shift: argument shift_gap is required"
        exit 198
    }

    * trip_gap on row i describes the gap to the driver's next trip in
    * start_datetime order, so rows must be walked in that same order for
    * trip_gap[_n-1] to refer to the correct pair of trips.
    sort driver_id start_datetime

    * A trip closes a shift when it is the driver's last trip or when the gap
    * that follows it reaches the threshold. A missing trip_gap compares as
    * >= shift_gap and therefore also closes the shift. The last observation
    * of the dataset is treated like any other driver's final trip.
    by driver_id: gen byte whether_stop = ///
        (_n == _N) | (trip_gap >= `shift_gap')

    * A new shift starts on the first row, whenever the driver changes, or
    * when the previous trip's gap reached the threshold. The running sum of
    * that indicator is the shift number; long keeps large ids exact.
    gen long shift_id = sum(_n == 1 | driver_id != driver_id[_n-1] | ///
        trip_gap[_n-1] >= `shift_gap')
end


program shift_info

    // Build shift-level and within-shift cumulative variables.
    //
    // Reads : shift_id, start_datetime, end_datetime, fare, distance
    // Writes: trip_id             position of the trip inside its shift
    //         shift_start/_end    earliest start / latest end in the shift
    //         shift_duration      (shift_end - shift_start) in hours
    //         shift_income        total fare of the shift
    //         shift_distance      total distance of the shift
    //         shift_acc_income    running fare total up to and including the trip
    //         shift_acc_duration  hours from shift_start to the end of the trip

    * Number trips chronologically. Sorting on shift_id alone leaves the
    * within-shift order arbitrary (Stata's sort is not stable), so the time
    * variables are part of the sort key.
    bysort shift_id (start_datetime end_datetime): gen long trip_id = _n

    * Shift-level aggregates. egen defaults to float, which cannot hold %tc
    * millisecond values exactly, so double is requested explicitly.
    bysort shift_id: egen double shift_start = min(start_datetime)
    bysort shift_id: egen double shift_end = max(end_datetime)
    gen double shift_duration = (shift_end - shift_start)/3600000
    bysort shift_id: egen double shift_income = total(fare)
    bysort shift_id: egen double shift_distance = total(distance)

    * Cumulative income and elapsed working time within the shift. The data
    * are re-sorted by trip_id because egen may change the sort order.
    bysort shift_id (trip_id): gen double shift_acc_income = sum(fare)
    gen double shift_acc_duration = (end_datetime - shift_start)/3600000

end



program shift_flag

    // Mark extreme or abnormal observations with flag = 1.
    //
    // Reads : fare, distance, trip_duration, shift_income, shift_distance,
    //         shift_duration, trip_id
    // Writes: flag, trip_wage, trip_velocity, shift_wage, shift_velocity
    // Each _pctile call is followed by "return list", so the cut-offs that
    // were used are printed.

    gen flag = 0

    * Per-trip and per-shift rates (amount divided by duration).
    gen trip_wage = fare/trip_duration
    gen trip_velocity = distance/trip_duration
    gen shift_wage = shift_income/shift_duration
    gen shift_velocity = shift_distance/shift_duration

    * Trip level, both tails: flag values at or outside the 0.1 / 99.9
    * percentiles.
    foreach v in trip_duration trip_wage {
        _pctile `v', p(0.1, 99.9)
        return list
        replace flag = 1 if !(`v' > r(r1) & `v' < r(r2))
    }

    * Trip speed, upper tail only: above the 99.9th percentile.
    _pctile trip_velocity, p(0.1, 99.9)
    return list
    replace flag = 1 if !(trip_velocity <= r(r2))

    * Shift speed, upper tail only. Percentiles use one row per shift
    * (trip_id == 1); the flag is applied to every row.
    _pctile shift_velocity if trip_id == 1, p(0.1, 99.9)
    return list
    replace flag = 1 if !(shift_velocity <= r(r2))

    * Shift wage and shift duration, both tails: strictly outside the
    * 1 / 99 percentiles, again computed on one row per shift.
    foreach v in shift_wage shift_duration {
        _pctile `v' if trip_id == 1, p(1, 99)
        return list
        replace flag = 1 if !(`v' >= r(r1) & `v' <= r(r2))
    }

end




program gen_group_var

    // Create bin dummies for cumulative income and cumulative working time.
    //
    // Reads : shift_acc_income, shift_acc_duration
    // Writes: base_group_accincome1,   group_accincome2   ... group_accincome11
    //         base_group_accduration1, group_accduration2 ... group_accduration11
    //
    // Bin 1 is [.., w), bin k (2..10) is [(k-1)*w, k*w), bin 11 is [10*w, ..),
    // with width w = 1.5 for income and w = 2 for duration.
    // NOTE(review): the unit behind the 1.5 income step is not visible in
    // this file - confirm against how fare is scaled.
    //
    // Every dummy is missing when its source variable is missing. Without
    // the guard a missing value would satisfy ">= 10*w" and be put in bin 11.

    foreach spec in "accincome 1.5" "accduration 2" {
        gettoken stub width : spec
        local src shift_`stub'

        gen byte base_group_`stub'1 = `src' < `width' if !missing(`src')

        forvalues k = 2/10 {
            local lo = `width' * (`k' - 1)
            local hi = `width' * `k'
            gen byte group_`stub'`k' = (`src' >= `lo' & `src' < `hi') ///
                if !missing(`src')
        }

        local top = `width' * 10
        gen byte group_`stub'11 = `src' >= `top' if !missing(`src')
    }
end

program match_rain

    // Attach hourly rainfall to each trip, keyed on the trip's end hour.
    //
    // Reads : desttime (string, "Y/M/D h:m:s"), file "weather.dta"
    // Writes: desttime1, desttime2            (date part, time part)
    //         desttime11, desttime12, desttime13  (year, month, day)
    //         desttime21, desttime22, desttime23  (hour, minute, second)
    //         hour_variable                   (string key YYYYMMDDHH)
    //         whether_rain, _merge, plus whatever weather.dta carries
    // NOTE(review): assumes weather.dta has one row per string hour_variable
    // and a numeric variable named rain - confirm against that file.

    * Break the timestamp into its components.
    split desttime
    split desttime1, p("/")
    split desttime2, p(":")

    * Build the merge key; month, day and hour are left-padded to two digits.
    gen hour_variable = desttime11 + substr("00"+desttime12, -2,2) +  substr("00"+desttime13, -2,2) + substr("00"+desttime21, -2,2)

    * Keep every trip; discard weather hours that match no trip.
    merge m:1 hour_variable using "weather.dta"
    drop if _merge == 2

    * Rain indicator. Trips without a weather match have missing rain, and a
    * missing value compares as > 0, so those trips are left missing instead
    * of being counted as rainy.
    gen byte whether_rain = rain > 0 if !missing(rain)

end



program gen_conrol

    // Create calendar control variables from the trip end time.
    //
    // Reads : desttime1  (string date part of desttime, "Y/M/D")
    //         desttime21 (string hour part of desttime)
    //         Both are produced by the split commands in match_rain.
    // Writes: dest_date     daily date (%td)
    //         dest_week     day of week, 0 = Sunday ... 6 = Saturday
    //         dest_weekday  1 for Monday-Friday, 0 for Saturday/Sunday
    //         dest_hour     hour of day as a number

    gen long dest_date = date(desttime1, "YMD")
    format dest_date %td

    gen byte dest_week = dow(dest_date)

    * Left missing when the date could not be parsed; a missing day of week
    * would otherwise satisfy both "!= 0" and "!= 6" and count as a weekday.
    gen byte dest_weekday = (dest_week != 0 & dest_week != 6) ///
        if !missing(dest_week)

    gen dest_hour = real(desttime21)

end

