



/*

主要内容：
	降雨描述性统计
	2015-2017小时数据: 描述性统计&实证分析
用到的数据：weather, data2015, data2016, data2017, hour_info
生成的表格&图: 表2 Panel A, 表2 Panel C, 表3

*/



// Descriptive statistics of rainfall: Table 2 Panel C
use "weather.dta", clear
/*
hour_variable: unique day-hour identifier stored as a string. This file reads
               its first 8 characters as the date (YYYYMMDD) and its last 2
               characters as the hour, so a value presumably looks like
               "2015010108" (TODO confirm the exact format).
temp: hourly temperature (degrees Celsius)
rain: hourly rainfall (mm)
*/

* Expression (not a value) that converts the identifier's date part to a Stata date.
local obs_day date(substr(hour_variable, 1, 8), "YMD")

* Rainy hours within the 2015-2017 sample window.
summarize rain if rain > 0 & inrange(`obs_day', td(01jan2015), td(31dec2017))

* Rainy hours within the week of 16-22 June 2019.
summarize rain if rain > 0 & inrange(`obs_day', td(16jun2019), td(22jun2019))




// Run the data-processing script data_process_2015_2017.do.
// NOTE(review): the command data_structured_2015_2017 used in the loop is not
// defined in this file; presumably this script defines it -- confirm.
do data_process_2015_2017.do


// Loop over the 2015-2017 raw data and restructure each year into hourly data.
forvalues i = 2015/2017 {
	use "data`i'.dta", clear
	data_structured_2015_2017
	* replace: without it a second run of this do-file stops with
	* "file already exists" as soon as the first yearly file is saved.
	save "data`i'_hour.dta", replace

}

* Pool the three yearly hourly files into a single dataset (2015 first, then
* 2016 and 2017 appended in that order).
use "data2015_hour.dta", clear
append using "data2016_hour.dta" "data2017_hour.dta"
* Apply the flagging step to the pooled data.
* NOTE(review): data_flag_2015_2017 is not defined in this file; presumably it
* creates the variable `flag' used below -- confirm.
data_flag_2015_2017


// Descriptive statistics: Table 2 Panel A
* Observations with flag == 1 are left out of the summary.
summarize hour_income hour_duration hour_distance if flag != 1


// Empirical analysis: Table 3
* Turn driver-hour records into one record per hour holding: the number of
* records in that hour, mean income, mean passenger-carrying duration and
* mean speed.
* NOTE(review): the Panel A summary earlier in this file excludes flag == 1
* rows, but no such filter is applied here -- confirm this is intended.
sort hour_variable
by hour_variable: generate total_hour_count = _N
foreach measure in income duration velocity {
	by hour_variable: egen mean_hour_`measure' = mean(hour_`measure')
}
* All retained variables are constant within an hour, so keeping the first
* row of each hour is enough.
by hour_variable: keep if _n == 1
keep hour_variable total_hour_count mean_hour_income mean_hour_duration mean_hour_velocity

* Log-transform every hourly aggregate (non-positive values become missing).
foreach agg of varlist total_hour_count mean_hour_income mean_hour_duration mean_hour_velocity {
	generate ln`agg' = log(`agg')
}

* Attach the weather data; only hours present in both datasets are kept and
* no _merge indicator is left behind.
merge 1:1 hour_variable using "weather.dta", keep(match) nogenerate
* Attach the hour-level information (holiday indicator, after-price-adjustment
* indicator), again keeping matched hours only.
merge 1:1 hour_variable using "hour_info.dta", keep(match) nogenerate


* Build the control variables.
* Calendar parts are cut out of the string identifier and then converted to
* numeric: characters 1-4 = year, 5-6 = month, last two = hour of day.
generate year  = substr(hour_variable, 1, 4)
generate month = substr(hour_variable, 5, 2)
generate hour  = substr(hour_variable, -2, 2)
destring year month hour, replace

* Stata date from the first 8 characters, and its day of week (0 = Sunday).
generate day      = date(substr(hour_variable, 1, 8), "YMD")
generate week_day = dow(day)

* Absolute distance of the temperature from 20 degrees, in units of 10 degrees.
generate temp_gap = abs(temp - 20) / 10
* Rain indicator: 1 when rainfall is above zero.
generate whether_rain = rain > 0

* Control variables shared by all Table 3 models: hour-of-day, day-of-week,
* month and year indicators plus price_adj and holiday.
* whether_rain and temp_gap are deliberately NOT part of this list: they are
* the regressors of interest and are written out in each regression below.
* Having them in both places put every one of them into the model twice, and
* the repeated copies are perfectly collinear with the first ones.
* NOTE(review): price_adj and holiday are presumably supplied by hour_info.dta
* -- confirm.
local cv1  i.hour i.week_day i.month  i.year price_adj holiday


* Regressions: one model per outcome, robust standard errors. Estimates are
* stored as m31-m34 and the whether_rain / temp_gap coefficients are exported
* to table3.doc (the first model replaces the file, later ones are appended).
local model_no 1
local write_mode replace
foreach outcome of varlist lntotal_hour_count lnmean_hour_income lnmean_hour_duration lnmean_hour_velocity {
	regress `outcome' whether_rain temp_gap `cv1', vce(robust)
	estimates store m3`model_no'
	outreg2 using table3.doc, `write_mode'  bdec(3) tdec(2) keep(whether_rain temp_gap)

	* Every model after the first is added as a new column.
	local write_mode append
	local ++model_no
}



