


/*

主要内容：利用2019小时数据，进行稳健性分析
生成的表格&图：表6 Panel A, 表6 Panel B, 表V1, 表VI1, 图V1

*/


// Distinguish full-time from part-time drivers

* Load the hourly estimation sample. -use- takes the option clear (not
* replace) to discard whatever dataset is currently in memory.
use "网约&巡游_小时_实证样本.dta", clear


* Build a running hour index from the string time stamp.
* NOTE(review): assumes hour_variable is laid out as YYYYMMDDhh, so that
* characters 7-8 are the day of month and 9-10 the hour -- confirm.
gen hour_str = substr(hour_variable, 9, 2)
gen day_str = substr(hour_variable, 7, 2)
* Day 16 is treated as the first day; the +1 makes the index start at 1
* (hour 0 of day 16 -> 1, hour 23 of day 22 -> 168).
gen hour_sequence = (real(day_str) - 16) * 24 + real(hour_str) + 1

* Reshape to one row per driver with one duration column per hour.
* Missing durations are set to zero before reshaping.
replace hour_duration = 0 if hour_duration == .
keep driver_id  hour_duration wangyue hour_sequence 
reshape wide hour_duration, i(driver_id wangyue) j(hour_sequence)
save "网约&巡游_聚类样本", replace


* Cluster ride-hailing drivers (wangyue == 1) on the 168 hourly duration
* columns. The random starting centres are drawn with a fixed seed so that
* the cluster numbering is the same on every run; without it the label of
* the full-time type changes from run to run.
* NOTE(review): after introducing the seed, re-inspect the figure once and
* confirm which cluster number is the full-time type.
keep if wangyue == 1
cluster kmeans hour_duration1-hour_duration168 , k(4) name(kmeans4) start(krandom(12345))
keep driver_id wangyue  kmeans4 
save "网约_聚类类型", replace

* Attach the cluster type to the hourly records and keep matched rows only
merge 1:m driver_id using "网约&巡游_小时.dta"    
keep if _merge==3


* Hourly activity profile of each cluster type (used to spot the full-time type)

* Number of distinct drivers in each type
bysort kmeans4: egen count_kmeans4 = nvals(driver_id)
* Number of records with a non-missing driver_id in each type-by-hour cell
bysort kmeans4 hour_variable: egen count_kmeans4_hour = count(driver_id)
* Share of the type's drivers that appear in the given hour
gen freq_kmeans4 = count_kmeans4_hour / count_kmeans4


* Spread each type's hourly share to every row of that hour, so that one
* row per hour carries the shares of all four types.
forvalues type = 1/4 {

	* The type's own share on its rows, zero on the rows of other types
	tempvar own_share
	gen `own_share' = cond(kmeans4 == `type', freq_kmeans4, 0)

	* The within-hour maximum picks up the type's share for that hour
	bysort hour_variable: egen freq_kmeans4_`type' = max(`own_share')
	drop `own_share'

}



* Keep one row per hour and number the hours in chronological order
duplicates drop hour_variable, force
sort hour_variable
gen hour2 = _n

* Tick positions: the first hour of each day on the 1..168 axis
local day_ticks 1 "Sun" 25 "Mon"  49 "Tue" 73 "Wed"  97 "Thu" 121 "Fri" 145 "Sat"

* One line chart per cluster type, each saved to ridehailing<type>.gph;
* the saved file names are collected for the combined figure.
local panels
forvalues type = 1/4 {

	line freq_kmeans4_`type' hour2, ///
		scheme(s1mono) ///
		title("Type`type'",size(*1.5)) ///
		xtitle("") ///
		ytitle("Fraction",size(large)) ///
		xlabel(`day_ticks', labsize(large)) ///
		yscale(range(0 1)) ylabel(0 (0.2) 1) ///
		aspectratio(0.3) ysize(2) ///
		saving(ridehailing`type', replace)

	local panels `panels' ridehailing`type'.gph

}

* Arrange the four panels in two rows and export the figure as PNG
graph combine `panels', ///
	rows(2) ///
	title("网约车聚类类型") ///
	scheme(s1mono) ///
	ysize(3) name("type_online", replace)

graph export "type_online.png", as(png) name("type_online")  replace





* The figure identifies type 3 as the full-time drivers. The number assigned
* to the full-time type can differ after each clustering run, so check the
* figure before relying on this value.
local fulltime_type = 3

* Ride-hailing drivers of the full-time type ...
use "网约_聚类类型", clear
keep if kmeans4 == `fulltime_type'

* ... joined to the wide hourly table, keeping every row with wangyue == 0
* plus the matched full-time ride-hailing drivers.
merge 1:1 driver_id using "网约&巡游_聚类样本"
keep if (wangyue == 1 & _merge == 3) | wangyue == 0
drop _merge





* Joint clustering of full-time ride-hailing drivers and the wangyue == 0
* drivers on the four days without rain. Columns 25-72 (Mon, Tue) and
* 97-120 (Thu) are the rainy days and are left out; columns 1-24 (Sun),
* 73-96 (Wed), 121-144 (Fri) and 145-168 (Sat) are used.
* A fixed seed for the random starting centres keeps cluster numbers stable.
cluster kmeans hour_duration1-hour_duration24 hour_duration73-hour_duration96 hour_duration121-hour_duration144 hour_duration145-hour_duration168 , k(4) name(day4kmeans4) start(krandom(12345))


* Same as above, additionally leaving out the Wednesday and Friday peak hours.
* Column index = 24 * (day offset) + hour + 1, so the peak hours 7, 8, 9,
* 17 and 18 are columns 80-82 and 90-91 on Wednesday and 128-130 and 138-139
* on Friday. The ranges previously used dropped columns 79-81, 89-90,
* 127-129 and 137-138, i.e. hours 6-8 and 16-17, one hour too early.
* NOTE(review): this assumes the peak definition hour = 7, 8, 9, 17, 18 used
* in the peak-pricing regressions of this file -- confirm.
cluster kmeans hour_duration1-hour_duration24 ///
hour_duration73-hour_duration79 hour_duration83-hour_duration89 hour_duration92-hour_duration96  ///
hour_duration121-hour_duration127  hour_duration131-hour_duration137  hour_duration140-hour_duration144   ///
hour_duration145-hour_duration168 , k(4) name(day4_kmeans4) start(krandom(12345))

* Cross-tabulate driver group against cluster to find the cluster in which
* most full-time ride-hailing drivers and wangyue == 0 drivers coincide
ta wangyue day4kmeans4 
ta wangyue day4_kmeans4 



* Attach the cluster assignments to the hourly estimation sample
merge 1:m driver_id using "网约&巡游_小时_实证样本"
keep if _merge==3



// Regressions on the full-time driver sample

* Numeric driver identifier (xtset needs a numeric panel variable)
encode driver_id, gen(driver_id_num)
* Parse hour_variable into a Stata datetime value
gen double hour_variable1 = clock(hour_variable, "YMDh")
format hour_variable1 %tc
* Declare the driver-by-hour panel
xtset driver_id_num  hour_variable1, delta(1 hour)


* day4kmeans4 == 4 (the number can differ after each clustering run; use the
* cluster in which most full-time ride-hailing and wangyue == 0 drivers coincide)
reghdfe whether_work whether_rain  c.whether_rain#c.wangyue  ///
    i.hour if weekday == 1 & flag != 1 ///
	& day4kmeans4 == 4, absorb(driver_id week) vce(cluster driver_id)
est store m61
outreg2 using result6.doc, replace  bdec(3) tdec(2) keep(whether_rain  c.whether_rain#c.wangyue)



xtlogit whether_work whether_rain  c.whether_rain#c.wangyue  ///
    i.hour i.week if weekday == 1 & flag != 1 ///
	& day4kmeans4 == 4, fe
est store m62
* xtlogit, fe is not documented as storing e(r2_p), so McFadden's pseudo
* R-squared is computed from the stored log likelihoods instead.
local pseudo_r2 = 1 - e(ll) / e(ll_0)
outreg2 using result6.doc, append  bdec(3) tdec(3) keep(whether_rain  c.whether_rain#c.wangyue) ///
addstat(Pseudo R-squared, `pseudo_r2')

	

* day4_kmeans4 == 1 (the number can differ after each clustering run; use the
* cluster in which most full-time ride-hailing and wangyue == 0 drivers coincide)
reghdfe whether_work whether_rain c.whether_rain#c.wangyue  ///
    i.hour if weekday == 1 & flag != 1 ///
	& day4_kmeans4 == 1, absorb(driver_id week) vce(cluster driver_id)
est store m63
outreg2 using result6.doc, append  bdec(3) tdec(2) keep(whether_rain  c.whether_rain#c.wangyue)


xtlogit whether_work whether_rain c.whether_rain#c.wangyue  ///
    i.hour i.week if weekday == 1 & flag != 1 ///
	& day4_kmeans4 == 1, fe
est store m64
* Same pseudo R-squared computation as for m62
local pseudo_r2 = 1 - e(ll) / e(ll_0)
outreg2 using result6.doc, append  bdec(3) tdec(3) keep(whether_rain  c.whether_rain#c.wangyue) ///
addstat(Pseudo R-squared, `pseudo_r2')





// Effect of dynamic pricing during peak hours

* Reload the hourly estimation sample; -use- takes clear, not replace
use "网约&巡游_小时_实证样本.dta", clear
* Peak indicator: hours 7, 8, 9, 17 and 18 on rows with weekday == 1
gen peak = (hour == 7 | hour == 8 | hour == 9 |  hour == 17 | hour == 18 ) & weekday == 1


* Numeric driver identifier (xtset needs a numeric panel variable)
encode driver_id, gen(driver_id_num)
* Parse hour_variable into a Stata datetime value
gen double hour_variable1 = clock(hour_variable, "YMDh")
format hour_variable1 %tc
* Declare the driver-by-hour panel
xtset driver_id_num  hour_variable1, delta(1 hour)
 

* Linear probability model without driver and week fixed effects.
* noabsorb states explicitly that nothing but the constant is absorbed;
* reghdfe releases differ on whether absorb() may simply be omitted.
reghdfe whether_work whether_rain wangyue c.whether_rain#c.wangyue c.peak#c.wangyue ///
    i.hour if weekday == 1 & flag != 1, noabsorb vce(cluster driver_id)
est store m71
outreg2 using result7.doc, replace  bdec(3) tdec(2) ///
keep(whether_rain wangyue c.whether_rain#c.wangyue c.peak#c.wangyue)


* Linear probability model with driver and week fixed effects
reghdfe whether_work whether_rain  c.whether_rain#c.wangyue c.peak#c.wangyue ///
    i.hour if weekday == 1& flag != 1 , absorb(driver_id week) vce(cluster driver_id)
est store m72
outreg2 using result7.doc, append  bdec(3) tdec(2) ///
keep(whether_rain  c.whether_rain#c.wangyue c.peak#c.wangyue)


* Logit without driver and week fixed effects (robust standard errors)
logit whether_work whether_rain wangyue c.whether_rain#c.wangyue c.peak#c.wangyue ///
    i.hour if weekday == 1 & flag != 1, r
est store m73
outreg2 using result7.doc, append  bdec(3) tdec(3) ///
keep(whether_rain wangyue c.whether_rain#c.wangyue c.peak#c.wangyue) ///
addstat(Pseudo R-squared, `e(r2_p)')


* Fixed-effects logit with week dummies
xtlogit whether_work whether_rain  c.whether_rain#c.wangyue c.peak#c.wangyue ///
    i.hour i.week if weekday == 1 & flag != 1, fe
est store m74
* xtlogit, fe is not documented as storing e(r2_p), so McFadden's pseudo
* R-squared is computed from the stored log likelihoods instead.
local pseudo_r2 = 1 - e(ll) / e(ll_0)
outreg2 using result7.doc, append  bdec(3) tdec(3) ///
keep(whether_rain  c.whether_rain#c.wangyue c.peak#c.wangyue) ///
addstat(Pseudo R-squared, `pseudo_r2')


* Log of hourly income; the +1 keeps zero-income hours defined
gen lnhour_income = log(hour_income+1)

* Income regression without driver and week fixed effects
reghdfe lnhour_income whether_rain wangyue c.whether_rain#c.wangyue c.peak#c.wangyue ///
    i.hour if weekday == 1 & flag != 1, noabsorb vce(cluster driver_id)
est store m75
outreg2 using result7.doc, append  bdec(3) tdec(2) ///
keep(whether_rain wangyue c.whether_rain#c.wangyue c.peak#c.wangyue)


* Income regression with driver and week fixed effects
reghdfe lnhour_income whether_rain  c.whether_rain#c.wangyue c.peak#c.wangyue ///
    i.hour if weekday == 1 & flag != 1 , absorb(driver_id week) vce(cluster driver_id)
est store m76
outreg2 using result7.doc, append  bdec(3) tdec(2) ///
keep(whether_rain  c.whether_rain#c.wangyue c.peak#c.wangyue)






// Effect on drivers by time-of-day window

 
* Reload the hourly estimation sample; -use- takes clear, not replace
use "网约&巡游_小时_实证样本.dta", clear

* Numeric driver identifier (xtset needs a numeric panel variable)
encode driver_id, gen(driver_id_num)
* Parse hour_variable into a Stata datetime value
gen double hour_variable1 = clock(hour_variable, "YMDh")
format hour_variable1 %tc
* Declare the driver-by-hour panel
xtset driver_id_num  hour_variable1, delta(1 hour)
 

* For every window a linear probability model (reghdfe) and a fixed-effects
* logit (xtlogit, fe) are estimated. xtlogit, fe is not documented as storing
* e(r2_p), so McFadden's pseudo R-squared is computed as 1 - ll/ll_0.

* Window 0:00-7:00 (hour 0 to 6)
reghdfe whether_work whether_rain c.whether_rain#c.wangyue ///
    i.hour if weekday == 1 & flag != 1 ///
	& hour >= 0 & hour <= 6, absorb(driver_id week) vce(cluster driver_id)
est store m81
outreg2 using result8.doc, replace  bdec(3) tdec(2) keep(whether_rain c.whether_rain#c.wangyue)


xtlogit whether_work whether_rain c.whether_rain#c.wangyue ///
    i.hour i.week if weekday == 1 & flag != 1 & hour >= 0 & hour <= 6, fe
est store m82
local pseudo_r2 = 1 - e(ll) / e(ll_0)
outreg2 using result8.doc, append  bdec(3) tdec(3) keep(whether_rain c.whether_rain#c.wangyue) ///
addstat(Pseudo R-squared, `pseudo_r2')

* Windows 7:00-10:00 and 17:00-19:00 (hour 7 to 9 and 17 to 18)
reghdfe whether_work whether_rain c.whether_rain#c.wangyue ///
    i.hour if weekday == 1 & flag != 1 ///
	& ((hour >= 7 & hour <= 9) | (hour >= 17 & hour <= 18)), absorb(driver_id week) vce(cluster driver_id)
est store m83
outreg2 using result8.doc, append bdec(3) tdec(2) keep(whether_rain c.whether_rain#c.wangyue)


xtlogit whether_work whether_rain c.whether_rain#c.wangyue ///
    i.hour i.week if weekday == 1 & flag != 1 ///
	& ((hour >= 7 & hour <= 9) | (hour >= 17 & hour <= 18)), fe
est store m84
local pseudo_r2 = 1 - e(ll) / e(ll_0)
outreg2 using result8.doc, append  bdec(3) tdec(3) keep(whether_rain c.whether_rain#c.wangyue) ///
addstat(Pseudo R-squared, `pseudo_r2')


* Window 10:00-17:00 (hour 10 to 16)
reghdfe whether_work whether_rain c.whether_rain#c.wangyue ///
    i.hour if weekday == 1 & flag != 1 ///
	& (hour >= 10 & hour <= 16), absorb(driver_id week) vce(cluster driver_id)
est store m85
outreg2 using result8.doc, append  bdec(3) tdec(2) keep(whether_rain c.whether_rain#c.wangyue)


xtlogit whether_work whether_rain c.whether_rain#c.wangyue ///
    i.hour i.week if weekday == 1 & flag != 1 ///
	& (hour >= 10 & hour <= 16), fe
est store m86
local pseudo_r2 = 1 - e(ll) / e(ll_0)
outreg2 using result8.doc, append  bdec(3) tdec(3) keep(whether_rain c.whether_rain#c.wangyue) ///
addstat(Pseudo R-squared, `pseudo_r2')

* Window 19:00-24:00 (hour 19 to 23)
reghdfe whether_work whether_rain c.whether_rain#c.wangyue ///
    i.hour if weekday == 1 & flag != 1 ///
	& (hour >= 19 & hour <= 23), absorb(driver_id week) vce(cluster driver_id)
est store m87
outreg2 using result8.doc, append  bdec(3) tdec(2) keep(whether_rain c.whether_rain#c.wangyue)


xtlogit whether_work whether_rain c.whether_rain#c.wangyue ///
    i.hour i.week if weekday == 1 & flag != 1 ///
	& (hour >= 19 & hour <= 23), fe
est store m88
local pseudo_r2 = 1 - e(ll) / e(ll_0)
outreg2 using result8.doc, append  bdec(3) tdec(3) keep(whether_rain c.whether_rain#c.wangyue) ///
addstat(Pseudo R-squared, `pseudo_r2')










