library(quantreg) library(foreign) library(MASS) #sum over the columns of a matrix sumc<-function(x) c(matrix(1,1,nrow(x))%*%x) #sum over the rows of a matrix sumr<-function(x) c(x%*%matrix(1,ncol(x),1)) #weighted sum over the columns of a matrix wsumc<-function(y,w) sumc(y*w) #mean over the columns of a matrix meanc<-function(x){ n<-nrow(x) c(matrix(1,1,n)%*%x)/n } #estimation of the conditional cdf by inverting the conditional quantile function estimated by global linear quantile regression #dep: dependent variable; vector n x 1 #reg: regressors; matrix n x k #ev: points in the reg dimension where the function is estimated; matrix nev x k #u: points in the dep dimension at which the cdf is estimated; vector nu x 1 #output: matrix nev x nu grq=function(dep,reg,ev,u){ ev=as.matrix(ev) reg=as.matrix(reg) nev=nrow(ev) temp=lm(dep~reg)$coef reg=reg[,is.na(temp[2:length(temp)])==FALSE] ev=ev[,is.na(temp[2:length(temp)])==FALSE] temp=rq(dep~reg,tau=-1)$sol nt=ncol(temp) temp1=cbind(1,ev)%*%temp[-(1:3),-nt] pred=matrix(,0,length(u)) for(i in 1:nev) pred=rbind(pred,wsumc(matrix(temp1[i,],nt-1,length(u))<=matrix(u,nt-1,length(u),byrow=TRUE),matrix(temp[1,2:nt]-temp[1,1:(nt-1)],nt-1,length(u)))) pred } #estimation of the conditional cdf by inverting the conditional quantile function estimated by local linear quantile regression #dep: dependent variable; vector n x 1 #reg: regressors; matrix n x k #ev: points in the reg dimension where the function is estimated; matrix nev x k #u: points in the dep dimension at which the cdf is estimated; vector nu x 1 #h: bandwidth; scalar #min: the bandwidth is locally increased until min observations have positive weights #output: matrix nev x nu lrq=function(dep,reg,ev,u,h,min=10){ if(h0)0.001]~z[wx>0.001,],tau=-1,weights=wx[wx>0.001])$sol nt=ncol(temp) pred=rbind(pred,wsumc(matrix(temp[4,1:(nt-1)],nt-1,length(u))<=matrix(u,nt-1,length(u),byrow=TRUE),matrix(temp[1,2:nt]-temp[1,1:(nt-1)],nt-1,length(u)))) } } else pred=grq(dep,reg,ev,u) pred } #global logit #dep: binary dependent variable; vector n x 1 #reg: regressors; matrix n x k #ev: points in the reg dimension where the function is estimated; matrix nev x k #output: vector nev x 1 glogit=function(dep,reg,ev){ temp=glm(dep~reg,family=binomial(logit))$coef temp=pmin(cbind(1,ev)%*%temp,500) exp(temp)/(1+exp(temp)) } #local logit #dep: dependent variable; vector n x 1 #reg: regressors; matrix n x k #ev: points in the reg dimension where the function is estimated; matrix nev x k #h: bandwidth; scalar #min: the bandwidth is locally increased until min observations have positive weights #output: vector nev x 1 llogit=function(dep,reg,ev,h,min=10){ if(h0)0]~z[wx>0,],family=binomial(logit),weights=wx[wx>0])$coef[1],500) pred=c(pred,exp(temp)/(1+exp(temp))) } } else pred=glogit(dep,reg,ev) pred } #local logit #differences with llogit: own observation is not used and the cdf is avaluated at all observations: useful for cross-validation #dep: dependent variable; vector n x 1 #reg: regressors; matrix n x k #h: bandwidth; scalar #min: the bandwidth is locally increased until min observations have positive weights #output: vector n x 1 llogitcv=function(dep,reg,h,min=10){ reg=as.matrix(reg) nd=length(dep) nk=ncol(reg) pred=c() for(i in 1:nd){ z=reg[-i,]-matrix(reg[i,],nd-1,nk,byrow=TRUE) wx=rep(1,nd-1) for(k in 1:nk) wx=wx*(abs(z[,k])0)0]~z[wx>0,],family=binomial(logit),weights=wx[wx>0])$coef[1],500) pred=c(pred,exp(temp)/(1+exp(temp))) } pred } #cross validation for logit #dep: dependent variable; vector n x 1 #reg: regressors; matrix n x k #grid: potential values for the bandwidth #min: the bandwidth is locally increased until min observations have positive weights #outputs: optimal bandwidth, MSE for all bandwithes in grid cvlogit=function(dep,reg,grid,min=10){ if(length(grid)>1){ criterion=c() for(i in 1:length(grid)){ pred=llogitcv(dep,reg,grid[i],min) criterion=c(criterion,mean((dep-pred)^2)) } } else criterion=1 list(grid[criterion==min(criterion)][1],criterion) } #exogenous linear quantile regresion #dep: dependent variable; vector n x 1 #reg: regressors; matrix n x k #w: weights; vector n x 1 #nrq: number of quantile regression to estimate. The quantiles will be uniformaly distributed between 0 and 1 #outputs: coefficients, weights of the quantiles (all identical) rql<-function(y,x,w,nrq){ beta<-rq(y~x,tau=0.5/nrq,weights=w,method="br")$coef for(i in seq(2,nrq,1)) beta<-cbind(beta,rq(y~x,tau=(i/nrq-0.5/nrq),weights=w,method="br")$coef) wq<-rep(1/nrq,nrq) list(beta,wq) } #weighted quantiles #y: dependent variable; vector n x 1 #w: weights; vector n x 1 #prob: quantiles at which the quantile function is estimated, vector np x 1 #output: weighted quantiles of y, vector np x 1 wq<-function(y,w,prob){ o<-order(y) y<-y[o]; w<-w[o] a<-0; i<-0; s<-sum(w) res<-c() for(q in prob){ while(a1) for(k in 2:ns){ p<-t(t(beta[floor((k-1)*dim(beta)[1]/ns+1):floor(k*dim(beta)[1]/ns),,drop=FALSE])%*%t(x[,floor((k-1)*dim(beta)[1]/ns+1):floor(k*dim(beta)[1]/ns)])) pred<-pred+p } nw<-c(c(w)%*%t(c(wq))) o<-order(pred) pred<-pred[o]; nw<-nw[o] a<-0; i<-0; s<-sum(nw) res<-c() for(q in prob){ while(a0)0]~z[wx>0,],weights=wx[wx>0])$coef[1]) } } else pred=glinear(dep,reg,ev) pred } #local linear #differences with llinear: own observation is not used and the cdf is avaluated at all observations: useful for cross-validation #dep: dependent variable; vector n x 1 #reg: regressors; matrix n x k #h: bandwidth; scalar #min: the bandwidth is locally increased until min observations have positive weights #output: vector n x 1 llinearcv=function(dep,reg,h,min=10){ reg=as.matrix(reg) nd=length(dep) nk=ncol(reg) pred=c() for(i in 1:nd){ z=reg[-i,]-matrix(reg[i,],nd-1,nk,byrow=TRUE) wx=rep(1,nd-1) for(k in 1:nk) wx=wx*(abs(z[,k])0)0]~z[wx>0,],weights=wx[wx>0])$coef[1]) } pred } #cross validation for local linear #dep: dependent variable; vector n x 1 #reg: regressors; matrix n x k #grid: potential values for the bandwidth #min: the bandwidth is locally increased until min observations have positive weights #outputs: optimal bandwidth, MSE for all bandwithes in grid cvlinear=function(dep,reg,grid,min=10){ if(length(grid)>1){ criterion=c() for(i in 1:length(grid)){ pred=llinearcv(dep,reg,grid[i],min) criterion=c(criterion,mean((dep-pred)^2)) } } else criterion=1 list(grid[criterion==min(criterion)][1],criterion) } #mixed kernel #regd: discrete regressors, n x nd #regc: continuous regressors, n x nx #ev: points where the function is evaluated; vector (nd+nc) x 1 #lambda: smoothing parameter for the disrete variables #band: smoothing parameter for the continuous variables #outcomes: weights, vector n x 1; matrix of all discrete and continuous variables mkern=function(regd,regc,ev,lambda,band){ regd=as.matrix(regd) regc=as.matrix(regc) nd=ncol(regd) nc=ncol(regc) n=nrow(regc) regd=regd-matrix(ev[1:nd],n,nd,TRUE) regc=regc-matrix(ev[(nd+1):(nd+nc)],n,nc,TRUE) w=lambda^(nd-sumr(regd==0)) for(i in 1:nc) w=w*(abs(regc[,i])0)0]~temp[[2]][temp[[1]]>0,],family=binomial(logit),weights=temp[[1]][temp[[1]]>0])$coef[1],500) pred=c(pred,exp(temp)/(1+exp(temp))) } } else pred=glogit(dep,cbind(regd,regc),ev) pred } #local logit for cross-validation #differences with llogitm: own observation is not used and the cdf is avaluated at all observations: useful for cross-validation #dep: binary dependent variable; vector n x 1 #regd: discrete regressors; matrix n x nd #regc: continuous regressors; matrix n x nc #h: lambda and bandwidth; vector 2 x 1; first lambda then bandwidth #min: the bandwidth is locally increased until min observations have positive weights #output: vector n x 1 llogitcvm=function(dep,regd,regc,h,min=10){ regd=as.matrix(regd) regc=as.matrix(regc) nd=length(dep) pred=c() for(i in 1:nd) pred=c(pred,llogitm(dep[-i],regd[-i,],regc[-i,],c(regd[i,],regc[i,]),h,min)) pred } #cross validation for logit #dep: dependent variable; vector n x 1 #regd: discrete regressors; matrix n x nd #regc: continuous regressors; matrix n x nc #grid: potential values for the smoothing parameters; vector (2*ng x 1); first ng values for lambda then ng values for bandwidth #min: the bandwidth is locally increased until min observations have positive weights #outputs: optimal bandwidth, scalar; MSE for all bandwithes in grid, vector ng x 1 cvlogitm=function(dep,regd,regc,grid){ grid=matrix(grid,length(grid)/2,2) if(nrow(grid)>1){ criterion=c() for(i in 1:nrow(grid)){ pred=llogitcvm(dep,regd,regc,grid[i,]) criterion=c(criterion,mean((dep-pred)^2)) } } else criterion=1 list(grid[criterion==min(criterion),],criterion) } #local linear #dep: dependent variable; vector n x 1 #regd: discrete regressors; matrix n x nd #regc: continuous regressors; matrix n x nc #ev: points in the reg dimension where the function is estimated; matrix nev x (nd+nc) #h: lambda and bandwidth; vector 2 x 1; first lambda then bandwidth #min: the bandwidth is locally increased until min observations have positive weights #output: vector nev x 1 llinearm=function(dep,regd,regc,ev,h,min=10){ if(is.matrix(ev)==FALSE) ev=matrix(ev,1,length(ev)) if(h[1]<1 | h[2]0)0]~temp[[2]][temp[[1]]>0,],weights=temp[[1]][temp[[1]]>0])$coef[1]) } } else pred=glinear(dep,cbind(regd,regc),ev) pred } #local linear for cross-validation #differences with llinearm: own observation is not used and the cdf is avaluated at all observations: useful for cross-validation #dep: dependent variable; vector n x 1 #regd: discrete regressors; matrix n x nd #regc: continuous regressors; matrix n x nc #h: lambda and bandwidth; vector 2 x 1; first lambda then bandwidth #min: the bandwidth is locally increased until min observations have positive weights #output: vector n x 1 llinearcvm=function(dep,regd,regc,h,min=10){ regd=as.matrix(regd) regc=as.matrix(regc) nd=length(dep) pred=c() for(i in 1:nd) pred=c(pred,llinearm(dep[-i],regd[-i,],regc[-i,],c(regd[i,],regc[i,]),h,min)) pred } #cross validation for local linear #dep: dependent variable; vector n x 1 #regd: discrete regressors; matrix n x nd #regc: continuous regressors; matrix n x nc #grid: potential values for the smoothing parameters; vector (2*ng x 1); first ng values for lambda then ng values for bandwidth #min: the bandwidth is locally increased until min observations have positive weights #outputs: optimal bandwidth, scalar; MSE for all bandwithes in grid, vector ng x 1 cvlinearm=function(dep,regd,regc,grid){ grid=matrix(grid,length(grid)/2,2) if(nrow(grid)>1){ criterion=c() for(i in 1:nrow(grid)){ pred=llinearcvm(dep,regd,regc,grid[i,]) criterion=c(criterion,mean((dep-pred)^2)) } } else criterion=1 list(grid[criterion==min(criterion),],criterion) } #estimation of the conditional cdf by inverting the conditional quantile function estimated by local linear quantile regression #dep: dependent variable; vector n x 1 #regd: discrete regressors; matrix n x nd #regc: continuous regressors; matrix n x nc #ev: points in the reg dimension where the function is estimated; matrix nev x (nd+nc) #u: points in the dep dimension at which the cdf is estimated; vector nu x 1 #h: bandwidth; scalar #min: the bandwidth is locally increased until min observations have positive weights #output: matrix nev x nu lrqm=function(dep,regd,regc,ev,u,h,min=10){ if(is.matrix(ev)==FALSE) ev=matrix(ev,1,length(ev)) if(h[1]<1 | h[2]0)0.0001]~temp[[2]][temp[[1]]>0.0001,])$coef temp[[2]]=temp[[2]][,is.na(temp1[2:length(temp1)])==FALSE] temp=rq(dep[temp[[1]]>0.0001]~temp[[2]][temp[[1]]>0.0001,],tau=-1,weights=temp[[1]][temp[[1]]>0.0001])$sol nt=ncol(temp) pred=rbind(pred,wsumc(matrix(temp[4,1:(nt-1)],nt-1,length(u))<=matrix(u,nt-1,length(u),byrow=TRUE),matrix(temp[1,2:nt]-temp[1,1:(nt-1)],nt-1,length(u)))) } } else pred=grq(dep,cbind(regd,regc),ev,u) pred } #estimation of the conditional cdf by inverting the conditional quantile function estimated by local linear quantile regression #differences with lrqm: own observation is not used and the cdf is avaluated at all observations: useful for cross-validation #dep: dependent variable; vector n x 1 #regd: discrete regressors; matrix n x nd #regc: continuous regressors; matrix n x nc #u: points in the dep dimension at which the cdf is estimated; vector nu x 1 #h: lambda and bandwidth; vector 2 x 1; first lambda then bandwidth #min: the bandwidth is locally increased until min observations have positive weights #output: vector n x 1 lrqcvm=function(dep,regd,regc,u,h,min=10){ regd=as.matrix(regd) regc=as.matrix(regc) nd=length(dep) pred=c() for(i in 1:nd) pred=c(pred,lrqm(dep[-i],regd[-i,],regc[-i,],c(regd[i,],regc[i,]),u,h,min)) pred } #cross validation for quantile regression #dep: dependent variable; vector n x 1 #regd: discrete regressors; matrix n x nd #regc: continuous regressors; matrix n x nc #u: points in the dep dimension at which the cdf is estimated; vector nu x 1 #grid: potential values for the smoothing parameters; vector (2*ng x 1); first ng values for lambda then ng values for bandwidth #min: the bandwidth is locally increased until min observations have positive weights #outputs: optimal bandwidth, scalar; MSE for all bandwithes in grid, vector ng x 1 cvrqm=function(dep,regd,regc,u,grid){ grid=matrix(grid,length(grid)/2,2) if(nrow(grid)>1){ criterion=c() for(i in 1:nrow(grid)){ pred=lrqcvm(dep,regd,regc,u,grid[i,]) criterion=c(criterion,mean(((dep<=u)-pred)^2)) } } else criterion=1 list(grid[criterion==min(criterion),],criterion) } #Instrumental variable estimator of quantile treatment effects proposed by Froelich and Melly #dep: binary dependent variable; vector n x 1 #treat: treatment variable; n x 1 #inst: instrumental variabl; n x 1 #regd: discrete regressors; matrix n x nd #regc: continuous regressors; matrix n x nc #h: lambda and bandwidth; vector 2 x 1; first lambda then bandwidth #prob: quantiles at which the QTE will be estimated; vector nq x 1 #min: the bandwidth is locally increased until min observations have positive weights #output: vector nev x 1 iv_qte=function(dep,treat,inst,regd,regc,h,prob,min=10){ predz=llogitm(inst,regd,regc,cbind(regd,regc),h) w=(inst-predz)/(predz*(1-predz)) w=w-mean(w) w=w*(2*d-1) Pc=mean(treat*w) dist1=c();dist0=c() for(i in sort(dep)){ dist1=c(dist1,mean((dep<=i)*treat*w)/Pc) dist0=c(dist0,mean((dep<=i)*(1-treat)*w)/Pc) } q1=c() for(i in prob) q1=c(q1,min(dep)+sum((sort(dep)[2:length(dep)]-sort(dep)[1:(length(dep)-1)])[dist1[2:length(dist1)]<=i])) q0=c() for(i in prob) q0=c(q0,min(dep)+sum((sort(dep)[2:length(dep)]-sort(dep)[1:(length(dep)-1)])[dist0[2:length(dist0)]<=i])) q1-q0 } #examples using the Card data set data=read.dta("G:/Quantile/IV QTE/Card.dta") attach(data) #define the variables y=wage76/100 xd=cbind(black,smsa76r,reg76r,smsa66r,reg662,reg663,reg664,reg665,reg666,reg667,reg668,reg669,nodaded,nomomed,momdad14,sinmom14) xc=cbind(momed,daded,exp76) xc=xc[is.na(y)==0,] xd=xd[is.na(y)==0,] d=(ed76[is.na(y)==0]>12) z=nearc4[is.na(y)==0] y=y[is.na(y)==0] quants=seq(0.01,0.99,0.01) xc=xc%*%solve(chol(var(xc))) #cross validations that will be useful later #takes a very long time! #alternatively, here are the optimal values: #optbandz=optbandd1=optbandd0=optbandy11=optbandy10=optbandy01=optbandy00=optbandd=c(); optbandz[[1]]=c(0.4,5); optbandd1[[1]]=c(0.6,5); optbandd0[[1]]=optbandy10[[1]]=c(1,5); optbandy11[[1]]=optbandy01[[1]]=optbandy00[[1]]=optbandd[[1]]=c(0.8,5); optbanw1=optbanw0=optbanddp1=optbandyp11=optbandyp10=optbandyp01=Inf; optbanddp0=optbandyp00=0.3 gridz1=matrix(,0,2) for(i in seq(0.2,1,0.2)) for(j in c(0.25,0.5,1,5,Inf)) gridz1=rbind(gridz1,c(i,j)) optbandz=cvlogitm(z,xd,xc,gridz1) predz=llogitm(z,xd,xc,cbind(xd,xc),optbandz[[1]]) w=(z-predz)/(pmax(predz,0.001)*(1-pmin(predz,0.999))) gridw1=c(0.1,0.2,0.3,0.4,0.5,1,2.5,5,10,50,100,250,500,Inf) optbanw1=cvlinear(w[d==1],y[d==1],gridw1) optbanw0=cvlinear(w[d==0],y[d==0],gridw1) optbandd1=cvlogitm(d[z==1],xd[z==1,],xc[z==1,],gridz1) optbandd0=cvlogitm(d[z==0],xd[z==0,],xc[z==0,],gridz1) optbandy11=cvlogitm(1*(y[d==1 & z==1]<=median(y[d==1 & z==1])),xd[d==1 & z==1,],xc[d==1 & z==1,],gridz1) optbandy10=cvlogitm(1*(y[d==1 & z==0]<=median(y[d==1 & z==0])),xd[d==1 & z==0,],xc[d==1 & z==0,],gridz1) optbandy01=cvlogitm(1*(y[d==0 & z==1]<=median(y[d==0 & z==1])),xd[d==0 & z==1,],xc[d==0 & z==1,],gridz1) optbandy00=cvlogitm(1*(y[d==0 & z==0]<=median(y[d==0 & z==0])),xd[d==0 & z==0,],xc[d==0 & z==0,],gridz1) optbanddp1=cvlogit(d[z==1],predz[z==1],gridw1) optbanddp0=cvlogit(d[z==0],predz[z==0],gridw1) optbandyp11=cvlogit(1*(y[d==1 & z==1]<=median(y[d==1 & z==1])),predz[d==1 & z==1],gridw1) optbandyp10=cvlogit(1*(y[d==1 & z==0]<=median(y[d==1 & z==0])),predz[d==1 & z==0],gridw1) optbandyp00=cvlogit(1*(y[d==0 & z==0]<=median(y[d==1 & z==0])),predz[d==0 & z==0],gridw1) optbandyp01=cvlogit(1*(y[d==0 & z==1]<=median(y[d==1 & z==0])),predz[d==0 & z==1],gridw1) optbandd=cvlogitm(d,xd,xc,gridz1) #reweighting endogenous #main estimator proposed in Froelich and Melly all_w=iv_qte(y,d,z,xd,xc,optbandz[[1]],quants) #no control for parental education only_pe=iv_qte(y,d,z,xd[,c(1,1:12)],xc[,3],optbandz[[1]],quants) #no control for X w=(z-mean(z))/(mean(z)*(1-mean(z)))*(2*d-1) Pc=mean(d*w) dist1=c();dist0=c() for(i in sort(y)){ dist1=c(dist1,mean((y<=i)*d*w)/Pc) dist0=c(dist0,mean((y<=i)*(1-d)*w)/Pc)} q1=c() for(i in quants) q1=c(q1,min(y)+sum((sort(y)[2:length(y)]-sort(y)[1:(length(y)-1)])[dist1[2:length(dist1)]<=i])) q0=c() for(i in quants) q0=c(q0,min(y)+sum((sort(y)[2:length(y)]-sort(y)[1:(length(y)-1)])[dist0[2:length(dist0)]<=i])) no_x=q1-q0 #estimated positive weights #alternative identification strategy discussed in Froelich and Melly #consists in using the projection of the previous weights on the d and y space predz=llogitm(z,xd,xc,cbind(xd,xc),optbandz[[1]]) w=(z-predz)/(predz*(1-predz)) w=w-mean(w) w=w*(2*d-1) predw=rep(0,length(y)) predw[d==1]=llinear(w[d==1],y[d==1],y[d==1],optbanw1[[1]]) predw[d==0]=llinear(w[d==0],y[d==0],y[d==0],optbanw0[[1]]) pos_w=rq(y[predw>0.0001]~d[predw>0.0001],weights=predw[predw>0.0001],tau=quants)$coef[2,] #matching on x #alternative identification strategy discussed in Froelich and Melly predd1=llogitm(d[z==1],xd[z==1,],xc[z==1,],cbind(xd,xc),optbandd0[[1]]) predd0=llogitm(d[z==0],xd[z==0,],xc[z==0,],cbind(xd,xc),optbandd1[[1]]) evu=seq(1.1*min(y)-0.1*max(y),1.1*max(y)-0.1*min(y),length.out=500) predy11=lrqm(y[d==1 & z==1],xd[d==1 & z==1,],xc[d==1 & z==1,],cbind(xd,xc),evu,c(1,Inf)) predy10=lrqm(y[d==1 & z==0],xd[d==1 & z==0,],xc[d==1 & z==0,],cbind(xd,xc),evu,c(1,Inf)) predy00=lrqm(y[d==0 & z==0],xd[d==0 & z==0,],xc[d==0 & z==0,],cbind(xd,xc),evu,c(1,Inf)) predy01=lrqm(y[d==0 & z==1],xd[d==0 & z==1,],xc[d==0 & z==1,],cbind(xd,xc),evu,c(1,Inf)) denominator=mean(predd1-predd0) distc1=meanc(predy11*c(predd1)-predy10*c(predd0))/denominator distc0=meanc(predy01*c(predd1-1)-predy00*c(predd0-1))/denominator q1=c() for(i in quants) q1=c(q1,evu[1]+sum((evu[2:length(evu)]-evu[1:(length(evu)-1)])[distc1[2:length(distc1)]<=i])) q0=c() for(i in quants) q0=c(q0,evu[1]+sum((evu[2:length(evu)]-evu[1:(length(evu)-1)])[distc0[2:length(distc0)]<=i])) match_x=q1-q0 #matching on the propensity score #alternative identification strategy discussed in Froelich and Melly predd1=llogit(d[z==1],predz[z==1],predz,optbanddp1[[1]]) predd0=llogit(d[z==0],predz[z==0],predz,optbanddp0[[1]]) predy11=lrq(y[d==1 & z==1],predz[d==1 & z==1],predz,evu,optbandyp11[[1]]) predy10=lrq(y[d==1 & z==0],predz[d==1 & z==0],predz,evu,optbandyp10[[1]]) predy00=lrq(y[d==0 & z==0],predz[d==0 & z==0],predz,evu,optbandyp00[[1]]) predy01=lrq(y[d==0 & z==1],predz[d==0 & z==1],predz,evu,optbandyp01[[1]]) denominator=mean(predd1-predd0) distc1=meanc(predy11*c(predd1)-predy10*c(predd0))/denominator distc0=meanc(predy01*c(predd1-1)-predy00*c(predd0-1))/denominator q1=c() for(i in quants) q1=c(q1,evu[1]+sum((evu[2:length(evu)]-evu[1:(length(evu)-1)])[distc1[2:length(distc1)]<=i])) q0=c() for(i in quants) q0=c(q0,evu[1]+sum((evu[2:length(evu)]-evu[1:(length(evu)-1)])[distc0[2:length(distc0)]<=i])) match_p=q1-q0 #exogenous treatment #Estimation of the conditional cdf using linear quantile regression, estimation of the counterfactual distribution using the estimator if Chernozhukov, Ivan Fernandez-Val and Melly coef0=rql(y[d==0],cbind(xc,xd)[d==0,],rep(1,sum(1-d)),200) coef1=rql(y[d==1],cbind(xc,xd)[d==1,],rep(1,sum(d)),200) exo_qr=evalu(cbind(xc,xd),coef1[[1]],coef1[[2]])-evalu(cbind(xc,xd),coef0[[1]],coef0[[2]]) #reweighting exogenous #estimator of Firpo predd=llogitm(d,xd,xc,cbind(xd,xc),optbandz[[1]]) w1=d/predd w0=(1-d)/(1-predd) exo_w=wq(y,w1,seq(0.01,0.99,0.01))-wq(y,w0,seq(0.01,0.99,0.01)) #bootstrap rboot=matrix(,100,99) for(rep in 1:100){ set.seed(rep) index=sample(1:length(y),length(y),TRUE) yb=y[index];db=d[index];zb=z[index];xdb=xd[index,];xcb=xc[index,] rboot[rep,1:99]=iv_qte(yb,db,zb,xdb,xcb,optbandz[[1]],quants) } #Some figures #Is the common support condition satisfied? exp2=xc[,3] graph=glm(z~xc+xd+exp2,family=binomial(probit))$fitted plot(density(graph[z==1],bw=0.03)$x,density(graph[z==1],bw=0.03)$y,type="l",xlim=c(0,1),xlab="Propensity score",ylab="Density") lines(density(graph[z==0],bw=0.03)$x,density(graph[z==0],bw=0.03)$y,col="grey") legend(0,3.6,c("Sample with Z=1","Sample with Z=0"),col=c("black","grey"),lty=c(1,1)) #comparison of the estimators allowing for endogeneity plot(seq(0.1,0.9,0.01),all_w[10:90],type="l",ylim=c(1,5),xlab="Quantile",ylab="Quantile treatment effect") lines(seq(0.1,0.9,0.01),pos_w[10:90],col="grey") lines(seq(0.1,0.9,0.01),match_x[10:90],lty=2) legend(0.1,5,c("Weighting estimator","Weighting using estimated weights","Regression estimator"),col=c("black","grey","black"),lty=c(1,1,2)) #Results using the main estimator with 95% confidence intervals plot(seq(0.1,0.9,0.01),all_w[10:90],type="l",ylim=c(-1.5,7),xlab="Quantile",ylab="Quantile treatment effect") polygon(c(seq(0.1,0.9,0.01),seq(0.9,0.1,-0.01)),c(all_w[10:90]+1.96*sd(rboot[,10:90]),rev(all_w[10:90]-1.96*sd(rboot[,10:90]))),col="grey",border=NA) lines(seq(0.1,0.9,0.01),all_w[10:90]) #Comparison between some estimators plot(seq(0.1,0.9,0.01),exo_w[10:90],type="l",ylim=c(0,10),xlab="Quantile",ylab="Quantile treatment effect") lines(seq(0.1,0.9,0.01),all_w[10:90],col="grey") lines(seq(0.1,0.9,0.01),no_x[10:90],lty=2) lines(seq(0.1,0.9,0.01),only_pe[10:90],lty=2,col="grey") legend(0.1,10,c("Assuming exogeneity","IV with all covariates","IV without covariates", "IV with control for parental education"),col=c("black","grey","black","grey"),lty=c(1,1,2,2))