/************************************************************************/ /************ ADDW.PRG **************************************************/ /************************************************************************/ /***** PROGRAM TO APPLY HOROWITZ-MAMMEN ESTIMATOR FOR ADDITIVE **********/ /***** NONPARAMETRIC MODEL WITH A KNOWN LINK FUNCTION TO DATA. **********/ /******LINK FUNCTION IN THIS PROGRAM IS IDENTITY FUNCTION. **************/ /************************************************************************/ /************************************************************************/ /************************************************************************/ /************************************************************************/ new; library pgraph; graphset; rndseed 85210985; clear del; tstart = date; /************************************************************************/ /**** GENERATE n KNOT POINTS IN [-1,1] FOR B-SPLINES OF ORDER k. ********/ /************************************************************************/ proc(1) = kspl(nk1,k); local incr,mncr,nn1, kn0, strt, tmp0, tmp1; incr = 2/nk1; mncr = -incr; nn1 = nk1 + 1; kn0 = seqa(-1,incr,nn1); strt = -1 - incr; tmp0 = rev(seqa(strt,mncr,k)); strt = 1 + incr; tmp1 = seqa(strt,incr,k); retp(tmp0|kn0|tmp1); endp; /************************************************************************/ /******** GENERATE MATRIX IDENTIFYING THE NON-ZERO TERMS OF THE *********/ /******** THE SPLINE SUMMAND AND THE PRODUCT COEFFICIENT FOR EACH *******/ /******** SPLINE COMPONENT. THE P'TH COLUMN OF SMAT CORRESPONDS ********/ /******** TO THE P'TH BSPLINE. ******************************************/ /************************************************************************/ proc(1) = spmat(k,xkt); local nk,np, smat,jj,p,i,jjm,imax,sgn,trm; nk = rows(xkt); np = nk - k - 1; smat = zeros(nk,np); jj = 1; jjm = nk; do until jj > jjm; p = 1; do until p > np; @ ***** COMPUTE THE PRODUCT THAT GIVES THE SIGN AND FACTOR OF THE TERM **** @ smat[jj,p] = (jj >= p)*(jj <= p + k + 1); i = p; imax = p + k + 1; sgn = 1; if smat[jj,p] > 0; do until i > imax; if i /= jj; sgn = sgn*del/(i - jj); endif; i = i + 1; endo; endif; smat[jj,p] = smat[jj,p]*sgn; p = p + 1; endo; jj = jj + 1; endo; retp(smat); endp; /************************************************************************/ /***** EVALUATE P'TH BSPLINE AT THE VALUES OF X *************************/ /************************************************************************/ proc(1) = bspl(x,p,k,xkt,smat); local term; term = ((abs(x' - xkt))^k).*(x' - xkt .> 0).*smat[.,p]; retp(sumc(term)); endp; /**********************************************************/ /********** 2ND ORDER KERNEL WITH SUPPORT [-1,1] **********/ /**********************************************************/ proc KERN2(v); local kern; kern = (15/16)*((1 - v^2)^2); kern = kern.*(abs(v) .<= 1); retp(kern); endp; /*********************************************************/ /****** NADARAYA-WATSON MEAN REGRESSION ******************/ /*********************************************************/ proc(1)=NADWAT(xx,x,y,h); @ xx = Mx1 vector of arguments of regression function; h = Bandwidth y = Nx1 vector of dependent variable x = Nx1 vector of observations of independent variable @ local wx, term; wx = kern2((x - xx')/h); term = sumc(y.*wx)./(1.e-8 + sumc(wx)); retp(term); endp; /************************************************************************/ /******* LOCAL LINEAR MEAN REGRESSION ***********************************/ /************************************************************************/ proc(1)= loclin(xx,x,y,h); @ xx = Mx1 vector of arguments of regression function; h = Bandwidth y = Nx1 vector of dependent variable x = Nx1 vector of observations of independent variable @ local wx,dx,sn0,sn1,sn2,tn0,tn1, term; wx = kern2((x - xx')/h); dx = x - xx'; sn0 = meanc(wx); tn0 = meanc(y.*wx); sn1 = meanc(dx.*wx); tn1 = meanc(y.*dx.*wx); sn2 = meanc((dx^2).*wx); term = (sn2.*tn0 - sn1.*tn1)./(sn2.*sn0 - sn1^2); @ term = (sn2.*tn0)./(sn2.*sn0); @ retp(term); endp; /************************************************************************/ /********* MAIN PROGRAM *************************************************/ /************************************************************************/ k = 3; @ ***** Order of the spline ***** @ nk0 = 6; @ ***** Initial no. of spline knots in [-1,1] ***** @ h0 = .7; @ ***** Initial bandwidth for second-stage estimator ***** @ h1 = .7; @ ***** Final bandwidth for second-stage estimator ***** @ splot = 0; @ ***** 1 to plot spline estimates; 0 to plot second stage estimates ***** @ xkern = 0; @ ***** 0 for local constant second stage; 1 for local linear ***** @ DATASET = "PATH TO DATA"; @ **** IDENTIFY INPUT DATA SET **** @ OPEN FY=^DATASET; @ READ THE DATA AND SET UP ARRAYS @ N = ROWSF(FY); YZ=READR(FY,N); CLOSE(FY); @ ***** IDENTIFY THE INDEPENDENT VARIABLES ***** @ X = YZ[.,2:5]; @ ***** Modify this to choose right X from data ***** @ @ ***** IDENTIFY THE DEPENDENT VARIABLE ***** @ Y = YZ[.,1]; @ ***** Modify to choose right Y from data ***** @ n = rows(x); @ number of observations @ dx = cols(x); @ dimension of x @ @ ***** TRANSFORM X TO [-1,1] ***** @ xmax = maxc(x); xmin = minc(x); del = xmax - xmin; x = ones(n,2) - 2*(xmax' - x)./(del'); @ ***** GENERATE ARGUMENTS OF SECOND-STAGE ESTIMATES OF ADDITIVE COMPONENTS ***** @ npt = 21; incr = 0.05; xarg1 = seqa(-.5,incr, npt); npt = 17; incr = .1; xarg2 = seqa(-.8,incr, npt); output file = PATH TO OUTPUT HERE reset; ""; "TITLE OF OUTPUT HERE" ""; "Sample size: ";; n; "Dimension of X: ";; dx; ""; output off; @ ***** LOOP THROUGH NK VALUES ***** @ dcount = 1; nk = nk0; do until nk > 6; nk1 = nk - 1; del = nk1/2; xkt = kspl(nk1,k); smat = spmat(k,xkt); nkk = rows(xkt); np = nkk - k - 1; @ ***** EVALUATE SPLINE BASIS AT EACH X VALUE ***** @ z = zeros(n,np); dx1 = dx*np; xs = zeros(n,dx1); ix = 1; do until ix > dx; ip = 1; do until ip > np; z[.,ip] = bspl(x[.,ix],ip,k,xkt,smat); ip = ip + 1; endo; @ ***** FORM "DESIGN MATRIX" OF SPLINE VALUES AT THE OBSERVED X'S ***** @ if ix == 1; xs = z; else; xs = xs~z[.,1:np-1]; endif; ix = ix + 1; endo; @ ***** DO THE SPLINE REGRESSION ***** @ xt1 = xs[.,1:np]; np1 = np + 1; npx = cols(xs); xt2 = xs[.,np1:npx]; bhat = invpd(xs'xs)*xs'y; @ ***** COMPUTE AND NORMALIZE THE ADDITIVE COMPONENTS ***** @ f1 = xt1*bhat[1:np]; f1 = f1 - meanc(f1); f2 = xt2*bhat[np1:npx]; f2 = f2 - meanc(f2); mu = meanc(y); @ ***** GRAPH THE SPLINE ESTIMATES ***** @ if splot == 1; temp = x[.,1]~f1; temp = sortc(temp,1); title("SPLINE ESTIMATE OF EDUCATION COMPONENT"); xy(temp[.,1], temp[., 2]); pause(5); stop; temp = x[.,2]~f2; temp = sortc(temp,1); title("TITLE OF GRAPH"); xy(temp[.,1],temp[.,2]); pause(5); stop; endif; @ ***** COMPUTE THE SECOND STAGE ESTIMATES ***** @ ut1 = y - mu - f2; ut2 = y - mu - f1; @ ***** LOOP THROUGH BANDWIDTHS ***** @ h = h0; do until h > h1; @ ***** NADARAYA-WATSON ESTIMATOR ***** @ f1hat = NADWAT(xarg1,x[.,1],ut1,h); f2hat = NADWAT(xarg2,x[.,2],ut2,h); f1hat = f1hat - meanc(f1hat); f2hat = f2hat - meanc(f2hat); @ ***** LOCAL LINEAR ESTIMATOR ***** @ f1lin = loclin(xarg1,x[.,1],ut1,h); f2lin = loclin(xarg2,x[.,2],ut2,h); f1lin = f1lin - meanc(f1lin); f2lin = f2lin - meanc(f2lin); @ ***** GRAPH THE SECOND STAGE ESTIMATES ***** @ f1max = maxc(f1hat); f1min = minc(f1hat); f1hat = (f1hat - f1min)/(f1max - f1min); @ xarg1; ""; stop; @ xarg1 = 0.5*xmax[1]*(1 + xarg1); xarg2 = 0.5*xmax[2]*(1 + xarg2); title("TITLE OF GRAPH OF FIRST GRAPHED COMPONENT"); xlabel("LABEL X AXIS"); ylabel("LABEL Y AXIS"); ytics(0,1,.5,3); _PTEK = "PATH TO FILE STORING GRAPH"; xy(xarg1,f1hat); @ xy(xarg1,f1hat~f1lin); @ pause(5); f2max = maxc(f2hat); f2min = minc(f2hat); f2hat = (f2hat - f2min)/(f2max - f2min); _PTEK = "PATH TO GRAPH OF SECOND GRAPHED COMPONENT"; title("TITLE OF GRAPH"); xlabel("LABEL X AXIS"); @ xy(xarg2,f2hat~f2lin); @ xy(xarg2,f2hat); pause(5); h = h + 0.1; endo; rtime = ethsec(tstart,date)/6000; @ ***** WRITE RESULTS ***** @ output on; format 9,5; "NAME OF VARIABLE: "; ""; xarg1~f1hat; ""; ""; "NAME OF VARIABLE"; ""; xarg2~f2hat; output off; nk = nk + 1; endo; output on; "Running time: ";; rtime; output off;