LAB 3 EXERCISE 1:Analysis of residuals from Regression and from median polish EXERCISE 2:CODE FOR GLS ESTIMATION EXERCISE 3:Theoretical (Exponential, Spherical and Gaussian) and sample semivariogram (fitting BY EYE) after removing mean with proc reg EXERCISE 4: obtain beta_hat from GSL and create estimated variance matrix (V) EXERCISE 5:CODE FOR THE NEAREST NEIGHBOR PLOT (LIKE IN LAB 1) LAB 3: ********************************************************************** EXERCISE 1: ********************************************************************** /* Analysis of residuals from Regression and from median polish */ options nodate nocenter nonumber ps=40 ls=50; goptions horigin=1in; data matval; input row col value; cards; 1 1 3 1 2 6 1 3 12 1 4 14 1 5 1 2 1 7 2 2 10 2 3 15 2 4 18 2 5 28 3 1 11 3 2 15 3 3 21 3 4 23 3 5 29 4 1 16 4 2 20 4 3 23 4 4 11 4 5 37 5 1 20 5 2 25 5 3 7 5 4 33 5 5 39 ; proc reg data=matval; model value=row col; output out=out p=pred r=resid; title 'first order polynomial'; run; proc print data=out; run; proc gplot data= out; plot resid*pred; symbol1 v=diamond c=green h=2 i=none; run; proc iml; start= {3 6 12 14 1 0, 7 10 15 18 28 0, 11 15 21 23 29 0, 16 20 23 11 37 0, 20 25 27 33 39 0, 0 0 0 0 0 0}; numcol=ncol(start)-1; numrow=nrow(start)-1; tol=0.001; maxit=50; rows=start; iterate=0; do until(converge|(iterate>=maxit)); ****** sweep medians from rows ******; rowmeds=median(rows[,1:numcol]`); /* calc row medians not using last column. */ rowmeds=rowmeds`; /* transpose to column vector. */ rowmmat=J(1,numcol,1)@rowmeds; /* create matrix to sweep from rows. */ rowsweep=(rows[,1:numcol]-rowmmat); /* sweep medians from all but last column. */ lastcol=rows[,numcol+1]+rowmeds; /* add row medians to last column. */ rowdone=rowsweep||lastcol; /* concatenate last column; done with rows.*/ columns=rowdone; /* now pass to column sweep step. */ iterate=iterate+1; ****** sweep medians from columns ******; colmeds=median(columns[1:numrow,]); colmmat=J(numrow,1,1)@colmeds; colsweep=(columns[1:numrow,]-colmmat); lastrow=columns[numrow+1,]+colmeds; coldone=colsweep//lastrow; rows=coldone; iterate=iterate+1; ****** check to see if tolerance met (only on even iteration) ******; diff=rowdone-coldone; convmat=choose(abs(diff)F Model 2 1467.08000 733.54000 15.645 0.0001 Error 22 1031.48000 46.88545 C Total 24 2498.56000 Root MSE 6.84730 R-square 0.5872 Dep Mean 17.76000 Adj R-sq 0.5496 C.V. 38.55459 Parameter Estimates Parameter Standard T for H0: Variable DF Estimate Error Parameter=0 Prob > |T| INTERCEP 1 -5.160000 4.33060987 -1.192 0.2461 ROW 1 4.100000 0.96835380 4.234 0.0003 COL 1 3.540000 0.96835380 3.656 0.0014 OBS ROW COL VALUE PRED RESID 1 1 1 3 2.48 0.52 2 1 2 6 6.02 -0.02 3 1 3 12 9.56 2.44 4 1 4 14 13.10 0.90 5 1 5 1 16.64 -15.64 6 2 1 7 6.58 0.42 7 2 2 10 10.12 -0.12 8 2 3 15 13.66 1.34 9 2 4 18 17.20 0.80 10 2 5 28 20.74 7.26 11 3 1 11 10.68 0.32 12 3 2 15 14.22 0.78 13 3 3 21 17.76 3.24 14 3 4 23 21.30 1.70 15 3 5 29 24.84 4.16 16 4 1 16 14.78 1.22 17 4 2 20 18.32 1.68 18 4 3 23 21.86 1.14 19 4 4 11 25.40 -14.40 20 4 5 37 28.94 8.06 21 5 1 20 18.88 1.12 22 5 2 25 22.42 2.58 23 5 3 7 25.96 -18.96 24 5 4 33 29.50 3.50 25 5 5 39 33.04 5.96 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Output of median polish: FINAL 0 -1 1 0 -21 -8 0 -1 0 0 2 -4 0 0 2 1 -1 0 0 0 -1 -16 2 5 0 1 -1 2 0 9 -8 -4 0 3 11 19 *********************************************************** EXERCISE 2: *********************************************************** /* CODE FOR GLS ESTIMATION */ options nodate nocenter nonumber ps=67 ls=120; data coalash; infile '/ncsu/st733_info/www/coalash.dt'; input id rowloc colloc z; title 'estimated GSL estimation'; run; proc iml; use coalash; read all var {rowloc colloc z}; npts=nrow(z); /* create estimate V */; v=I(npts); /* identity matrix of size npts */ t0=0;t1=1; t2=.25; /* t0 is theta_0 t1 is theta_1 and t2 is theta_2 (inverse of range) for exponential covariance function */ do ipt =1 to npts-1; do jpt=ipt +1 to npts; v[ipt,jpt]= exp(-1*t2*sqrt((rowloc[ipt,1] -rowloc[jpt,1])**2 + (colloc[ipt,1]-colloc[jpt,1])**2)); /*exponential cov. is exp(-theta_2 *r) */ v[jpt,ipt]=v[ipt,jpt]; end; end; v=t0#I(npts) + t1#v; /* # multiply matrix by scalar */ print '5x5 elements of v' (v[1:5,1:5]); /* Compute estimated GLS estimators */; x = j(npts,1)||colloc||rowloc; vinv=inv(v); bhat=inv(x` *vinv*x)*x` *vinv*z; print bhat; quit; ********************************************************************** /* SAS OUTPUT: estimated GSL estimation #TEM1003 5x5 elements of v 1 0.7788008 0.6065307 0.2185609 0.3567299 0.7788008 1 0.7788008 0.1707138 0.2794995 0.6065307 0.7788008 1 0.1332452 0.2185609 0.2185609 0.1707138 0.1332452 1 0.6065307 0.3567299 0.2794995 0.2185609 0.6065307 1 BHAT 10.144376 0.0567686 -0.138809 */ ****************************************************************** EXERCISE 3: ******************************************************************* /*Theoretical (Exponential, Spherical and Gaussian) and sample semivariogram*/ options nodate nocenter nonumber ps=40 ls=40 ; data soilph; infile '/ncsu/st733_info/www/soilph.dat'; input row col ph; r2=row*row; c2=col*col; rc=row*col; run; proc reg data=soilph; model ph=row col r2 rc c2; output out=resdat r=resid; *proc print data=resdat; run; proc variogram data=resdat outv=outv; compute lagd=0.7 maxlag=8 ndir=4; coordinates xc=col yc=row; var resid; run; proc variogram data=resdat outv=outo; compute lagd=0.7 maxlag=8 robust; coordinates xc=col yc=row; var resid; run; data outo2; set outo; th0= 0.008; th1= 0.03; th2=1/.8; vari= th0+ th1*(1-exp(-distance*th2) ); type='Exponential'; output; vari= variog; type='regular'; output; *proc print data=outo2; run; proc gplot data=outo2; where (distance > .); plot vari*distance=type; symbol1 c=black i=join l=1 v='0'; symbol2 c=red i=join l=2 v='1'; axis1 minor=none label=(c=black 'lag distance') offset=(3,3); axis2 minor=(number=1) label=(c=green 'semi-variogram') offset=(3,3); title 'Sample variogram versus theoretical'; footnote ' '; run; data outo3; set outo; /*Spherical variogram fit*/ th0=0.003; th1= 0.035; th2=2; if ( distance < 2) then vari= th0+ th1*( 3*distance/(2*th2)- ((distance/th2)**3)/2); if ( distance > 2) then vari= th1+th0; type='Spherical'; output; vari= variog; type='sample'; output; run; proc gplot data=outo3; where (distance > .); plot vari*distance=type; symbol1 c=black i=join l=1 v='0'; symbol2 c=red i=join l=2 v='1'; axis1 minor=none label=(c=black 'lag distance') offset=(3,3); axis2 minor=(number=1) label=(c=green 'semi-variogram') offset=(3,3); title 'Sample variogram versus Spherical'; footnote ' '; run; proc print data=outo3; run; data outo4; set outo; /*Gaussian variogram fit*/ th0=0.003; th1= 0.035; th2=1/2; vari= th0+ th1*( 1- exp(-distance*distance/4)); type='Gaussian'; output; vari= variog; type='sample'; output; run; proc print data=outo4; run; proc gplot data=outo4; where (distance > .); plot vari*distance=type; symbol1 c=black i=join l=1 v='0'; symbol2 c=red i=join l=2 v='1'; axis1 minor=none label=(c=black 'lag distance') offset=(3,3); axis2 minor=(number=1) label=(c=green 'semi-variogram') offset=(3,3); title 'Sample variogram versus Gaussian'; footnote ' '; run; ***************************************************************** output: Model: MODEL1 Dependent Variable: PH Analysis of Variance Sum of Mean Source DF Squares Square F Value Prob>F Model 5 1.27645 0.25529 6.966 0.0001 Error 115 4.21466 0.03665 C Total 120 5.49111 Root MSE 0.19144 R-square 0.2325 Dep Mean 4.48463 Adj R-sq 0.1991 C.V. 4.26880 Parameter Estimates Parameter Standard T for H0: Variable DF Estimate Error Parameter=0 Prob > |T| INTERCEP 1 4.197752 0.10797728 38.876 0.0001 ROW 1 0.119825 0.02642918 4.534 0.0001 COL 1 0.026320 0.02642918 0.996 0.3214 R2 1 -0.008831 0.00197057 -4.481 0.0001 RC 1 -0.004640 0.00174036 -2.666 0.0088 C2 1 -0.000363 0.00197057 -0.184 0.8540 Sample variogram versus Spherical OBS VARNAME LAG COUNT DISTANCE AVERAGE VARIOG COVAR 1 RESID -1 121 . .0000000 . 0.035122 2 RESID -1 121 . .0000000 . 0.035122 3 RESID 0 0 . . . . 4 RESID 0 0 . . . . 5 RESID 1 220 1.00000 .0009081 0.028993 0.006185 6 RESID 1 220 1.00000 .0009081 0.028993 0.006185 7 RESID 2 200 1.41421 .0028181 0.034580 0.001307 8 RESID 2 200 1.41421 .0028181 0.034580 0.001307 9 RESID 3 558 2.15230 .0013569 0.035340 -0.000353 10 RESID 3 558 2.15230 .0013569 0.035340 -0.000353 11 RESID 4 338 2.91777 .0006585 0.034606 -0.000087 12 RESID 4 338 2.91777 .0006585 0.034606 -0.000087 13 RESID 5 608 3.37225 .0014887 0.036915 -0.002341 14 RESID 5 608 3.37225 .0014887 0.036915 -0.002341 15 RESID 6 814 4.22667 .0005614 0.038282 -0.003042 16 RESID 6 814 4.22667 .0005614 0.038282 -0.003042 17 RESID 7 596 5.03987 .0020389 0.037521 -0.002185 18 RESID 7 596 5.03987 .0020389 0.037521 -0.002185 19 RESID 8 506 5.60694 .0021335 0.036787 -0.001780 20 RESID 8 506 5.60694 .0021335 0.036787 -0.001780 OBS RVARIO TH0 TH1 TH2 VARI TYPE 1 . .003 0.035 2 . Spherical 2 . .003 0.035 2 . sample 3 . .003 0.035 2 . Spherical 4 . .003 0.035 2 . sample 5 0.027615 .003 0.035 2 0.027063 Spherical 6 0.027615 .003 0.035 2 0.028993 sample 7 0.041132 .003 0.035 2 0.033936 Spherical 8 0.041132 .003 0.035 2 0.034580 sample 9 0.034009 .003 0.035 2 0.038000 Spherical 10 0.034009 .003 0.035 2 0.035340 sample 11 0.034516 .003 0.035 2 0.038000 Spherical 12 0.034516 .003 0.035 2 0.034606 sample 13 0.039947 .003 0.035 2 0.038000 Spherical 14 0.039947 .003 0.035 2 0.036915 sample 15 0.039504 .003 0.035 2 0.038000 Spherical 16 0.039504 .003 0.035 2 0.038282 sample 17 0.042475 .003 0.035 2 0.038000 Spherical 18 0.042475 .003 0.035 2 0.037521 sample 19 0.036119 .003 0.035 2 0.038000 Spherical 20 0.036119 .003 0.035 2 0.036787 sample Sample variogram versus Gaussian OBS VARNAME LAG COUNT DISTANCE AVERAGE VARIOG COVAR 1 RESID -1 121 . .0000000 . 0.035122 2 RESID -1 121 . .0000000 . 0.035122 3 RESID 0 0 . . . . 4 RESID 0 0 . . . . 5 RESID 1 220 1.00000 .0009081 0.028993 0.006185 6 RESID 1 220 1.00000 .0009081 0.028993 0.006185 7 RESID 2 200 1.41421 .0028181 0.034580 0.001307 8 RESID 2 200 1.41421 .0028181 0.034580 0.001307 9 RESID 3 558 2.15230 .0013569 0.035340 -0.000353 10 RESID 3 558 2.15230 .0013569 0.035340 -0.000353 11 RESID 4 338 2.91777 .0006585 0.034606 -0.000087 12 RESID 4 338 2.91777 .0006585 0.034606 -0.000087 13 RESID 5 608 3.37225 .0014887 0.036915 -0.002341 14 RESID 5 608 3.37225 .0014887 0.036915 -0.002341 15 RESID 6 814 4.22667 .0005614 0.038282 -0.003042 16 RESID 6 814 4.22667 .0005614 0.038282 -0.003042 17 RESID 7 596 5.03987 .0020389 0.037521 -0.002185 18 RESID 7 596 5.03987 .0020389 0.037521 -0.002185 19 RESID 8 506 5.60694 .0021335 0.036787 -0.001780 20 RESID 8 506 5.60694 .0021335 0.036787 -0.001780 OBS RVARIO TH0 TH1 TH2 VARI TYPE 1 . .003 0.035 0.5 . Gaussian 2 . .003 0.035 0.5 . sample 3 . .003 0.035 0.5 . Gaussian 4 . .003 0.035 0.5 . sample 5 0.027615 .003 0.035 0.5 0.010742 Gaussian 6 0.027615 .003 0.035 0.5 0.028993 sample 7 0.041132 .003 0.035 0.5 0.016771 Gaussian 8 0.041132 .003 0.035 0.5 0.034580 sample 9 0.034009 .003 0.035 0.5 0.027007 Gaussian 10 0.034009 .003 0.035 0.5 0.035340 sample 11 0.034516 .003 0.035 0.5 0.033834 Gaussian 12 0.034516 .003 0.035 0.5 0.034606 sample 13 0.039947 .003 0.035 0.5 0.035961 Gaussian 14 0.039947 .003 0.035 0.5 0.036915 sample 15 0.039504 .003 0.035 0.5 0.037598 Gaussian 16 0.039504 .003 0.035 0.5 0.038282 sample 17 0.042475 .003 0.035 0.5 0.037939 Gaussian 18 0.042475 .003 0.035 0.5 0.037521 sample 19 0.036119 .003 0.035 0.5 0.037986 Gaussian 20 0.036119 .003 0.035 0.5 0.036787 sample ***************************************************************** EXERCISE 4: ***************************************************************** /* obtain beta_hat from GSL and create estimated variance matrix (V) */ proc iml; use soilph; read all var {row col ph r2 rc c2}; npts=nrow(ph); /* create estimated variance matrix (V) */; V=I(npts); th0=0.008; th1=0.03; th2=1/0.8; do i=1 to npts-1; do j=i+1 to npts; /* calculate the distances */; *D[i,j]=sqrt( (row[i,1]-row[j,1])**2 +(col[i,1]-col[j,1])**2) ); /* exponential cov. functions */ V[i,j]=exp(-th2*sqrt((row[i,1]-row[j,1])**2+(col[i,1]-col[j,1])**2)); V[j,i]= V[i,j]; end; end; V= th0 # I(npts) + th1 # V; print 'Elements of V' (V[1:5,1:5]); /* compute estimated GLS estimators */ X= J(npts,1) || row || col || r2 || rc || c2 ; Vinv= INV(V); bhat= INV(X`* Vinv * X)*X` *Vinv* ph; print 'beta_hat' bhat; quit; ********************************************************** Output: #TEM1003 Elements of V 0.038 0.0085951 0.0024625 0.0007055 0.0002021 0.0085951 0.038 0.0085951 0.0024625 0.0007055 0.0024625 0.0085951 0.038 0.0085951 0.0024625 0.0007055 0.0024625 0.0085951 0.038 0.0085951 0.0002021 0.0007055 0.0024625 0.0085951 0.038 BHAT beta_hat 4.2583374 0.0953619 0.0250555 -0.00767 -0.002787 -0.000988 ***************************************************************** EXERCISE 5: **************************************************************** /* CODE FOR THE NEAREST NEIGHBOR PLOT (LIKE IN LAB 1)*/ data prob3; do row=1 to 5; do col=1 to 5; input datum @; output; end; end; cards; 1 6 12 14 21 7 10 15 18 28 11 15 21 23 29 16 19 23 11 37 20 25 27 33 39 ; * use outpair and outdist options to get nearest neighbor data; proc variogram data=prob3 outpair=nnd1pair; var datum; coordinates xcoord=col ycoord=row; compute novariogram outpdist=1; run; data nnd1pair; set nnd1pair; v1=v1+19.24; * put back overall mean removed by proc variogram; v2=v2+19.24; ; proc gplot; plot v1*v2; run; quit; *********************************************************************