; ; Uses regional age-banded MXD time series to predict NH or ALL temperatures, ; with calibration done using principal component regression, and independent ; validation. ; Should run bandreg2nh_youngold_fixed.pro in treeharry/banding to get the ; regional MXD series based on 20-550yr trees, with regional weighting, and ; at least 2 trees per band. ; There are a number of different PCR models that can be built, depending ; on which regions have data. ; ; It also computes the standard error ranges for the particular model being ; used, and for a given 'fixed grid'. For each model use the following ; fixed grids (also says whether to interpolate between the errors given ; by the various fixed grids, or to use a constant error model): ; ; Model Period used Fixed grid to use ; 0 1683-1981 1950 1800 1700 (1683-99 con, 1700-1900 int, 1901-81 con) ; 1 1667-1682 1700 (con) ; 2 1625-1666 1700 (con) ; 3 1602-1624 1700 (con) ; 4 1588-1603 1600 (con) ; 5 1480-1587 1600 1500 (1480-99 con, 1500-1587 int) ; 6 1402-1479 1500 1400 (1402-1479 int) ; 7 1982-1988 1950 (con) ; 8 1989-1991 1988 (con) ; 9 1992-1994 1988 (con) ; ; The use of fixed grids and multiple models is quite complex. The full grid ; needs to be used to define the EOFs that are used as predictors. These are ; then used to generate a full-grid reconstruction for that PCR model. The ; fixed grid data is then projected onto these EOFs and a new PCR model is ; built. This new model is not used for any reconstruction, but the ; calibration statistics are used to estimate errors around the full-grid ; reconstruction for this period. ; ; ***DECIDE WHETHER TO USE THE NSIB AGE-BANDED OR THE NSIB HUGERSHOFF RECORD ; ***BECAUSE THE FORMER HAS LARGE ANOMALIES! ; dohug=0 ; 0=age-banded NSIB 1=hugershoff NSIB ; ; Define periods for calibration ; if n_elements(iper) eq 0 then iper=0 ; 0=full 1=1667-1682 2=1625-1666 3=1604-1624 4=1588-1603 ; 5=1480-1587 6=1402-1479 ; 7=1982-1988 8=1989-1991 9=1992-1994 if n_elements(thalf) eq 0 then thalf=10. if n_elements(dosmooth) eq 0 then dosmooth=0 ; 0=compute errors for interannual data 1=for smoothed if n_elements(donh) eq 0 then donh=1 ; 0=do NH 1=do ALL if n_elements(fyr) eq 0 then begin case iper of 0: fyr=1950 ; 1950, 1800, 1700 1: fyr=1700 2: fyr=1700 3: fyr=1700 4: fyr=1600 5: fyr=1600 ; 1600, 1500 6: fyr=1500 ; 1500, 1400 7: fyr=1950 8: fyr=1988 9: fyr=1988 endcase endif fst=string(fyr,format='(I4)') ; doprint=1 ; 0=no output 1=extra output nret=9 ; max number of PCs to compute (should be all) nuse=6 ; max number of PCs to show and use in PCR pcaper=[1700,1960] ; PCA analysis period calper=[1906,1960] ; calibration period verper=[1871,1905] ; verification period ; IF CHANGING DOFINAL to 0, CHANGE BACK & RERUN AFTERWARDS!!!! dofinal=0 ; 0=use calper & verper finalper=[1881,1960] ; 1=happy with model, so re-calibrate using *same* ; predictors but a standard period ; ; Now, if required, get the Hugershoff NSIB series ; if dohug eq 1 then begin ; Get the full grid, full time version restore,'regts_mxd_fixed1950.idlsave' ; Gets many things including regname, timey, bestmxd insib=where(regname eq 'NSIB') hugts=reform(bestmxd(*,insib)) hugyr=timey ; Get the fixed grid, full time version restore,'regts_mxd_fixed'+fst+'.idlsave' ; Gets many things including regname, timey, bestmxd hugfixts=reform(bestmxd(*,insib)) endif ; if donh eq 0 then nhtit='NH' else nhtit='ALL' pertit=string(iper,format='(I1)') ; ; Get the hemispheric temperature series ; restore,filename='datastore/compbest_mxd_fixed1950.idlsave' if donh eq 0 then nhtemp=reform(comptemp(*,3)) $ else nhtemp=reform(comptemp(*,2)) ; Get rid of pre-1871 temperatures knh=where(timey lt 1871) nhtemp(knh)=!values.f_nan tempyr=timey ; ; Get the age-banded regional MXD series for the required fixed grid ; if fyr eq 1950 then fst='' restore,filename='/cru/u2/f055/treeharry/banding/bandregnh_fixed'+fst+'.idlsave' ; Gets: timey,nyr,nhts,allts,alleps,nreg,regname fixts=allts ; ; Get all the age-banded MXD series (regions back to 1400) ; restore,filename='/cru/u2/f055/treeharry/banding/bandregnh_fixed.idlsave' ; Gets: timey,nyr,nhts,allts,alleps,nreg,regname yrmxd=timey regname=['NEUR','SEUR','NSIB','ESIB','CAS','TIBP','WNA','NWNA','ECCA'] ; ; Optionally replace NSIB with Hugershoff version. But since we're using ; covariance matrix PCA, we need the variance of it to be correct, so match ; it to the age-banded version being replaced over the period 1700-1994. ; In fact, the age-banded series were normalised over that period, so we ; just re-normalise the new series. ; if dohug eq 1 then begin insib=where(regname eq 'NSIB') onets=reform(fixts(insib,*)) mknormal,onets,timey,refperiod=[1700,1994],refmean=rfm,refsd=rfs print,regname(insib),rfm,rfs onets=reform(allts(insib,*)) mknormal,onets,timey,refperiod=[1700,1994],refmean=rfm,refsd=rfs print,regname(insib),rfm,rfs mknormal,hugfixts,hugyr,refperiod=[1700,1994] mknormal,hugts,hugyr,refperiod=[1700,1994] ; The hugershoff is 1400-1994, but the age-banded is 1400-1995, so: fixts(insib,*)=[hugfixts,!values.f_nan] allts(insib,*)=[hugts,!values.f_nan] endif ; ; Remove the non-contiguous bits of the series, by identifying the ; latest missing value before 1700, and removing all values prior ; to this. ; for ireg = 0 , nreg-1 do begin kkk=where(yrmxd lt 1700) onets=reform(allts(ireg,*)) ml=where(finite(onets(kkk)) eq 0,nmiss) if nmiss gt 0 then begin lastmiss=ml(nmiss-1) allts(ireg,0:lastmiss)=!values.f_nan fixts(ireg,0:lastmiss)=!values.f_nan endif endfor ; ; Prepare for plotting ; loadct,39 multi_plot,nrow=4,ncol=1,layout='large' if !d.name eq 'X' then begin if doprint eq 1 then window,ysize=850 !p.font=-1 endif else begin !p.font=0 device,/helvetica,/bold,font_size=12 endelse !y.omargin=[4,0] def_1color,10,color='blue' def_1color,11,color='red' def_1color,12,color='vlgrey' ; ; Optionally remove TIBP and CAS ; ;iuse=[0,1,2,3,6,7,8] ;nreg=n_elements(iuse) ;nret=nreg ;nuse=nuse < nret ;regname=regname(iuse) ;allts=allts(iuse,*) ; case iper of 0: iuse=indgen(nreg) 1: iuse=[0,1,2,3,4,6,7,8] 2: iuse=[0,1,2,3,4,6,7] 3: iuse=[0,1,2,3,4,6] 4: iuse=[0,2,3,6] 5: iuse=[2,3,6] 6: iuse=[2,3] 7: iuse=[0,2,3,4,5,8] 8: iuse=[0,2,3,4,5] 9: iuse=[3,4,5] endcase nreg=n_elements(iuse) nret=nreg nuse=nuse < nret regname=regname(iuse) allts=allts(iuse,*) fixts=fixts(iuse,*) print,'Model number',iper print,'Regions',nreg,regname print,'Fixed grid',fyr print,'Region',nhtit print,'Smoothing?',dosmooth,thalf print,'NSIB: 0=age 1=hug',dohug ; ; Now compute leading PCs of the group of time series ; kpca=where( (yrmxd ge pcaper(0)) and (yrmxd le pcaper(1)) , nyrpca) pcayr=yrmxd(kpca) data1=allts(*,kpca) myeof1d,data1,ev,ea,lam,lampct,lamcum,nretain=nret ; if doprint eq 1 then begin plot,findgen(nret)+1.,lam,xtitle='Mode',$ ytitle='Eigenvalue',$ title='Covariance matrix PCA of!Cregional age-banded MXD' ; plot,findgen(nret)+1.,alog(lam),xtitle='Mode',$ ytitle='Log of eigenvalue' ; plot,findgen(nret+1),[!values.f_nan,lampct],xtitle='Mode',$ ytitle='Explained variance (%)',psym=10 ; plot,findgen(nret+1),[0.,lamcum],xtitle='Mode',$ ytitle='Cumulative explained variance (%)',psym=10,yrange=[0,100] endif ; ; Plot the selected ones ; nret=nuse for iret = 0 , nret-1 do begin ; ; Adjust EOF and PC to give mean loading as positive ; oneev=reform(ev(*,iret)) onetot=total(oneev,/nan) if onetot lt 0. then begin ev(*,iret)=-ev(*,iret) ea(*,iret)=-ea(*,iret) endif ; if doprint eq 1 then begin pause onets=ea(*,iret) filter_cru,thalf,/nan,tsin=onets,tslow=onelow plot,pcayr,onets,$ /xstyle,xtitle='Year',$ /ystyle,ytitle='PC time series',yrange=[-4.5,4],$ title='Mode #'+string(iret+1,format='(I2)') oplot,!x.crange,[0.,0.],linestyle=1 oplot,pcayr,onelow,thick=3 ; if ((iret mod 3) eq 2) or (iret eq nret-1) then begin !p.multi=[3,3,4,0,0] subind=iret mod 3 for jret = iret-subind , iret do begin onelf=ev(*,jret) onex=findgen(nreg) cpl_barts,onex,onelf,outline=-1,bar_color=12,$ /xstyle,xtickformat='nolabels',$ /ystyle,yrange=[-1,1],ytitle='Loading',$ title='Mode #'+string(jret+1,format='(I2)') for ireg = 0 , nreg-1 do begin xyouts,onex(ireg)+0.25,-1.05,align=1.,orient=90.,regname(ireg) endfor endfor !p.multi=[0,1,4,0,0] endif endif ; endfor ; ; Now extract the PCA modes to provide to the step-wise screening regression ; ev=ev(*,0:nret-1) ea=ea(*,0:nret-1) ; ; Now recompute the PC time series using all the data (gives longer series ; and ones that haven't had a specific long-term mean removed). ; Also do it for the fixed grid regional series. ; allea=fltarr(nyr,nret) fixea=fltarr(nyr,nret) missea=fltarr(nyr,nret) allea(*,*)=!values.f_nan fixea(*,*)=!values.f_nan missea(*,*)=!values.f_nan for iyr = 0 , nyr-1 do begin for iret = 0 , nret-1 do begin onedat=allts(*,iyr)*ev(*,iret) allea(iyr,iret)=total(onedat,/nan) missea(iyr,iret)=total(onedat) onedat=fixts(*,iyr)*ev(*,iret) fixea(iyr,iret)=total(onedat) endfor endfor ea=missea ; kcal=where( (yrmxd ge calper(0)) and (yrmxd le calper(1)) , nyrcal) indts=transpose(ea(kcal,*)) indfixts=transpose(fixea(kcal,*)) ktem=where( (tempyr ge calper(0)) and (tempyr le calper(1)) , nyrtemp) if nyrtemp ne nyrcal then message,'Ooops!!!' depts=nhtemp(ktem) wts=fltarr(nyrcal)+1. ltm=total(depts)/float(nyrtemp) vn='PC#'+string(indgen(nret)+1,format='(I1)') ; ; Now perform the regression ; ;print,'---------------------------------------------------------------' ;print,'REGRESSION USING ALL RETAINED PCs:',nret regression,indts,depts,wts,acoeff,bcoeff,yresid,ypred,bsigma,fvalue,$ indr,varname=vn,noprint=1-doprint ;print ;print,'Individual correlations' ;print,indr,format='(9F7.2)' ;print,'---------------------------------------------------------------' ;print print,'---------------------------------------------------------------' print,'STEPWISE REGRESSION' ineq=!values.f_nan ; do not force any variables into regression! stepwise,indts,depts,ineq,0.05,0.05,varnames=vn ;,/report print,'---------------------------------------------------------------' ; if dofinal eq 1 then begin calper=finalper kcal=where( (yrmxd ge calper(0)) and (yrmxd le calper(1)) , nyrcal) indts=transpose(ea(kcal,*)) indfixts=transpose(fixea(kcal,*)) ktem=where( (tempyr ge calper(0)) and (tempyr le calper(1)) , nyrtemp) if nyrtemp ne nyrcal then message,'Ooops!!!' depts=nhtemp(ktem) wts=fltarr(nyrcal)+1. ltm=total(depts)/float(nyrtemp) endif ; ; Now redo the regression using just the selected predictors and also ; compute the calibration statistics ; indts=indts(ineq,*) mkcalibratemulti,indts,depts,fitcoeff=fitcoeff,autocoeff=autocoeff bcoeff=fitcoeff.beta acoeff=fitcoeff.alpha ; ; Now redo the regression using just the selected predictors and the ; PC timeseries based on the fixed-grid regional series to compute the ; calibration statistics for error bands ; indfixts=indfixts(ineq,*) mkcalibratemulti,indfixts,depts,fitcoeff=fitfix,autocoeff=autofix print,'Fixed grid calibration statistics' print,' r alpha SEalpha sig rmse' print,fitfix.r,fitfix.alpha,fitfix.sealpha,fitfix.sig,fitfix.rms,$ format='(F6.2,2F8.4,F7.2,F8.4)' print,'beta(s) then SEbeta(s)' print,fitfix.beta,fitfix.sebeta,format='(8F8.4)' print,' ess a_resid a_tem a_mxd...' print,autofix.ess,autofix.ac1resid,autofix.ac1pand,autofix.ac1por,$ format='(F8.1,6F8.2)' ; ; Scale up the standard errors of the regression coefficients if there ; is autocorrelation in the residuals ; aresid=autofix.ac1resid serialfac=(1.+abs(aresid))/(1.-abs(aresid)) fitfix.sebeta=fitfix.sebeta*serialfac print,'SEbeta(s) scaled up for autocorrelation' print,fitfix.sebeta,format='(4F8.4)' ; ; Now compute the standard errors for this (full-grid) reconstruction. ; SE due to uncertainty in (a) slope(s) and (b) intercept (ZERO HERE!!), ; and (c) due to unexplained variance ; useea=missea(*,ineq) npred=n_elements(ineq) allerr=fltarr(nyr,npred) if dosmooth ne 0 then begin ; filter the predictor(s) and project the slope errors for ipred = 0 , npred-1 do begin ; To correctly compute errors, need predictors as anomalies from calper! onepred=reform(useea(*,ipred)) mkanomaly,onepred,yrmxd,refperiod=calper,refmean=predmean print,'Predictor #',ipred,' mean over cal. per. is',predmean filter_cru,/nan,thalf,tsin=onepred,tslow=lowpred,weight=filwt allerr(*,ipred)=lowpred*fitfix.sebeta(ipred) endfor nwt=n_elements(filwt) allwt=[reverse(filwt(1:nwt-1)),filwt] varfac=serialfac*sqrt(total(allwt^2)) < 1. ; don't let errors get bigger ; print,'Unexplained variance scaled (due to smoothing) by',varfac se_c=fitfix.rms*varfac endif else begin ; project the slope errors for ipred = 0 , npred-1 do begin ; To correctly compute errors, need predictors as anomalies from calper! onepred=reform(useea(*,ipred)) mkanomaly,onepred,yrmxd,refperiod=calper,refmean=predmean print,'Predictor #',ipred,' mean over cal. per. is',predmean allerr(*,ipred)=onepred*fitfix.sebeta(ipred) endfor se_c=fitfix.rms endelse if npred eq 1 then se_a=reform(allerr) else se_a=sqrt(total(allerr^2,2)) ;if npred eq 1 then se_a=reform(allerr) else se_a=total(allerr,2)/float(npred) ;print,'TESTING:' ;print,se_a(500),reform(allerr(500,*)),sqrt(total(allerr(500,*)^2)) se_b=0. predserr=sqrt( se_a^2 + se_b^2 + se_c^2 ) ; ; Now apply the regression to the full period with data (any missing, ; means no prediction!). ; prednh=fltarr(nyr) prednh(*)=!values.f_nan useea=missea(*,ineq) for iyr = 0 , nyr-1 do begin prednh(iyr)=total(useea(iyr,*)*bcoeff(*))+acoeff endfor ; ; Now verify it over the verification period ; Both verification and calibration % var expl. remove the temperature long ; term mean from the calibration period! ; kmxd=where( (yrmxd ge verper(0)) and (yrmxd le verper(1)) , nyrmxd) ktem=where( (tempyr ge verper(0)) and (tempyr le verper(1)) , nyrtemp) if nyrtemp ne nyrmxd then message,'Ooops!!!' print print,'Verification over:',verper print,mkcorrelation(prednh(kmxd),nhtemp(ktem),filter=thalf) sqvfull=total((nhtemp(ktem)-ltm)^2) sqvresid=total( (nhtemp(ktem)-prednh(kmxd))^2 ) print,'RE=',1.-sqvresid/sqvfull verltm=total(nhtemp(ktem))/float(nyrtemp) sqvfull=total((nhtemp(ktem)-verltm)^2) ;print,'CE=',1.-(sqvresid/sqvfull) ; kmxd=where( (yrmxd ge calper(0)) and (yrmxd le calper(1)) , nyrmxd) ktem=where( (tempyr ge calper(0)) and (tempyr le calper(1)) , nyrtemp) if nyrtemp ne nyrmxd then message,'Ooops!!!' ;print print,'Calibration over:',calper print,mkcorrelation(prednh(kmxd),nhtemp(ktem),filter=thalf) sqvfull=total((nhtemp(ktem)-ltm)^2) sqvresid=total( (nhtemp(ktem)-prednh(kmxd))^2 ) print,'RE=',1.-sqvresid/sqvfull verltm=total(nhtemp(ktem))/float(nyrtemp) sqvfull=total((nhtemp(ktem)-verltm)^2) ;print,'CE=',1.-sqvresid/sqvfull ; ; Now apply the regression to the full period with data (any missing, ; set predictor to zero and carry on!). ; fullnh=fltarr(nyr) fullnh(*)=!values.f_nan useea=allea(*,ineq) for iyr = 0 , nyr-1 do begin fullnh(iyr)=total(useea(iyr,*)*bcoeff(*))+acoeff endfor ; ; Only store results if dofinal is set ; if dofinal ne 0 then begin ; fst=string(fyr,format='(I4)') if dosmooth eq 1 then smst='sm'+string(thalf,format='(I2.2)') else smst='' if dohug eq 1 then ahug='_NSIBhug' else ahug='' save,$ filename='bandtemp'+nhtit+pertit+'_fixed'+fst+smst+'_calpcr'+ahug+'.idlsave',$ yrmxd,prednh,nhtit,nyr,fullnh,predserr,tempyr,nhtemp ; endif ; end