#These functions are for estimating an ARMA(p,q) time series model in R using ranks
#Code written by Yi Wu, Department of Statistics, Northwestern University (yi-wu@northwestern.edu)
#Work supervised by Beth Andrews, Department of Statistics, Northwestern University
#Reference: Rank-based estimation for autoregressive moving average time series models, Journal of Time Series Analysis 29 (2008) 51--73, by Beth Andrews

#Requirement: "timesac" package
#which is available from http://cran.r-project.org/
#For example:
install.packages("timsac")
library(timsac)

#Vector of observed time series values is labeled "ts"
#Here we use a simulated ARMA(p,q) series
#for example: ts[t] = -.75ts[t-1] + z[t] + .5z[t-1]
ts<-arima.sim(list(order=c(1,0,1),ar=c(-.75),ma=c(.5)),n=1000)
#Subtract mean
m<-mean(ts)
cat("Mean for observed time series :",m, "\n")
x<-ts-m
#Plot mean-corrected time series 
ts.plot(x)
#Sample ACF and PACF functions
acf(x)
pacf(x)


#use AIC criterion to find appropriate p,q
y<-autoarmafit(x)
p<-y$best.order$arorder
q<-y$best.order$maorder

#---------------------------------------------------------------------------------
#Estimate alpha, the vector of ARMA(p,q) model parameters via rank estimation:
#---------------------------------------------------------------------------------
#Auxiliary functions:
#---------------------------------------------------------------------------------
#Lambda specifies the weight function used. Here we have Wilcoxon as default, van 
#der Waerden, LAD-like weights as options.
 
lambda<-function(x) {
#Wilcoxon weight function
c<-x-.5
#------------------------------------------------
#van der Waerden weight function
#c<-qnorm(x)
#-------------------------------------------------
#LAD-like weight function
#if(x<=.5) {
#c<--1
#}
#else
#c<-1
#--------------------------------------------------
return(c)
}

#--------------------------------------------------------------------------------------

#armaInit finds candidate values for alpha, the Durbin-Levinson algorithm is used.
armaInit<-function(p,q) {
guess<-matrix(0,1,p+q)
 if(p>0) {
pacf=runif(p)
pacf=2*pacf-1
dlc<-matrix(rep(0),nrow=p,ncol=p)
dlc[1,1]=pacf[1]
if(p>1){
for (i in  2:p){
dlc[i,i]=pacf[i]
 for (ii in 1:(i-1)) {
dlc[i,ii]=dlc[(i-1),ii]-pacf[i]*dlc[(i-1),(i-ii)]
 }
}
}
guess[1:p]<-dlc[p,1:p]
}

if(q>0) {
pacf=runif(q)
pacf=2*pacf-1
dlc<-matrix(rep(0),nrow=q,ncol=q)
dlc[1,1]=pacf[1]
if(q>1){
for (i in  2:q){
dlc[i,i]=pacf[i]
 for (ii in 1:(i-1)) {
dlc[i,ii]=dlc[(i-1),ii]-pacf[i]*dlc[(i-1),(i-ii)]
 }
}
}
guess[(p+1):(p+q)]<-(-1)*dlc[q,1:q]
}
return(guess)
}


#----------------------------------------------------------------------------------
#Df returns the value of D for a candidate parameter vector alpha

Df<-function(alpha,n,x,p,q) {
z<-array(0,dim=c(1,(n+p+q)))
if (p==0) {
 for (t in (q+1):(n+q)) {
   z[t]<-x[(t-q)]-alpha%*%rev(z[(t-q):(t-1)])
 }
z<-z[(q+1):(n+q)]
}

if (q==0) {
 for (t in (p+1):n) {
   z[t]<-x[t]-alpha%*%rev(x[(t-p):(t-1)])
 }
}

if(p>0 && q>0) {
if (p>=q) {
 for (t in (p+1):n) {
   z[t]<-x[t]-alpha[1:p]%*%rev(x[(t-p):(t-1)])-alpha[(p+1):(p+q)]%*%rev(z[(t-q):(t-1)])
 }
}

if (q>p) {
 for (t in (n+1):(n+q-p)) {
    z[t]<-0
}
for (t in (q+1):(n+q-p)) {
   z[t]<-x[t-q+p]-alpha[1:p]%*%rev(x[(t-q):(t-1-q+p)])-alpha[(p+1):(p+q)]%*%rev(z[(t-q):(t-1)])
 }
z<-z[(q-p+1):(n+q-p)]
}
}
z<-z[(p+1):n]
zs<-sort(z-mean(z))

D<-0
for (t in 1:(n-p)) {
D<-D+lambda(t/(n-p+1))*zs[t]
}
#Check for model causality and invertibility.
if (p>0) { 
d<-polyroot(c(1,(-1)*alpha[1:p]))
for(i in 1:p) {
if(abs(d[i])<=1){
D<-D+1000
}
}
}
if (q>0) {
f<-polyroot(c(1,alpha[(p+1):(p+q)]))
for(i in 1:q) {
if(abs(f[i])<=1){
D<-D+1000
}
}
}
return(D)
}

#---------------------------------------------------------------------------------
#End of auxiliary functions
#---------------------------------------------------------------------------------

#Initialize a 100*(p+q+1) matrix and fill it with 100 candidate parameter vectors 
#and their D values 

initial<-matrix(rep(0),nrow=100,ncol=p+q+1)
for (ii in 1:100) {
guess=armaInit(p,q)
initial[ii,1:(p+q)]<-guess
D<-Df(guess,length(x),x,p,q)
initial[ii,(p+q+1)]<-D
}


#Return initial matrix ordered by D value, and pick the parameter estimate with the 
#smallest D value

initial<-initial[order(initial[,(p+q+1)]),]
malpha<-initial[1,1:(p+q)]
mD<-initial[1,(p+q+1)]

# Return our rank-based estimate.

for (ii in 1:5) {
alpha<-initial[ii,1:(p+q)]
alpha<-optim(par=alpha,fn=Df,n=length(x),x=x,p=p,q=q)
D<-alpha$value
if (D<mD) {
mD<-D
malpha<-alpha$par
}
}
cat("ARMA model parameters:",malpha,"with p:",p,"q:",q,"and D value:", mD, "\n")

#-----------------------------------------------------------------------------------------------------------------
#Estimate the covariance matrix for the parameter estimates and 95% confidence intervals for the parameter values
#-----------------------------------------------------------------------------------------------------------------
#Auxiliary functions:
#---------------------------------------------------------------------------------
#Innov returns calculated residuals
innov<-function(alpha,n,x,p,q) {
z<-array(0,dim=c(1,(n+p+q)))
if (p==0) {
 for (t in (q+1):(n+q)) {
   z[t]<-x[(t-q)]-alpha%*%rev(z[(t-q):(t-1)])
 }
z<-z[(q+1):(n+q)]
}

if (q==0) {
 for (t in (p+1):n) {
   z[t]<-x[t]-alpha%*%rev(x[(t-p):(t-1)])
 }
}

if(p>0 && q>0) {
if (p>=q) {
 for (t in (p+1):n) {
   z[t]<-x[t]-alpha[1:p]%*%rev(x[(t-p):(t-1)])-alpha[(p+1):(p+q)]%*%rev(z[(t-q):(t-1)])
 }
}

if (q>p) {
 for (t in (n+1):(n+q-p)) {
    z[t]<-0
}
for (t in (q+1):(n+q-p)) {
   z[t]<-x[t-q+p]-alpha[1:p]%*%rev(x[(t-q):(t-1-q+p)])-alpha[(p+1):(p+q)]%*%rev(z[(t-q):(t-1)])
 }
z<-z[(q-p+1):(n+q-p)]
}
}
z<-z[(p+1):n]
return(z)
}

#---------------------------------------------------------------------------------
#Lambdasq returns the squared weight function.
lambdasq<-function(x){
c<-(lambda(x))^2
return(c)
}
#---------------------------------------------------------------------------------
#End of auxiliary functions
#---------------------------------------------------------------------------------

n<-length(x)
z<-innov(malpha,n,x,p,q)
#Calculate J
a<-integrate(lambdasq,0,1)$value
b<-integrate(lambda,0,1)$value 
J<-a-b^2

#Calculate estimated sigma
sigma<-sqrt(t(z)%*%z/n)

#Calculate estimated K
zs<-sort(z)
IQR<-quantile(z,.75)-quantile(z,.25)
h<-.9*n^(-.2)*min(sigma, IQR/1.34)
K<-0
for (t in 1:(n-p)) {
a<-dnorm((z-zs[t])/h)
K<-K+sum(a)/n/h*(lambda(t/(n-p))-lambda((t-1)/(n-p)))
}

#Calculate Gamma
#Calculate the covariance matrix for U
Umatrix<-0
if (p>0){
g <- malpha[1:p]
Ucoeff<-ARMAtoMA(ar=g,lag.max=(100+max(p,q)))
Ucoeff<-c(1,t(Ucoeff))
Uacf<-ARMAacf(ar=g,lag.max=p)
Ugamma0 <- t(Ucoeff)%*%Ucoeff
Uacvf <- Ugamma0*Uacf
Umatrix <- matrix(nrow=p,ncol=p)
for (i in 1:p) {
     for (j in 1:p ){
     Umatrix[i,j]<-Uacvf[abs(i-j)+1]
}
}
}

#Calculate the covariance matrix for V
Vmatrix<-0
if(q>0){
h<-(-1)*malpha[(p+1):(p+q)]
Vcoeff<-ARMAtoMA(ar=h,lag.max=(100+max(p,q)))
Vcoeff<-c(1,t(Vcoeff))
Vacf<-ARMAacf(ar=h,lag.max=q)
Vgamma0<-t(Vcoeff)%*%Vcoeff;
Vacvf<-Vgamma0*Vacf
Vmatrix<-matrix(nrow=q,ncol=q)
for (i in 1:q) {
     for (j in 1:q ){
     Vmatrix[i,j]<-Vacvf[abs(i-j)+1]
}
}
}

#Calculate the covariance matrix for U,V
if (p>0 && q>0) {
UVacvf<-matrix(rep(0),nrow=(p+q-1),ncol=1)
for (h in 1:(p+q-1)){
     for (i in max(1,(q+1-h)):max(100,(100+q-h))){
     UVacvf[h]<-UVacvf[h]+Ucoeff[i]*Vcoeff[i+h-q]
}
}

UVmatrix<-matrix(rep(0),nrow=p,ncol=q)
for (i in 1:p) {
     for (j in 1:q ){
     UVmatrix[i,j]<-UVacvf[i-j+q]
}
}

Gamma<-cbind(rbind(Umatrix,t(UVmatrix)),rbind(UVmatrix,Vmatrix))
}

if (p==0){
Gamma<-Vmatrix
}

if (q==0) {
Gamma<-Umatrix
}

inv<-solve(Gamma)
b<-J*(K*sigma)^(-2)
b<-c(b)
covmatrix<-b*inv

#Display 95% confidence intervals for model parameters:
for (i in 1:(p+q)){
c<-covmatrix[i,i]
d<-sqrt(c/n)*1.96
right<-malpha[i]+d
left<-malpha[i]-d
cat("parameter estimate:", malpha[i],"and 95% confidence interval:" , left , "to", right, "\n")
}

#Residual analysis:
#Residual plot
ts.plot(z)
#Sample ACFs of residuals, absolute value of residuals, and squared residuals
acf(z)
acf(abs(z))
acf(z*z)
#Identify noise distribution so an optimal weight function could be used.
qqnorm(z)          #Normal qq-plot of residuals
hist(z)            #histogram of residuals
plot(density(z))   #kernel density estimate