#
#  Ordinal: A Library of Ordinal Models
#  Copyright (C) 1998, 1999, 2000, 2001 P.J. Lindsey
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public Licence as published by
#  the Free Software Foundation; either version 2 of the Licence, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public Licence for more details.
#
#  You should have received a copy of the GNU General Public Licence
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
#  SYNOPSIS
#
#  kalord(response,times=NULL,distribution="multinomial",
#         depend="independence",mu=NULL,ccov=NULL,tvcov=NULL,torder=0,
#         interaction=NULL,preg=NULL,ptvc=NULL,pinitial=1,pdepend=NULL,
#         envir=sys.frame(sys.parent()),optimize=T,print.level=0,
#         ndigit=10,gradtol=0.00001,steptol=0.00001,fscale=1,
#         iterlim=100,typsiz=abs(p),stepmax=10*sqrt(p%*%p))
#
#  DESCRIPTION
#
#    A function to fit various parameterization of the logistic
#  distribution inserted into a Pareto distribution with serial
#  dependence or gamma frailties using Kalman-type update for
#  ordinal longitudinal data.
#

kalord <- function(response,times=NULL,distribution="multinomial",depend="independence",mu=NULL,ccov=NULL,tvcov=NULL,torder=0,interaction=NULL,preg=NULL,
                   ptvc=NULL,pinitial=1,pdepend=NULL,envir=sys.frame(sys.parent()),optimize=T,print.level=0,ndigit=10,gradtol=0.00001,steptol=0.00001,
                   fscale=1,iterlim=100,typsiz=abs(p),stepmax=10*sqrt(p%*%p)) {
  series <- function(p) {
    if(rf)
      b <- mu1(p[-(1:(nc-1))])
    z <- .C("kord",
            p=as.double(p),
            y=as.integer(y),
            t=as.double(times),
            x=as.double(resp$ccov$ccov),
            nind=as.integer(nind),
            nobs=as.integer(nobs),
            nbs=as.integer(n),
            nc=as.integer(nc),
            nccov=as.integer(nccov),
            npv=as.integer(npv),
            init=as.integer(init),
            model=as.integer(mdl),
            dep=as.integer(dep),
            torder=as.integer(torder),
            inter=as.integer(interaction),
            tvc=as.integer(tvc),
            tvcov=as.double(resp$tvcov$tvcov),
            fit=as.integer(0),
            pred=double(n),
            rpred=double(n),
            cprob=double(n*(nc-1)),
            ppred=double(n),
            cpred=double(n*(nc-1)),
            creg=double(n*(nc-1)),
            rf=as.integer(rf),
            bbb=as.double(b),
            beta=double(nc-1),
            bt=double(nc-1),
            bt2=double(nc-1),
            bet=double(nc-1),
            cum=double(nc-1),
            like=double(1),
            DUP=F)
    z$like
  }
  call <- sys.call()
  tmp <- c("binary","multinomial","continuation-ratio","proportional-odds")
  mdl <- match(distribution <- match.arg(distribution,tmp),tmp)
  tmp <- c("independence","Markov","serial","frailty")
  dep <- match(depend <- match.arg(depend,tmp),tmp)-1
  rf <- !is.null(mu)
  respenv <- exists(paste(deparse(substitute(response))))&&inherits(response,"repeated")&&!inherits(envir,"repeated")
  if(respenv) {
    if(dim(response$response$y)[2]>1)
      stop("kalseries only handles univariate responses")
    if(!is.null(response$NAs)&&any(response$NAs))
      stop("kalseries does not handle data with NAs")
  }
  envname <- if(respenv) paste(deparse(substitute(response))) else NULL
  if(!respenv&&inherits(envir,"repeated")) {
    if(!is.null(envir$NAs)&&any(envir$NAs))
      stop("kalseries does not handle data with NAs")
    cn <- paste(deparse(substitute(response)))
    if(length(cn)>1)
      stop("only one response variable allowed")
    response <- envir
    if(dim(response$response$y)[2]>1) {
      col <- match(cn,colnames(response$response$y))
      if(is.na(col))
        stop("response variable not found")
      response$response$y <- response$response$y[,col,drop=F]
    }
  }
  if(respenv||inherits(envir,"repeated")) {
    if(!rf) {
      resp <- response
      if(is.null(ccov))
        resp$ccov <- NULL
      else
        if(inherits(ccov,"formula")) {
          tmp <- wr(ccov,data=response,expand=F)$design
          if(!is.matrix(tmp))
            stop("ccov covariate(s) not found")
          resp$ccov$ccov <- tmp[,-1,drop=F]
          rm(tmp)
        }
        else
          stop("ccov must be a W&R formula")
      if(is.null(tvcov))
        resp$tvcov <- NULL
      else
        if(inherits(tvcov,"formula")) {
          tmp <- wr(tvcov,data=response)$design
          if(!is.matrix(tmp))
            stop("tvcov covariate(s) not found")
          resp$tvcov$tvcov <- tmp[,-1,drop=F]
          rm(tmp)
        }
        else
          stop("tvcov must be a W&R formula")
    }
    else
      resp <- rmna(response$response)
    nccov <- if(rf||is.null(resp$ccov$ccov)) 0 else dim(resp$ccov$ccov)[2]
    ttvc <- if(rf||is.null(resp$tvcov$tvcov)) 0 else dim(resp$tvcov$tvcov)[2]
  }
  else {
    if(inherits(response,"response"))
      resp <- response
    else
      resp <- restovec(response,times)
    if(is.null(ccov))
      nccov <- 0
    else {
      if(!inherits(ccov,"tccov")) {
        ccname <- paste(deparse(substitute(ccov)))
        if((is.matrix(ccov)&&is.null(colnames(ccov)))) {
          ccname <- paste(deparse(substitute(ccov)))
          if(dim(ccov)[2]>1) {
            tmp <- NULL
            for(i in 1:dim(ccov)[2])
              tmp <- c(tmp,paste(ccname,i,sep=""))
            ccname <- tmp
          }
        }
        ccov <- tcctomat(ccov,names=ccname)
      }
      nccov <- if(rf) 0 else dim(ccov$ccov)[2]
    }
    if(is.null(tvcov))
      ttvc <- 0
    else {
      if(!inherits(tvcov,"tvcov")) {
        tvcname <- paste(deparse(substitute(tvcov)))
        if(is.list(tvcov)&&dim(tvcov[[1]])[2]>1) {
          if(is.null(colnames(tvcov[[1]]))) {
            tvcname <- paste(deparse(substitute(tvcov)))
            tmp <- NULL
            for(i in 1:dim(tvcov[[1]])[2])
              tmp <- c(tmp,paste(tvcname,i,sep=""))
            tvcname <- tmp
          }
          else
            tvcname <- colnames(tvcov[[1]])
        }
        tvcov <- tvctomat(tvcov,names=tvcname)
      }
      ttvc <- if(rf) 0 else dim(tvcov$tvcov)[2]
    }
    resp <- rmna(response=resp, tvcov=tvcov, ccov=ccov)
    if(!is.null(ccov))
      rm(ccov)
    if(!is.null(tvcov))
      rm(tvcov)
  }
  n <- length(resp$response$y)
  nc <- length(unique(resp$response$y))
  if(min(resp$response$y)!=0||max(resp$response$y)!=nc-1)
    stop(paste("Ordinal values must be numbered from 0 to ",nc-1,".",sep=""))
  if(nc<2)
    stop("The response must at least have 2 categories.")
  else
    if(nc==2) {
      mdl <- 1
      distribution <- "binary"
    }
    else
      if(distribution=="binary") {
        mdl <- 2
        distribution <- "multinomial"
      }
  nobs <- nobs(resp)
  nind <- length(nobs)
  if((inherits(envir,"repeated")&&(length(nobs)!=length(nobs(envir))||any(nobs!=nobs(envir))))||(inherits(envir,"tvcov")&&(length(nobs)!=length(envir$tvcov$nobs)||any(nobs!=envir$tvcov$nobs))))
    stop("response and envir objects are incompatible")
  mu3 <- NULL
  if(respenv||inherits(envir,"repeated")||inherits(envir,"tccov")||inherits(envir,"tvcov")) {
    type <- if(respenv||inherits(envir,"repeated")) "repeated" else if(inherits(envir,"tccov")) "tccov" else "tvcov"
    if(is.null(envname))
      envname <- paste(deparse(substitute(envir)))
    if(inherits(mu,"formula")) {
      mu3 <- if(respenv) finterp(mu,.envir=response,.name=envname,.intercept=F) else finterp(mu,.envir=envir,.name=envname,.intercept=F)
      class(mu) <- c(class(mu),type)
    }
    else
      if(is.function(mu)) {
        tmp <- parse(text=paste(deparse(mu))[-1])
        class(mu) <- type
        mu <- if(respenv) fnenvir(mu,.envir=response,.name=envname) else fnenvir(mu,.envir=envir,.name=envname)
        mu3 <- mu
        attr(mu3,"model") <- tmp
      }
  }
  if(!is.null(preg)) {
    if(distribution=="proportional-odds")
      if(length(unique(preg[1:(nc-1)]))!=(nc-1)) {
        preg[1:(nc-1)] <- 1:(nc-1)
        warning("Changing initial preg estimates for the intercepts as they must be different.")
      }
  }
  else
    if(!is.null(ptvc))
      if(distribution=="proportional-odds")
        if(length(unique(ptvc[1:(nc-1)]))!=(nc-1)) {
          ptvc[1:(nc-1)] <- 1:(nc-1)
          warning("Changing initial preg estimates for the intercepts as they must be different.")
        }
  npreg <- length(preg)
  mu1 <- b <- NULL
  if(inherits(mu,"formula")) {
    pr <- if(npreg>0) preg else ptvc
    npr <- length(pr)
    mu2 <- if(respenv) finterp(mu,.envir=response,.name=envname,.expand=is.null(preg),.intercept=F) else finterp(mu,.envir=envir,.name=envname,.expand=is.null(preg),.intercept=F)
    npt1 <- length(attr(mu2,"parameters"))+nc-1
    if(is.character(attr(mu2,"model"))) {
      if(length(attr(mu2,"model"))==0) {
        mu2 <- NULL
        rf <- F
      }
    }
    else {
      if(npr!=npt1) {
        cat("\nParameters are ")
        cat(attr(mu2,"parameters"),"\n")
        stop(paste("preg or ptvc should have",npt1,"estimates"))
      }
      if(is.list(pr)) {
        if(!is.null(names(pr))) {
          o <- match(attr(mu2,"parameters"),names(pr))
          pr <- unlist(pr)[o]
          if(sum(!is.na(o))!=length(pr))
            stop("invalid estimates for mu - probably wrong names")
        }
        else
          pr <- unlist(pr)
        if(npreg>0)
          preg <- pr
        else
          ptvc <- pr
      }
    }
    if(!is.null(mu2)) {
      if(inherits(envir,"tccov")) {
        cv <- covind(response)
        mu1 <- function(p) mu2(p)[cv]
        attributes(mu1) <- attributes(mu2)
      }
      else {
        mu1 <- mu2
        rm(mu2)
      }
    }
  }
  else
    if(is.function(mu))
      mu1 <- mu
  if(!is.null(mu1)&&is.null(attr(mu1,"parameters"))) {
    attributes(mu1) <- if(is.function(mu)) {
      if(!inherits(mu,"formulafn")) {
        if(respenv)
          attributes(fnenvir(mu,.envir=response))
        else
          attributes(fnenvir(mu,.envir=envir))
      }
      else
        attributes(mu)
    }
    else {
      if(respenv)
        attributes(fnenvir(mu1,.envir=response))
      else
        attributes(fnenvir(mu1,.envir=envir))
    }
  }
  nlp <- if(is.function(mu1)) length(attr(mu1,"parameters"))+nc-1 else if(is.null(mu1)) NULL else npt1
  if(!is.null(nlp)) {
    if(is.null(ptvc)&&nlp!=npreg)
      stop(paste("preg should have",nlp,"initial estimates"))
    else
      if(!is.null(ptvc)&&length(ptvc)!=nlp)
        stop(paste("ptvc should have",nlp,"initial estimates"))
  }
  if(rf&&!is.function(mu1))
    stop("mu must be a formula or function")
  tvc <- length(ptvc)
  init <- !is.null(pinitial)
  if(init)
    if(pinitial<=0)
      stop("Estimate of initial parameter must greater than 0")
    else
      pinitial <- log(pinitial)
  else
    if(depend!="independence")
      stop(paste("To introduce a ",depend," dependence, pinit must have an initial estimate.",sep=""))
  if(depend=="independence"||depend=="frailty")
    pdepend <- NULL
  else {
    if(is.null(pdepend))
      stop("An estimate of the dependence parameter must be supplied")
    else
      if(pdepend<=0||pdepend>=1)
        stop("Dependence parameter must be between 0 and 1")
      else
        pdepend <- log(pdepend/(1-pdepend))
  }
  if(is.null(resp$response$times)) {
    if(depend=="serial"||depend=="Markov")
      stop("No times. Serial and Markov dependence cannot be fitted.")
    ave <- times <- 0
  }
  else {
    ave <- mean(resp$response$times)
    times <- resp$response$times-ave
  }
  if(!is.null(interaction)) {
    if(length(interaction)!=nccov)
      stop(paste(nccov,"interactions with time must be specified"))
    else
      if(any(interaction>torder))
        stop(paste("Interactions can be at most of order ",torder))
  }
  else
    interaction <- rep(0,nccov)
  npv <- torder+sum(interaction)
  if(rf&&npreg>0)
    nccov <- npreg-nc+1
  if(!rf&&nc-1+nccov+npv!=npreg)
    stop(paste(nc-1+nccov+npv,"regression estimates must be supplied"))
  y <- resp$response$y
  if(any(y<0))
    stop("All responses must be non-negative.")
  if(!rf&&(ttvc>0&&tvc!=ttvc||ttvc==0&&tvc>0))
    stop(paste(ttvc,"initial estimates of coefficients for time-varying covariates must be supplied"))
  if(rf) {
    if(tvc>0&&nccov>0)
      stop("With a mean function or formula, initial estimates must be supplied either in preg or in ptvc")
    ow <- options("warn")
    options(warn=-1)
    if(tvc>0) {
      if(length(mu1(ptvc))!=n)
        stop("The mu function must provide an estimate for each observation")
      tvc <- tvc-nc+1
    }
    else
      if(length(mu1(preg))==1) {
        if(nccov==0)
          mu1 <- function(p) rep(p[1],nind)
        else
          stop("Number of estimates does not correspond to mu function")
      }
      else
        if(length(mu1(preg))==n) { # &&nind!=n
          if(inherits(mu,"formula"))
            mu1 <- if(respenv) finterp(mu,.envir=response,.name=envname,.intercept=F) else finterp(mu,.envir=envir,.name=envname,.intercept=F)
          ptvc <- preg
          preg <- NULL
          npreg <- nccov <- 0
          tvc <- length(ptvc)-nc+1
        }
        else
          if(length(mu1(preg))!=nind)
            stop("The mu function or formula must provide an estimate for each individual")
    options(ow)
  }
  np <- nc-1+nccov+npv+tvc+init+(depend=="serial"||depend=="Markov")
  p <- c(preg,ptvc,pinitial,pdepend)
  serie <- series
  if(fscale==1)
    fscale <- serie(p)
  if(is.na(serie(p)))
    stop("Likelihood returns NAs: probably invalid initial values")
  if(optimize) {
    z0 <- nlm(serie,p=p,hessian=T,print.level=print.level,typsiz=typsiz,ndigit=ndigit,gradtol=gradtol,stepmax=stepmax,steptol=steptol,iterlim=iterlim,fscale=fscale)
    p <- z0$estimate
    like <- z0$minimum
    a <- if(any(is.na(z0$hessian))||any(abs(z0$hessian)==Inf)) 0 else qr(z0$hessian)$rank
    if(a==np)
      cov <- solve(z0$hessian)
    else
      cov <- matrix(NA,ncol=np,nrow=np)
    se <- sqrt(diag(cov))
    corr <- cov/(se%o%se)
    dimnames(corr) <- list(1:np,1:np)
    gradient <- z0$gradient
    iterations <- z0$iterations
    code <- z0$code
  }
  z <- {
    if(rf)
      b <- mu1(p[-(1:(nc-1))])
    .C("kord",
       p=as.double(p),
       y=as.integer(y),
       t=as.double(times),
       x=as.double(resp$ccov$ccov),
       nind=as.integer(nind),
       nobs=as.integer(nobs),
       nbs=as.integer(n),
       nc=as.integer(nc),
       nccov=as.integer(nccov),
       npv=as.integer(npv),
       init=as.integer(init),
       model=as.integer(mdl),
       dep=as.integer(dep),
       torder=as.integer(torder),
       inter=as.integer(interaction),
       tvc=as.integer(tvc),
       tvcov=as.double(resp$tvcov$tvcov),
       fit=as.integer(1),
       pred=double(n),
       rpred=double(n),
       cprob=double(n*(nc-1)),
       ppred=double(n),
       cpred=double(n*(nc-1)),
       creg=double(n*(nc-1)),
       rf=as.integer(rf),
       bbb=as.double(b),
       beta=double(nc-1),
       bt=double(nc-1),
       bt2=double(nc-1),
       bet=double(nc-1),
       cum=double(nc-1),
       like=double(1),
       DUP=F)
  }
  if(!optimize) {
    like <- z$like
    se <- rep(0,np)
    cov <- corr <- NULL
    gradient <- iterations <- code <- NULL
  }
  if(rf&&tvc>0) {
    nccov <- tvc
    tvc <- 0
  }
  if(!is.null(mu3))
    mu1 <- mu3
  z <- list(call=call,
            distribution=distribution,
            mdl=mdl,
            mu=mu1,
            npr=nc-1+nccov+tvc+torder+sum(interaction),
            depend=depend,
            torder=torder,
            interaction=interaction,
            response=resp$response,
            pred=z$pred,   # Fitted probabilities
            rpred=z$rpred, # Fitted recursive means
            cprob=z$cprob, # Fitted recursive cumulative probabilities
            ppred=z$ppred, # Fitted recursive probabilities
            cpred=z$cpred, # Fitted cumulative probabilities
            ccov=resp$ccov,
            creg=z$creg,
            tvcov=resp$tvcov,
            maxlike=like,
            aic=like+np,
            df=n-np,
            npt=np,
            npv=npv,
            init=init,
            coefficients=p,
            se=se,
            cov=cov,
            corr=corr,
            optimize=optimize,
            grad=gradient,
            iterations=iterations,
            code=code)
  class(z) <- c("kalordinal","recursive","repeated")
  return(z)
}
