# p.val.method="max"; ret.p.val=TRUE; mc.n=5e4; interaction.method="lik.ratio"; chngpts=NULL; lb.quantile=.1; ub.quantile=.9; chngpts.cnt=50; b.=-30; verbose=FALSE 
chngpt.test = function(formula.null, formula.chngpt, data, 
# during development we consider more interaction.method
    interaction.method=c("lr", "score", "lr.norm",    "weighted.max", "weighted.one.sided", "weighted.single",    "main.itxn", "main.only", "itxn.only",    "wald"), 
    chngpts=NULL, lb.quantile=.1, ub.quantile=.9, chngpts.cnt=50, b.=-30, single.weight=1,
    p.val.method=c("max"), ret.p.val=TRUE, mc.n=5e4, 
    verbose=FALSE 
) {    
    p.val.method <- match.arg(p.val.method)
    interaction.method <- match.arg(interaction.method)
    
    DNAME = deparse(substitute(data))
    
    # keep only records that have no missing data for the null model and the chngpt model
    subset.1 = complete.cases(model.frame(formula.null, data, na.action=na.pass))
    subset.2 = complete.cases(model.frame(formula.chngpt, data, na.action=na.pass))
    data=data[subset.1 & subset.2,,drop=FALSE]
    
    y=model.frame(formula.null, data)[,1]
    Z=model.matrix(formula.null, data)
    tmp=as.matrix(model.frame(formula.chngpt, data))
    if (nrow(Z)!=nrow(tmp)) stop("number of records do not match between formula.null and formula.chngpt")
    
    n=nrow(Z)
    p.z=ncol(Z)
    if (verbose) myprint(n, p.z)
    
    chng.var = tmp[,setdiff(colnames(tmp), colnames(Z))[1]]
    z.1.name=intersect(colnames(tmp), colnames(Z))
    has.itxn = length(z.1.name)>0        
        
    z.1 = Z[,z.1.name] # if the intersection is a null set, z.1 is a matrix of n x 0 dimension
    # only standardize the covariate involved in the interaction term when interaction.method=="weighted.max"
    if (startsWith(interaction.method, "weighted")) {
        z.1=drop(scale(z.1)) # scale returns a matrix, we want a vector
        Z[,z.1.name]=z.1
        chngpts.cnt = 25 # reduced since we need to search a grid of 14 beta2 as well
    }    
        
    fit.null=keepWarnings(glm(formula.null,  data=data, family="binomial")) # if glm gives a warning, use sandwich estimator to get variance est
    if(length(fit.null$warning)!=0) {
        return (NA)
    } else {
        fit.null=fit.null$value
    }
    
    beta.h = coef(fit.null)
    mu.h = drop(expit(Z %*% beta.h))    
    D.h = diag(c(mu.h*(1-mu.h)))
    V.beta.h = solve(t(Z) %*% D.h %*% Z)
    V.eta.h = Z %*% V.beta.h %*% t(Z)
    A.h = diag(n) - D.h %*% V.eta.h
    ADA = A.h %*% D.h %*% t(A.h)
    
    # when beta.0 is not null, compute ADA using true values of beta
#    if (!is.null(beta.0)) {
#        mu = drop(expit(Z %*% beta.0))    
#        D = diag(c(mu*(1-mu)))
#        V.beta = solve(t(Z) %*% D %*% Z)
#        V.eta = Z %*% V.beta %*% t(Z)
#        A = diag(n) - D %*% V.eta
#        ADA = A %*% D %*% t(A)
#    }
    
        
    if (is.null(chngpts)) chngpts=quantile(chng.var, seq(lb.quantile,ub.quantile,length=chngpts.cnt))
    p <- M <- length(chngpts)  # p is the dimension of the multivariate statistics, it will be a multiple of M in interaction testing
    
    # W is n x M 
    W=sapply(chngpts, function (e.){
        # logistic function based, used in mtct study
        u = exp(b.*(chng.var-e.))
        u[is.nan(u)]=1 # 0 * INF gives NaN in R
        #w = (1/(1+u))/sum(1/(1+u)) # w is not 0/1 as in the paper, but based on sigmoid approxiation, when b is large, it is almost like 0/1
        w = 1/(1+u) # w is not 0/1 as in the paper, but based on sigmoid approxiation, when b is large, it is almost like 0/1
#        # simply 0/1
#        w=as.numeric(chng.var>e.) #does not need to divide by sqrt(n) to be on the same scale as likelihood ratio
    })
    
    
    # get W
    if (has.itxn) {
        if (verbose) print("has interaction term")
        if (verbose) myprint(interaction.method)
        
        if (interaction.method=="itxn.only") {
            W = W * z.1
        
        } else if (interaction.method=="main.only") {
            # nothing is needed
        
        } else if (interaction.method=="main.itxn") {
            W = cbind(W, W*z.1)
        
        } else if (interaction.method=="weighted.max") {        
            W = cbind(W, W*z.1     
                , W*(1+1/4*z.1), W*(1+1/3*z.1), W*(1+1/2*z.1), W*(1+z.1), W*(1+2*z.1), W*(1+3*z.1), W*(1+4*z.1)
                , W*(1-1/4*z.1), W*(1-1/3*z.1), W*(1-1/2*z.1), W*(1-z.1), W*(1-2*z.1), W*(1-3*z.1), W*(1-4*z.1)
            )
        
        } else if (interaction.method=="weighted.one.sided") {
            # one sided in the sense that power is enhanced when beta1 and beta2 have the same sign
            W = cbind(W, W*z.1     
                , W*(1+1/5*z.1), W*(1+1/4*z.1), W*(1+1/3*z.1), W*(1+1/2*z.1), W*(1+z.1), W*(1+2*z.1), W*(1+3*z.1), W*(1+4*z.1), W*(1+5*z.1)
            )
                
        } else if (interaction.method=="weighted.single") {        
            W = W*(1+single.weight*z.1)
        
        } else if (interaction.method %in% c("lr","lr.norm","score")) {
        
            W.tmp = matrix(0, nrow=n, ncol=2*M)
            for (m in 1:M) {
                data$x.gt.e  = as.numeric(chng.var>chngpts[m])
                formula.1=update(formula.null, as.formula("~.+"%+%z.1.name%+%"*x.gt.e"))
                X = model.matrix(formula.1, data) # make sure column order is the same as the coefficient order
                # fit semi-alternative model to compute D.h.a and I
                # if test statistics is likelihood ratio, it does not matter if we had used D.h when computing I, but the type I error rate is increased from 5.9 to 6.4
                # if test statistics is computed from W, then it has to be D.h.a
                fit.a=keepWarnings(glm(formula.1,  data=data, family="binomial"))
                if(length(fit.a$warning)!=0) return (NA) else fit.a=fit.a$value
                beta.h.a = coef(fit.a)
                mu.h.a = drop(expit(fit.a$linear.predictors))    
                D.h.a = diag(c(mu.h.a*(1-mu.h.a)))  # invariant to affine transformation of Z
                I = t(X) %*% D.h.a %*% X # fisher information estimated under semi-alternative model
                I.bb.a = I[p.z+1:2, p.z+1:2] - I[p.z+1:2, 1:p.z] %*% solve(I[1:p.z, 1:p.z], I[1:p.z, p.z+1:2]) # the order depends on formula.1, hardcoded here
                
                # choose one, in terms of speed, there is a 10% difference, not a big deal
                # use eigen decomposition
                eig = eigen(solve(I.bb.a)) 
                I.bb.a.inv.sqrt = eig$vectors %*% diag(sqrt(eig$values)) %*% t(eig$vectors) 
#                # use chol decomposition
#                I.bb.a.inv.sqrt = t(chol(solve(I.bb.a))) 
                
                W.tmp[,1:2+2*(m-1)] = cbind(W[,m], W[,m]*z.1) %*% I.bb.a.inv.sqrt                
                # debug
                if(verbose==2) {
                    print(formula.1)
                    myprint(chngpts[m])
                    myprint(diag(I))
                    cat("I.bb.a.inv.sqrt\n")
                    print(I.bb.a.inv.sqrt)
                    myprint(summary(c(W.tmp)))
                }                
            }            
            W = W.tmp
            
        } else if (interaction.method=="wald") {
            
            # note that this idea is bad b/c size is 13.7%
            W.tmp = matrix(0, nrow=n, ncol=M)
            for (m in 1:M) {            
                # fit semi-alternative model to compute beta.h.a
                data$x.gt.e  = as.numeric(chng.var>chngpts[m])
                formula.1=update(formula.null, as.formula("~.+"%+%z.1.name%+%"*x.gt.e"))
                fit.a=keepWarnings(glm(formula.1,  data=data, family="binomial"))
                if(length(fit.a$warning)!=0) return (NA) else fit.a=fit.a$value
                beta.h.a = coef(fit.a)[p.z+1:2]                
                W.tmp[,m] = cbind(W[,m], W[,m]*z.1) %*% beta.h.a                
                # debug
                if(verbose==2) {
                    print(formula.1)
                }                
            }            
            W = W.tmp
            
        } # end if interaction.method
        
        p = ncol(W)
    } # end if has.itxn
    
    V.S.hat = t(W) %*% ADA %*% W
    if (verbose==2) print(V.S.hat)
    
#    qr(V.S.hat, tol = 1e-8)$rank
#    isSymmetric(A.h)
    
    #####################################################################################
    # compute test statistics
    
    if (has.itxn & interaction.method %in% c("lr","score")) {
        # maximum of chisquared statistics
        if (interaction.method=="lr") {
            # likelihood ratio test statistics
            QQ = sapply (1:M, function(m) {
                data$x.gt.e  = as.numeric(chng.var>chngpts[m])
                # fit semi-alternative model
                fit.a=keepWarnings(glm(update(formula.null, as.formula("~.+x.gt.e*"%+%z.1.name)),  data=data, family="binomial"))
                if(length(fit.a$warning)!=0) return (NA) else fit.a=fit.a$value
                fit.null$deviance - fit.a$deviance            
            })
        } else if (interaction.method=="score") {
            # compute from W
            aux=c((t(W) %*% (y - mu.h)) / sqrt(diag(V.S.hat))) # it is key to scale to 1 for some reason that I don't completely understand
            QQ=aux[1:M*2-1]**2 + aux[1:M*2]**2
        } else stop("interaction.method not supported "%+%interaction.method)
        Q.max=max(QQ)
        
        # debug
        if(verbose==2) {
            print(diag(V.S.hat))
            myprint(QQ)      
#            plot(QQ.1, type="b")  
#            lines(QQ, type="b", col=2)  
#            plot(QQ, QQ.1)
#            abline(0,1)
        }
    } else {
        # maximum of score statistics
        TT.0=c((t(W) %*% (y - mu.h)) / sqrt(diag(V.S.hat)))
        TT = abs(TT.0)
        T.max=max(TT)
    }
        
    # debug
    if(verbose==2) {
        myprint(dim(W))
        myprint(dim(V.S.hat))
        myprint(summary(c(ADA)))
        print(c(t(W) %*% (y - mu.h)))
        print(sqrt(diag(V.S.hat)))
        #print((TT.0))
        #print(round(V.S.hat[1:10,1:10],6))
    }
        
    #####################################################################################
    # compute p value
    
    p.value=NA
    if (ret.p.val) {                
    
        # save rng state before set.seed in order to restore before exiting this function
        save.seed <- try(get(".Random.seed", .GlobalEnv), silent=TRUE) 
        if (class(save.seed)=="try-error") {        
            set.seed(1)
            save.seed <- get(".Random.seed", .GlobalEnv)
        }                        
        set.seed(1)
        
        # one can also find the quantile of max of multivariate normal by the following, but it actually takes 3 times as long
        #qmvnorm(.975, interval = c(-10, 10), tail = c("lower.tail"), mean = 0, corr = cov2cor(Sigma), sigma = NULL, maxpts = 25000, abseps = 0.001, releps = 0)        
        
        if (p.val.method=="max") {
            
            if (has.itxn & interaction.method %in% c("lr","score")) {
                # sample max of chisquared to get p value
                sam=mvrnorm (n=mc.n, mu=rep(0,p), Sigma=cov2cor(V.S.hat)) # it is key to use cor, if not size becomes 1.6% and power is reduced by half almost
                x.max = apply(sam, 1, function(aux) {
                    max(aux[1:M*2-1]**2 + aux[1:M*2]**2)
                })
                p.value = mean(x.max>Q.max)                
                # debug
                if(verbose==2) myprint(x.max)
            } else {
                sam=mvrnorm (n=mc.n, mu=rep(0,p), Sigma=cov2cor(V.S.hat))
                sam=abs(sam)            
                # there are several programming methods to get the max
                #x.max=rowMaxs(sam) #from matrixStats is slowest
                #x.max=pmax(sam[,1], sam[,2], sam[,3], sam[,4], sam[,5], sam[,6], sam[,7], sam[,8], sam[,9], sam[,10]) # faster than apply, but hard coded
                # the following is a little slower than doing pmax as above, but acceptable for n=1e5
                tmp = apply(sam, 2, function(aux) list(aux))
                tmp = do.call(c, tmp)
                x.max=do.call(pmax, tmp)            
                p.value = mean(x.max>T.max)
            }
            
        } else if (p.val.method=="chi.squared") {
        
            sqrt.inv = solve(chol(cov2cor(V.S.hat))) # t(sqrt.inv) %*% cov2cor(V.S.hat) %*% sqrt.inv = identity, i.e. t(sqrt.inv) %*% TT has identity covariance matrix
            TT.std = t(sqrt.inv) %*% TT.0
            p.value = pchisq(sum(TT.std**2), df=p, lower.tail = FALSE)                
        
        } else stop ("p.val.metthod not found")        
        
        # restore rng state 
        assign(".Random.seed", save.seed, .GlobalEnv)     
        
    } 
        
    res=list()
    res$chngpts=chngpts
    
    if (has.itxn & interaction.method %in% c("lr","score")) {
        res$TT=QQ
        res$statistic=Q.max
        names(res$statistic)="Maximum of Chi-squared statistics"
        res$method="Maximum of Chi-squared Statistics Change Point Test"
    } else  {
        res$TT=TT
        res$statistic=T.max
        names(res$statistic)="Maximum of score statistics"
        res$method="Maximum of Score Statistics Change Point Test"
    }    
    max.id=which.max(res$TT) %% chngpts.cnt
    if(max.id==0) max.id=chngpts.cnt
    res$chngpt = chngpts[max.id] 
    res$parameter=NULL
    res$conf.int=NULL
    res$estimate=NULL
    res$null.value=NULL
    res$alternative="two-sided"
    res$data.name=DNAME
    res$V.S.hat = V.S.hat
    res$p.value=p.value
    res$p.val.method=p.val.method
    res$interaction.method=interaction.method

    
    class(res)=c("chngpt.test","htest",class(res))
    res
    
}

plot.chngpt.test <- function(x, ...) {
    # when there is both main and interaction, there are two sets of statistics
    fold=length(x$TT)/length(x$chngpts)
    perc=as.numeric(strtrim(names(x$chngpts),nchar(names(x$chngpts))-1))  
    plot(rep((0:(fold-1))*100, each=length(perc))+perc, x$TT, xlab="change point (percentile)", ylab="T", type="b",  ...)
    #axis(side=1, at=rep((0:(fold-1))*100, each=length(perc))+perc, labels=rep(perc, fold))
    abline(v=(0:(fold-1))*100+as.numeric(strtrim(names(x$chngpt),nchar(names(x$chngpt))-1))  , lty=2)
    if (fold>1) abline(v=(1:(fold-1))*100)
}
