##http://socrates.berkeley.edu/~biostat/Courses/Spring/Ph29632/Data/index.html ##Chapter 13: p. 204 ##Assignment 13: ##aids.13.data adat <- read.table("http://www.medepi.net/selvin/aids.13.data", header = FALSE, sep="") adat$age <- c(rep(0,52), rep(1, 88-52)) names(adat) <- c("status", "time", "beta", "cd4", "age") ##Create Table 1, p. 206 ##'summary' works but does not provide standard deviations summary(adat) ##Instead will use 'sapply' tab13.1 <- cbind(sapply(adat,mean), sapply(adat,median), sapply(adat,sd), sapply(adat,min), sapply(adat,max) ) colnames(tab13.1) <- c("Mean", "Median", "Std Dev", "Minimum", "Maximum") tab13.1 <- tab13.1[4:2,] round(tab13.1, 2) ##Create Figure 13.1 (two plot per page) par(mfrow = c(1, 2)) hist(adat$cd4, breaks = 15, probability = TRUE, axes = FALSE, xlab = "cd4", ylab = "", col = "lightcyan1", lwd = 2, main = "Distribution of cd4 counts") lines(density(adat$cd4)) axis(1) hist(adat$beta, breaks = 25, probability = TRUE, axes = FALSE, xlab = "beta", ylab = "", col = "darkolivegreen1", lwd = 2, main = "Distribution of beta levels") lines(density(adat$beta)) axis(1) par(mfrow = c(1, 1)) ##Create Figure 13.2 cd4bar <- mean(adat$cd4) betabar <- mean(adat$beta) plot(adat$cd4, adat$beta, type = "p", pch = 16) abline(lsfit(adat$cd4, adat$beta)$coef, lty = 2) abline(v = cd4bar, h = betabar, lty = 3) text(x = c(200, cd4bar), y = c(betabar+0.1, 0.8), labels = c("mean beta level", "mean cd4 count"), cex = 0.7) text(x = 1250, y = 1.5, labels = "least squares line") axis(1, at = cd4bar, labels = round(cd4bar, 1), cex.axis = .7, line = -1, tick = FALSE) axis(2, at = betabar, labels = round(betabar, 1), cex.axis = .7, line = -0.5, tick = FALSE, las = 2) ##Kaplan-Meier analysis, stratified by CD4 categories ##Create Tables 13.2, 13.3, 13.4, pp. 208-209 ##Create CD4 stratification variable (factor) cd4cat <- cut(adat$cd4, breaks = c(0, 399, 900, 1500), labels = c("<400", "400-900", ">900")) table(cd4cat) ##Notice that 'cd4cat' is a factor is.factor(cd4cat) cd4cat ##add cd4cat to data frame adat$cd4cat <- cd4cat ##Tables 13.2-13.4 ##Must load 'survival' package once library(survival) tab13.2to4 <- survfit(Surv(time, status)~cd4cat, data = adat) tab13.2to4 summary(tab13.2to4) ##Create Figure 13.3, p. 210 tab13.2 <- survfit(Surv(time, status)~cd4cat, data = adat, subset = cd4cat == "<400") tab13.3 <- survfit(Surv(time, status)~cd4cat, data = adat, subset = cd4cat == "400-900") tab13.4 <- survfit(Surv(time, status)~cd4cat, data = adat, subset = cd4cat == ">900") ##multiple figures on page par(mfrow = c(2, 2)) plot(tab13.2, xlab = "time (weeks)", ylab = "Survival Probability", main = "Kaplan-Meier: cd4 < 400") plot(tab13.3, xlab = "time (weeks)", ylab = "Survival Probability", main = "Kaplan-Meier: cd4 400 - 900") plot(tab13.4, xlab = "time (weeks)", ylab = "Survival probability", main = "Kaplan-Meier: cd4 > 900") plot(tab13.2to4, xlab = "time (weeks)", ylab = "Survival Probability", main = "Kaplan-Meier: by CD4 category") text(x = c(30, 60, 90), y = c(.25, .54, .67), labels = c("cd4 < 400", "cd4 400-900", "cd4 > 900"), cex = .7) par(mfrow = c(1, 1)) ##Model under constant hazard assumption, p. 212 ##Create Table 13.5 ##First, calculate mean times with CIs t.i <- tapply(adat$time, cd4cat, sum) ##all subjects t.i d.i <- tapply(adat$status, cd4cat, sum) ##uncensored d.i tbar.i <- t.i/d.i tbar.i sd.tbar.i <- tbar.i/sqrt(d.i) ##standard error sd.tbar.i conf.level <- 0.95 Z <- qnorm(0.5*(1-conf.level)) lower.tbar.i <- tbar.i - Z*sd.tbar.i lower.tbar.i upper.tbar.i <- tbar.i + Z*sd.tbar.i upper.tbar.i ##Second, calculate mean rates with CIs lam.i <- 1/tbar.i lam.i lower.lam.i <- 1/lower.tbar.i lower.lam.i upper.lam.i <- 1/upper.tbar.i upper.lam.i tab13.5 <- cbind(tbar = tbar.i, "S.D." =sd.tbar.i, tbar.L = lower.tbar.i, tbar.U = upper.tbar.i, rate = lam.i, rate.L = lower.lam.i, rate.U = upper.lam.i) round(tab13.5, 3) ##Plot Figure 13.4 ##Using constant survival probability function ##S(t) = exp(-lam*t) tim <- 0:120 ##cd4 <400 surv1 <- exp(-tab13.5["<400","rate"]*tim) surv1.L <- exp(-tab13.5["<400","rate.L"]*tim) surv1.U <- exp(-tab13.5["<400","rate.U"]*tim) ##cd4 400-900 surv2 <- exp(-tab13.5["400-900","rate"]*tim) surv2.L <- exp(-tab13.5["400-900","rate.L"]*tim) surv2.U <- exp(-tab13.5["400-900","rate.U"]*tim) ##cd4 >400 surv3 <- exp(-tab13.5[">900","rate"]*tim) surv3.L <- exp(-tab13.5[">900","rate.L"]*tim) surv3.U <- exp(-tab13.5[">900","rate.U"]*tim) ##Generate multiple figure plot on p. 213 (Figure 13.4) par(mfrow = c(2, 2)) matplot(tim, cbind(surv1, surv1.L, surv1.U), type = "l", lty = c(1, 2, 2), col = 1, xlab = "time (weeks)", ylab = "Survival probability", main = "Exponential survival:\ncd4 < 400") matplot(tim, cbind(surv2, surv2.L, surv2.U), type = "l", lty = c(1, 2, 2), col = 1, xlab = "time (weeks)", ylab = "Survival probability", main = "Exponential survival:\ncd4 400-900") matplot(tim, cbind(surv3, surv3.L, surv3.U), type = "l", lty = c(1, 2, 2), col = 1, xlab = "time (weeks)", ylab = "Survival probability", main = "Exponential survival:\ncd4 > 900") matplot(tim, cbind(surv1, surv2, surv3), type = "l", lty = 1:3, col = 1, xlab = "time (weeks)", ylab = "Survival probability", main = "Exponential survival:\nCD4 categories") text(x = c(20, 65, 100), y = c(.25, .45, .65), labels = c("cd4 <400", "cd4 400-900", "cd4 > 900"), cex = .8) par(mfrow = c(1, 1)) ##Calculate median survival ##Already done previously tab13.2to4 ##Parametric calculation of medican survival time, p. 214 ##median = (1/lam)*log(2) = tbar*log(2) tmed.i <- tbar.i*log(2) tmed.i ##Graphically evaluate whether constant hazard (exponential survival) ##assumption is valid, p. 214 ##Plot log-log plot ##log(-log(S(t))) = log(lambda) + log(t) ##First, transform data from Kaplan-Meier analysis loglogSt <- log(-log(summary(tab13.2to4)$surv)) loglogSt[loglogSt==Inf] <- NA logtime <- log(summary(tab13.2to4)$time) strata <- summary(tab13.2to4)$strata ##Second, fit straight line to transformed K-M analysis lsfit1 <- lsfit(logtime[strata=="cd4cat=<400"], loglogSt[strata=="cd4cat=<400"]) lsfit2 <- lsfit(logtime[strata=="cd4cat=400-900"], loglogSt[strata=="cd4cat=400-900"]) lsfit3 <- lsfit(logtime[strata=="cd4cat=>900"], loglogSt[strata=="cd4cat=>900"]) ##Third, create Figure 13.5 (bottom left), p. 215 plot(logtime, loglogSt, type = "n") lines(logtime[strata=="cd4cat=<400"], loglogSt[strata=="cd4cat=<400"], type="b", lty = 1, pch = 16) lines(logtime[strata=="cd4cat=400-900"], loglogSt[strata=="cd4cat=400-900"], type="b", lty = 2, pch = 1) lines(logtime[strata=="cd4cat=>900"], loglogSt[strata=="cd4cat=>900"], type="b", lty = 3, pch = 4) abline(lsfit1$coef, type="l", lty = 1) abline(lsfit2$coef, type="l", lty = 2) abline(lsfit3$coef, type="l", lty = 3) ##Create Table 13.6, p. 216 tab13.6 <- rbind(rev(lsfit1$coef), rev(lsfit2$coef), rev(lsfit3$coef)) tab13.6 tab13.6 <- cbind(tab13.6, exp(tab13.6[,2]), lam.i) colnames(tab13.6) <- c("Slope", "Intercept", "exp(Intercept)", "lamda.i") round(tab13.6, 3) ##Additive model, Table 13.8, p. 219 model0 <- coxph(Surv(time, status)~cd4 + beta + age, data = adat) summary(model0) loglik0 <- -2*model0$loglik[2] loglik0 ##Interation model, Table 13.7, p. 219 model1 <- coxph(Surv(time, status)~cd4 + beta + age + age*beta + age*cd4 + beta*cd4, data = adat) summary(model1) loglik1 <- -2*model1$loglik[2] loglik1 chi2 <- loglik0 - loglik1 chi2 pchisq(chi2, df = 3, lower.tail = FALSE) ##For chi-square test using -2*loglik from models ##use the anova function. See help for 'anova.coxph' anova(model0, model1, test = "Chisq") ##Assess model without age, pp. 220-221 model3 <- coxph(Surv(time, status)~cd4 + beta, data = adat) anova(model3, model0, test = "Chisq") ##Notice p-value is same 'age' variable in model0 ##For explanation see Selvin, p. 220 model0