##http://socrates.berkeley.edu/~biostat/Courses/Spring/Ph29632/Data/index.html ##Chapter3: p. 39 ##Randomized Trial ##alzheimer.3.data adat <- read.table("http://www.medepi.net/selvin/alzheimer.3.data", header = FALSE, sep="") names(adat) <- c("group", "score1", "score2") attr(adat, "dict") <- c("group = treatment indicator (1=treatment, 0=placebo)", "score1 = test score at baseline", "score2 = test score after six months") ##camparison of mean baseline scores, p. 41 xbar <- tapply(adat$score1, adat$group, mean) xbar ##alternative way to get mean x.c <- adat$score1[adat$group==0] x.t <- adat$score1[adat$group==1] xbar.c <- mean(x.c) xbar.c xbar.t <- mean(x.t) xbar.t ##standard error of xbar.c and xbar.t ##from Selvin 2004, p. 214 ##se.xbar = sqrt(var(xbar)) = sqrt(var(x)/n) n.c <- length(x.c) n.t <- length(x.t) se.xbar.c <- sqrt(var(x.c)/n.c) se.xbar.c se.xbar.t <- sqrt(var(x.t)/n.t) se.xbar.t ##mean difference xbar.diff <- xbar.c - xbar.t xbar.diff ##standard error of xbar.diff ##from Selvin 2004, p. 217 ##se.xbar.diff = sqrt(var(xbar.diff)) = sqrt(S2.pooled*(1/n1 + 1/n2)) ##where S2.pooled = ((n1-1)*var(x1)+(n2-1)*var(x2))/((n1-1)+(n2-1)) S2.pooled <- ((n.c-1)*var(x.c)+(n.t-1)*var(x.t))/((n.c-1)+(n.t-1)) se.xbar.diff <- sqrt(S2.pooled*(1/n.c + 1/n.t)) se.xbar.diff ##95% CI, p. 41 conf.level <- 0.95 T.ci <- qt(0.5*(1 + conf.level), df = n.c + n.t - 2) T.ci xbar.diff.L <- xbar.diff - T.ci*se.xbar.diff xbar.diff.U <- xbar.diff + T.ci*se.xbar.diff c(lower = xbar.diff.L, upper = xbar.diff.U) ##P(|T| >= Tval | no difference at baseline) = two-sided, p.41 Tval <- (xbar.diff - 0)/se.xbar.diff 2*pt(abs(Tval), df = n.c + n.t -2, lower.tail = FALSE) ##Of course, above is just two-sample t-test ##In R, see 't.test' function t.test(x.c, x.t) ##Wilcoxon Rank Sum Test, p. 41 wilcox.test(x.c, x.t, exact = FALSE) ##To see other statistical tests: apropos("test") ##Now, assess mean scores AFTER 6 months of treatment, p. 42 y.c <- adat$score2[adat$group==0] y.t <- adat$score2[adat$group==1] ##We'll skip to two-sample t-test using 't.test' function t.test(y.c, y.t) ##Now, a statistically more efficient approach is to take ##the difference of before and after values for each subject., p. 43 d.c <- x.c - y.c d.t <- x.t - y.t dd <- t.test(d.c, d.t) ##one-sided p-value at bottom of p. 44 pt(dd$statistic, df = n.c + n.t - 2, lower.tail = FALSE) ##'t-test' can also give one-sided p value t.test(d.c, d.t, alternative = "greater") ##compare to Wilcoxon Rank Sum Test, p. 45 ##set to give one-sided p value ##note that function argument are abbreviated wilcox.test(d.c, d.t, alt = "g", exact = FALSE) ##COMPUTER INTENSIVE APPROACHES ##Bootstrap, p. 45 n <- 6000 ##replicates boot.c <- sample(d.c, size = length(d.c)*n, replace = TRUE) boot.c <- matrix(boot.c, replicates, length(d.c)) Dbar.c <- apply(boot.c, 1, mean) boot.t <- sample(d.t, size = length(d.t)*n, replace = TRUE) boot.t <- matrix(boot.t, replicates, length(d.t)) Dbar.t <- apply(boot.t, 1, mean) Dbar.diff <- Dbar.c - Dbar.t mean(Dbar.diff) ##Calculate P(Dbar.diff <= 0 | mean(Dbar.diff)) pval <- sum(Dbar.diff<=0)/length(Dbar.diff) pval ##Randomization test, p. 46 n <- 6000 ##replicates d.all <- c(d.c, d.t) rand <- matrix(NA, n, length(d.all)) for(i in 1:nrow(rand)){ rand[i,] <- sample(d.all, size = length(d.all)) } rand.c <- rand[,1:length(d.c)] rand.t <- rand[,-c(1:length(d.c))] Dbar.cprime <- apply(rand.c, 1, mean) Dbar.tprime <- apply(rand.t, 1, mean) Dbar.diff.prime <- Dbar.cprime - Dbar.tprime mean(Dbar.diff.prime) ##Now, what is the P(Dbar.diff.prime >= dbar.diff| no difference)? ##where dbar.diff = mean(d.c)-mean(d.t) ##p. 47 dbar.diff <- mean(d.c)-mean(d.t) dbar.diff pval <- sum(Dbar.diff.prime >= dbar.diff)/length(Dbar.diff.prime) pval