##http://socrates.berkeley.edu/~biostat/Courses/Spring/Ph29632/Data/index.html ##Chapter 8: p. 108 ##Assignment 8: ##defects.8.data dat <- read.table("http://www.medepi.net/selvin/defects.8.data", header = FALSE, sep="") head(dat) ##display first 6 lines names(dat) <- c("Status", "Distance") attr(dat,"dict") <- list(Distance = "distance in meters", Status = "0 = case, 1 = control #1, 2 = control #2" ) str(dat) ##It might be convenience to create Strata variable ##Strata = i, for i = 1, ..., 118 n <- 118 dat$Strata <- rep(1:n, rep(3, n)) ##APPROACH 1, P. 110 ##Create Table 8.1 describe <- function(x){ n <- length(x) Mean <- mean(x) Median <- median(x) Std.Dev <- sd(x) Minimum <- min(x) Maximum <- max(x) cbind(n, Mean, Median, Std.Dev, Minimum, Maximum) } temp <- tapply(dat$Distance, dat$Status, describe) tab8.1 <- rbind(temp[[1]], temp[[2]], temp[[3]]) rownames(tab8.1) <- c("cases", "controls.1", "controls.2") round(tab8.1,1) ##Create Figure 8.1 windows(width = 5.5, height = 3) par(mfrow=c(1,3)) hist(dat$Distance[dat$Status==0], breaks = 18, main="") hist(dat$Distance[dat$Status==1], breaks = 18, main="") hist(dat$Distance[dat$Status==2], breaks = 18, main="") par(mfrow=c(1,1)) ##create matrix for convenience; ##works because data frame is sorted and there are 2 controls for each case d.ik <- matrix(dat$Distance, n, 3, byrow = TRUE) colnames(d.ik) <- 0:2 d.ik[1:20,] ##display 20 values d.i <- d.ik[,"0"] - 0.5*(d.ik[,"1"] + d.ik[,"2"]) d.i ##Create Figure 8.2 windows(width = 5.5, height = 3) par(mfrow=c(1,2)) plot(density(d.ik[,"0"], adjust = .35), ylab = "frequency", main = "", axes = FALSE) lines(density(d.ik[,"1"], adjust = .35), lty = 2) lines(density(d.ik[,"2"], adjust = .35), lty = 3) box() axis(1) legend(1500, 1e-03, legend = c("cases", "controls 1", "controls 2"), lty = 1:3, cex = 0.7) plot(density(d.ik[,"0"]), ylab = "frequency", main = "", lty = 1, axes = FALSE) lines(density(d.ik[,"1"]), lty = 2) lines(density(d.ik[,"2"]), lty = 3) box() axis(1) legend(1500, 8e-04, legend = c("cases", "controls 1", "controls 2"), lty = 1:3, cex = 0.7) par(mfrow=c(1,1)) ##Create Figure 8.3 windows(width = 5.5, height = 3) par(mfrow=c(1,2)) hist(d.i, breaks=20, xlab = "difference in distances", main = "Histogram") plot(density(d.i), xlab = "difference in distances", main = "Smoothed Frequency", axes = FALSE) axis(1) box() par(mfrow=c(1,1)) ##p. 111 dbar <- sum(d.i)/n dbar ##variance, p. 113 S2d <- sum((d.i - dbar)^2)/(n-1) S2dbar <- S2d/n Sdbar <- sqrt(S2dbar) tval <- (dbar - 0)/Sdbar tval ##P(T <= Tval | no difference in distances) pt(tval, df = n-1) ##t.test, p. 114 ##Go straight to t.test in R t.test(d.i, df = n-1, alternative = "less") ##APPROACH 2, P. 114 ##Use inverse of distance to deemphasize long distances hbar0 <- n/sum(1/d.ik[,"0"]) hbar1 <- n/sum(1/d.ik[,"1"]) hbar2 <- n/sum(1/d.ik[,"2"]) hbar <- hbar0 - 0.5*(hbar1 + hbar2) hbar round(cbind(hbar0, hbar1, hbar2), 3) ##Bootstrap estimate replicates <- 5000 d.i0.boot <- matrix(sample(d.ik[,"0"], replicates*n, replace = TRUE), nrow = replicates, ncol = n) d.i1.boot <- matrix(sample(d.ik[,"1"], replicates*n, replace = TRUE), nrow = replicates, ncol = n) d.i2.boot <- matrix(sample(d.ik[,"2"], replicates*n, replace = TRUE), nrow = replicates, ncol = n) Hbar0 <- n/apply(1/d.i0.boot, 1, sum) Hbar1 <- n/apply(1/d.i1.boot, 1, sum) Hbar2 <- n/apply(1/d.i2.boot, 1, sum) Hbar <- Hbar0 - 0.5*(Hbar1 + Hbar2) meanHbar <- mean(Hbar) meanHbar medianHbar <- median(Hbar) medianHbar sdHbar <- sd(Hbar) sdHbar ##Create Figure 8.4, p. 117 windows(width = 5, height = 5) z <- hist(Hbar, breaks = 50, freq = FALSE) lines(z$mids, dnorm(z$mids, mean=meanHbar, sd=sdHbar)) ##Bootstrap pvalue ##Pr(Hbar >= 0 | no case/control difference), p. 116 pval <- sum(Hbar>=0)/length(Hbar) pval ##normal approximation ##Pr(Hbar >= 0 | no case/control difference), p. 116 zval <- (0 - meanHbar)/sdHbar pnorm(zval, lower.tail = FALSE) ##confidence interval using normal approximation ##and mean and std error from bootstrap conf.level <- 0.95 Z <- qnorm(0.5*(1+conf.level)) Hbar.ci <- meanHbar + c(-1, 1)*Z*sdHbar Hbar.ci ##APPROACH 3, P. 118 ##We have already created the 118x3 table: d.ik ##Create Figure 8.3 tt0 <- t.test(d.ik[,"0"], df=n-1) tt1 <- t.test(d.ik[,"1"], df=n-1) tt2 <- t.test(d.ik[,"2"], df=n-1) tt <- rbind(c(tt0$estimate, tt0$conf.int), c(tt1$estimate, tt1$conf.int), c(tt2$estimate, tt2$conf.int)) colnames(tt) <- c("mean", "lower","upper") ##Create Figure 8.5 windows(width = 5, height = 5) matplot(1:3, tt, type = "p", pch = c(16,3,3), col = 1, xlab = "Case/Controls", ylab = "Mean values", xlim = c(0.5, 3.5), axes = FALSE) axis(1, at = 1:3, labels = 0:2) axis(2) box() segments(x0 = 1:3, y0 = tt[,2], x1 = 1:3, y1 = tt[,3], lty = 2) text(x = c(1.25, 2.25, 3.25), y = tab8.1[,"Mean"], labels = round(tab8.1[,"Mean"],1)) ##Re-create tab8.3 analysis of variance, p. 119 ##Distance converted to kilometers dat$Kilometers <- dat$Distance/1000 fit1 <- aov(Kilometers ~ factor(Strata) + factor(Status), data = dat) tab8.3 <- summary(fit1) tab8.3 ##APPROACH 4, P. 120 ##Stratified-matched 2x2 tables dat$Distance2 <- cut(dat$Distance, c(0, 100, max(dat$Distance)), labels = c("<=100", ">100")) ##set the exposure reference level (important in modeling later) dat$Distance2 <- relevel(dat$Distance2, ref=">100") table(dat$Distance2) ##make 2x2x118 array ##Need to dichotomize outcome variable dat$Status2 <- dat$Status dat$Status2[dat$Status==0] <- "Case" dat$Status2[dat$Status>0] <- "Control" dat$Status2 <- factor(dat$Status2) tab3way <- table(Status = dat$Status2, Distance = dat$Distance2, Strata = dat$Strata) tab3way[,,1:2] ##let's reverse order of columns ##this is important for MH calculations tab3way <- tab3way[,2:1,] tab3way[,,1:2] ##looks better ai <- tab3way[1,1,] bi <- tab3way[1,2,] ci <- tab3way[2,1,] di <- tab3way[2,2,] ni <- ai+bi+ci+di ##Traditional MH odds ratio or.mh <- sum((ai*di)/ni)/sum((bi*ci)/ni) or.mh ##Create Table 8.5, p. 122 Type <- rep(NA, n) Type[ai==1 & bi==0 & ci==2 & di==0] <- "n12" Type[ai==1 & bi==0 & ci==1 & di==1] <- "n11" Type[ai==1 & bi==0 & ci==0 & di==2] <- "n10" Type[ai==0 & bi==1 & ci==2 & di==0] <- "n02" Type[ai==0 & bi==1 & ci==1 & di==1] <- "n01" Type[ai==0 & bi==1 & ci==0 & di==2] <- "n00" Type <- factor(Type, level = c("n12","n11","n10","n02","n01","n00")) tab8.5 <- table(Type) tab8.5 n12 <- tab8.5["n12"] n11 <- tab8.5["n11"] n10 <- tab8.5["n10"] n02 <- tab8.5["n02"] n01 <- tab8.5["n01"] n00 <- tab8.5["n00"] chi2.m <- ((n11+2*n10)-(n01+2*n02))^2/(2*(n11+n10+n02+n01)) chi2.m ##P value pchisq(chi2.m, df = 1, lower.tail = FALSE) var.logor.m <- 1/(2*or.mh*(((n10+n01)/(2+or.mh)^2) + ((n11+n02)/(1+2*or.mh)^2))) conf.level <- 0.95 Z <- qnorm(0.5*(1+conf.level)) lower <- or.mh*exp(-Z*sqrt(var.logor.m)) upper <- or.mh*exp(Z*sqrt(var.logor.m)) cbind(lower, upper) ##APPROACH 5, P. ##Conditional logistic regression model in 'survival' package ##Need to re-code outcome: 0=control, 1=case dat$case <- dat$Status dat$case[dat$Status==0] <- 1 dat$case[dat$Status==1] <- 0 dat$case[dat$Status==2] <- 0 library(survival) cmod <- clogit(case ~ Distance2 + strata(Strata), data = dat) summary(cmod) ##Heah!!! Done