##http://socrates.berkeley.edu/~biostat/Courses/Spring/Ph29632/Data/index.html ##Chapter 5: p. 67 ##Updated 2005-02-24 ##Assignment 5: ##mendel.5.data mdat <- read.table("http://www.medepi.net/selvin/mendel.5.data", header = FALSE, sep="") names(mdat) <- c("tot", "spec", "alt") ##re-create Table 5.1, p. 70 prob <- cbind(c(rep(3/4,7),rep(2/3,7)),c(rep(1-3/4,7),rep(1-2/3,7))) oi <- mdat[,2:3] ei <- cbind(mdat$tot,mdat$tot)*prob ei ##re-shape into Table 5.1 oi2 <- as.vector(t(oi)) ei2 <- as.vector(t(ei)) diff <- oi2-ei2 resid <- diff/sqrt(ei2) tab5.1 <- round(cbind(oi = oi2, ei = ei2, diff, resid),3) tab5.1 ##chi2 from Table 5.1, p. 71 chi2 <- sum(resid^2) chi2 pchisq(chi2, df = 14) ##Now, evaluate P(closeness) ##re-create Table 5.2, p. 72 ##exact binomial probabilities ni <- mdat$tot p0 <- prob[,1] x <- abs((oi-ei)[,1]) x0 <- round(ni*p0 - x) x1 <- round(ni*p0 + x) p.exact <- rep(NA, length(x)) for(i in 1:length(p.exact)){ p.exact[i] <- sum(dbinom(x0[i]:x1[i], size=ni[i], p=p0[i])) } p.exact ##normal approximations phati <- mdat$spec/mdat$tot var.phati <- p0*(1-p0)/ni zi <- (abs(phati - p0) + 0.5/ni)/sqrt(var.phati) p.approx <- pnorm(zi) - pnorm(-zi) ##tail probabilities, p. 73 p.value <- 2*pnorm(zi, lower.tail = FALSE) tab5.2 <- round(cbind(phati, p0, p.exact, p.approx, zi, p.value), 3) tab5.2 ##simulation approach, p. 74 replicates <- 2000 Pij <- matrix(NA, nrow = 14, ncol = replicates) for(i in 1:nrow(Pij)){ Pij[i,] <- rbinom(replicates, ni[i], p0[i])/ni[i] } zij <- matrix(NA, nrow = 14, ncol = replicates) for(j in 1:ncol(zij)){ zij[,j] <- (Pij[,j]-p0)/sqrt((p0*(1-p0))/ni) } ratio3to1 <- zij[1:7,] ratio2to1 <- zij[8:14,] Y2.3j <- apply(ratio3to1^2, 2, sum) mean(Y2.3j) var(Y2.3j) Y2.2j <- apply(ratio2to1^2, 2, sum) mean(Y2.2j) var(Y2.2j) ##observed chi2 from previous zi calculations chi2a <- sum(zi[1:7]^2) chi2b <- sum(zi[8:14]^2) ##P(Y2.3 <= chi2a | p0 = 3/4), p. 74 sum(Y2.3j <= chi2a)/length(Y2.3j) hist(Y2.3j, 100) ##P(Y2.2 <= chi2b | p0 = 2/3), p. 75 sum(Y2.2j <= chi2b)/length(Y2.2j) hist(Y2.2j, 50) ##joint p-value: P(Y2.3 <= chi2a AND Y2.2 <= chi2b), p. 76 sum((Y2.3j <= chi2a) & (Y2.2j <= chi2b))/length(Y2.3j) ##plot plot(Y2.3j, Y2.2j, pch=1, cex=.5, xlab = "test-statistic(Y3)", ylab = "test-statistic(Y2)") abline(v = chi2a, h = chi2b)