##http://socrates.berkeley.edu/~biostat/Courses/Spring/Ph29632/Data/index.html ##read multiple.1.data multdat <- read.table("http://www.medepi.net/selvin/multiple.1.data", header = FALSE, sep="") multdat <- data.frame(year = 1983:1991, multdat) names(multdat) <- c("year", "one+", "one") attr(multdat, "dict") <- c("year = year of birth", "one+ = one or more congenital defects (1)", "one = one congenital defect (0)") multdat attributes(multdat) ##TRADITIONAL CHI-SQUARE TEST ##compare observed vs. expected counts oi <- t(as.matrix(multdat[,2:3])) colnames(oi) <- 1983:1991 oi rtot <- apply(oi, 1, sum) ctot <- apply(oi, 2, sum) ei <- outer(rtot, ctot, "*")/sum(oi) ei chi2.T <- sum((oi - ei)^2/ei) chi2.T pchisq(q = chi2.T, df = 8, lower.tail = FALSE) ## one definition of residuals residi <- (oi - ei)/sqrt(ei) residi sum(residi^2) ##chi2.T ####ASSESSING LINEARITY AND FIT, p. 5 ##pi = a + b*(xi - 1982) ##create complete data set xi <- c(rep(1983:1991, oi["one+",]), rep(1983:1991, oi["one",])) yi <- c(rep(rep(1, ncol(oi)), oi["one+",]), rep(rep(0, ncol(oi)), oi["one",])) Sxx <- sum((xi - mean(xi))^2) Sxx Syy <- sum((yi - mean(yi))^2) Syy Sxy <- sum((xi - mean(xi))*(yi - mean(yi))) Sxy bhat <- Sxy/Sxx bhat ##calculate fitted proportions, p. 7 ahat.prime <- mean(yi) - bhat*mean(xi) ahat.prime ahat <- mean(yi) - bhat*mean(xi-1982) ahat ##calculate observed proportions phati <- sweep(oi, 2, ctot, "/")["one+",] phati years <- 1983:1991 ptildei <- ahat + bhat * (years - 1982) ptildei ni <- ctot Spi <- sqrt(phati * (1 - phati)/ni) Spi zi <- (phati - ptildei)/Spi zi chi2.fit <- sum(zi^2) chi2.fit pchisq(q = chi2.fit, df = 7, lower.tail = FALSE) ##Table 1.4. p. 7 round(cbind(phati, ptildei, pdiffi = phati-ptildei, Spi, zi),3) ####PARTITIONING CHI-SQUARE, p. 8 ##need to get chi2.L, then chi2.NL n <- length(xi) S2bhat <- Syy/(n*Sxx) S2bhat z2 <- chi2.L <- (bhat/sqrt(S2bhat))^2 z2 chi2.NL <- chi2.T - chi2.L chi2.NL ##proportion of chi2.T variation "explained" by straight line, p. 8 chat <- chi2.L/chi2.T chat ####COMPARING TWO MEAN VALUES, p. 9 tapply(xi, yi, mean) ##alternative xbar1 <- mean(xi[yi==1]) xbar1 xbar2 <- mean(xi[yi==0]) xbar2 m1 <- rtot["one+"] m2 <- rtot["one"] Sx1x2 <- sqrt((Sxx/n)*(1/m1 + 1/m2)) z <- (xbar1 - xbar2)/Sx1x2 z z^2 ## = chi2.L ####USE CORRELATION COEFFICIENT rxy <- Sxy/sqrt(Sxx*Syy) rxy n*rxy^2 ## = chi2.L ####USE LOGISTIC MODEL ##using full data xi.ctr <- xi-1982 logm <- summary(glm(yi~xi.ctr, family = binomial(link = logit))) logm A <- logm$coef[1] A B <- logm$coef[2] B ##using table of counts and weights yrs.ctr <- years-1982 logm.wt <- summary(glm(phati~yrs.ctr, weights = ctot, family = binomial(link = logit))) logm.wt A <- logm.wt$coef[1] A B <- logm.wt$coef[2] B ptildeprimei <- 1/(1 + exp(-(A + B * (years - 1982)))) ptildeprimei ni <- ctot Spi <- sqrt(phati * (1 - phati)/ni) Spi zi <- (phati - ptildeprimei)/Spi zi chi2.logistic.fit <- sum(zi^2) chi2.logistic.fit pchisq(q = chi2.logistic.fit, df = 7, lower.tail = FALSE) ##Table 1.5, p. 7 round(cbind(phati, ptildeprimei, pdiffi = phati-ptildeprimei, Spi, zi),3) ##stats covered ##glm ##pchisq ##NOT COVERED ##lm, lsfit ##cor ##chisq.test ##prop.test ##prop.trend.test