##http://socrates.berkeley.edu/~biostat/Courses/Spring/Ph29632/Data/index.html ##Chapter 11: p. 161 ##Assignment 11: ##PART I: STANDARDIZED MORTALITY RATIO ##brain.11.data bdat <- read.table("http://www.medepi.net/selvin/brain.11.data", header = FALSE, sep="") head(bdat) names(bdat) <- c("Age", "MaleCases", "MalePop", "FemaleCases", "FemalePop") str(bdat) ##Create Table 11.1 FemaleRate <- bdat$FemaleCases/bdat$FemalePop MaleRate <- bdat$MaleCases/bdat$MalePop RateRatio <- MaleRate/FemaleRate tab11.1 <- cbind(MaleRate = 100000*MaleRate, FemaleRate = 100000*FemaleRate, RateRatio) attr(tab11.1, "Note") <- "Rate per 100,000 per year" tab11.1 ##SMR calculation on p. 165 ##Use female rate as reference ObsMaleCases <- sum(bdat$MaleCases) ExpMaleCases <- sum(FemaleRate*bdat$MalePop) SMR <- ObsMaleCases/ExpMaleCases SMR ##Create Figure 11.2 plot(log(100000*FemaleRate), log(100000*MaleRate), type = "p", pch = 16) lsfit.coef <- lsfit(log(100000*FemaleRate), log(100000*MaleRate))$coef abline(lsfit.coef) text(1, 1, paste("Intercept =", round(lsfit.coef[1], 3)), adj = 0) text(1, .9, paste("Slope =", round(lsfit.coef[2], 3)), adj = 0) ##Common Rate Ratio, p. 168 S2logRi <- 1/bdat$MaleCases + 1/bdat$FemaleCases wi <- 1/S2logRi logRi <- log(MaleRate/FemaleRate) logRbar <- sum(wi*logRi)/sum(wi) ##variance of mean log(R), R = common rate ratio S2logRbar <- 1/sum(wi) SlogRbar <- sqrt(S2logRbar) conf.level <- 0.95 Z <- qnorm(0.5*(1+conf.level)) lower <- logRbar - Z*SlogRbar upper <- logRbar + Z*SlogRbar c(log.rate.ratio = logRbar, lower = lower, upper = upper) c(rate.ratio = exp(logRbar), lower = exp(lower), upper = exp(upper)) ##Four more methods for common rate ratio, p. 169 ##Poisson model ##Prepare data for Poisson Model agelabs <- c("20-24", "25-29", "30-34", "35-39", "40-44", "45-49", "50-54", "55-59", "60-64", "65-69", "70-74", "75-79", "80-84", "85+") bmtx <- rbind(as.matrix(bdat[,c("MaleCases", "MalePop")]), as.matrix(bdat[,c("FemaleCases", "FemalePop")]) ) colnames(bmtx) <- c("Cases", "Population") Sex <- c(rep("Male", 14),rep("Female", 14)) Age <- rep(agelabs, 2) bdat2 <- data.frame(bmtx, Sex, Age) head(bdat2) m1 <- glm(Cases ~ Sex + Age, offset = log(Population), family = poisson, data = bdat2) summary(m1) ##Compare expected to observed cases bdat2$Fitted <- fitted(m1) bdat2[,c("Cases","Fitted")] ##Now we can create Figure 11.1 and 11.3 RateFitted <- bdat2$Fitted/bdat2$Population ##Figure 11.1 matplot(bdat$Age, 100000*cbind(MaleRate, FemaleRate, RateFitted[bdat2$Sex=="Male"], RateFitted[bdat2$Sex=="Female"]), type = c("b", "b", "l", "l"), pch = c(1, 16, NA, NA), lty = c(2, 2, 1, 1), col = 1, xlab = "Age", ylab = "Rate per 100,000 per year") ##Figure 11.3 matplot(bdat$Age, 100000*cbind(MaleRate, FemaleRate, RateFitted[bdat2$Sex=="Male"], RateFitted[bdat2$Sex=="Female"]), type = c("b", "b", "l", "l"), pch = c(1, 16, NA, NA), lty = c(2, 2, 1, 1), col = 1, log = "y", xlab = "Age", ylab = "Rate per 100,000 per year (Log scale)") ##Pearson goodness of fit on p. 171 chi2 <- sum(((bdat2$Cases-bdat2$Fitted)^2)/bdat2$Fitted) chi2 pchisq(chi2, df = 13, lower.tail = FALSE) ##Summary rate ratio for Sex, adjusting for age exp(m1$coef["SexMale"]) ##compare to SMR SMR ##PART II: ADJUSTED RATES ##perinatal.11.data pdat <- read.table("http://www.medepi.net/selvin/perinatal.11.data", header = FALSE, sep="") names(pdat) <- c("wt1", "wt2", "bdeaths", "bpop", "wdeaths", "wpop") attr(pdat, "dict") <- list(wt1 = "lower bound of birth weight interval (grams)", wt2 = "upper bound of birth weight interval (grams)", bdeaths = "number of recorded black perinatal and fetal deaths", bpop = "number of recorded black births and fetal deaths", wdeaths = "number of recorded white perinatal and fetal deaths", wpop = "number of recorded white births and fetal deaths" ) ##FIRST APPROACH: WEIGHT-SPECIFIC COMPARISONS ##Create Table 11.4, p. 176 ##1st and last rows were not included Weights <- paste(pdat$wt1,"-",pdat$wt2, sep="")[-c(1, length(pdat$wt1))] WDeaths <- pdat$wdeaths[-c(1, length(pdat$wt1))] BDeaths <- pdat$bdeaths[-c(1, length(pdat$wt1))] WBirths <- pdat$wpop[-c(1, length(pdat$wt1))] BBirths <- pdat$bpop[-c(1, length(pdat$wt1))] WRate <- WDeaths/WBirths BRate <- BDeaths/BBirths Ratio <- BRate/WRate tab11.4 <- cbind(WDeaths, WBirths, WRate = WRate*1000, BDeaths, BBirths, BRate = BRate*1000, Ratio) rownames(tab11.4) <- Weights round(tab11.4, 3) ##Create Figure 11.4, p. 177 Weight1 <- (pdat$wt1+49)[-c(1, length(pdat$wt1))] wprop <- WBirths/sum(WBirths) bprop <- BBirths/sum(BBirths) wmean <- sum(wprop*Weight1) bmean <- sum(bprop*Weight1) matplot(Weight1, cbind(bprop, wprop), type = "l", lty = 1:2, col = 1, xlab = "Birth weight (grams)", ylab = "Distribution") points(c(bmean, wmean), c(0, 0), pch = 4, lwd = 2, cex = 2) segments(x0 = c(bmean, wmean), y0 = c(0, 0), x1 = c(bmean-100, wmean+100), y1 = c(0.01, 0.01), type = "l", lty = 1) text(c(bmean-100, wmean+100), c(0.015, 0.015), c("mean\nblacks", "mean\nwhites")) abline(h = 0) legend(1000, 0.06, legend = c("black", "white"), lty = 1:2, col = 1) ##Create Figure 11.5, p. 177 windows(width = 5.5, height = 3) par(mfrow=c(1,2)) matplot(Weight1, 1000*cbind(BRate, WRate), type = "l", lty = 1:2, col = 1, xlab = "Birth weight (grams)", ylab = "Rate per 1000") legend(2500, 500, legend = c("black", "white"), lty = 1:2, col = 1, cex = .8) matplot(Weight1, 1000*cbind(BRate, WRate), type = "l", lty = 1:2, col = 1, log = "y", xlab = "Birth weight (grams)", ylab = "Rate per 1000 (log scale)") legend(2500, 500, legend = c("black", "white"), lty = 1:2, col = 1, cex = .8) par(mfrow=c(1,1)) ##SECOND APPROACH: A MODEL FREE SUMMARY ##Common Rate Ratio, p. 179 S2logRi <- 1/BDeaths + 1/WDeaths wi <- 1/S2logRi logRi <- log(BRate/WRate) logRbar <- sum(wi*logRi)/sum(wi) ##variance of mean log(R), R = common rate ratio S2logRbar <- 1/sum(wi) SlogRbar <- sqrt(S2logRbar) conf.level <- 0.95 Z <- qnorm(0.5*(1+conf.level)) lower <- logRbar - Z*SlogRbar upper <- logRbar + Z*SlogRbar c(log.rate.ratio = logRbar, lower = lower, upper = upper) c(rate.ratio = exp(logRbar), lower = exp(lower), upper = exp(upper)) ##Compare to crude rate ratio (sum(BDeaths)/sum(BBirths))/(sum(WDeaths)/sum(WBirths)) ##THIRD APPROACH: POISSON REGRESSION ##Prepare data ##Use mid-interval birth weights (i.e., Weight1) nn <- length(WBirths) pdat2 <- data.frame(Weight1 = c(Weight1, Weight1), Race = c(rep("White", nn), rep("Black", nn)), Deaths = c(WDeaths, BDeaths), Births = c(WBirths, BBirths) ) ##Set factor pdat2$Race <- factor(pdat2$Race, levels = c("White", "Black")) m2 <- glm(Deaths ~ Race + poly(Weight1, 5), offset = log(Births), data = pdat2, family = poisson) summary(m2) pdat2$Fitted <- fitted(m2) ##Pearson goodness of fit on p. 182 chi2 <- sum(((pdat2$Deaths-pdat2$Fitted)^2)/pdat2$Fitted) chi2 pchisq(chi2, df = 63, lower.tail = FALSE) ##Create Figure 11.6, p. 182 WRateFitted <- pdat2$Fitted[pdat2$Race=="White"]/pdat2$Births[pdat2$Race=="White"] BRateFitted <- pdat2$Fitted[pdat2$Race=="Black"]/pdat2$Births[pdat2$Race=="Black"] windows(width = 5.5, height = 3) par(mfrow=c(1,2)) matplot(Weight1, cbind(WRateFitted, WRate), log = "y", type = "l", lty = 1:2, col = 1, xlab = "Birth weight (grams)", ylab = "Rate per 1000 (log scale)") legend(2300, 0.500, legend = c("Expected", "Observed"), lty = 1:2, col = 1, cex = .7) matplot(Weight1, cbind(BRateFitted, BRate), log = "y", type = "l", lty = 1:2, col = 1, xlab = "Birth weight (grams)", ylab = "Rate (log scale)") legend(2300, 0.500, legend = c("Expected", "Observed"), lty = 1:2, col = 1, cex = .7) par(mfrow=c(1,1)) ##Create Figure 11.7, p. 184 windows(width = 5.5, height = 3) par(mfrow=c(1,2)) matplot(Weight1, cbind(BRateFitted, WRateFitted), log = "y", type = "l", lty = 1:2, col = 1, xlab = "Birth weight (grams)", ylab = "Rate per 1000 (log scale)") legend(2400, 0.500, legend = c("Black", "White"), lty = 1:2, col = 1, cex = .8) matplot(Weight1, cbind(BRateFitted, WRateFitted), type = "l", lty = 1:2, col = 1, xlab = "Birth weight (grams)", ylab = "Rate") legend(2400, 0.550, legend = c("Black", "White"), lty = 1:2, col = 1, cex = .8) par(mfrow=c(1,1))