Chapter 7
7.1 二分类Logistic回归
7.1.1 Logistic回归模型
rm(list = ls())
p <- seq(from = 0, to = 1, by = .01)
odds <- p/(1 - p)
plot(log(odds), p, type = "l", col = "blue", ylab = "Probability", las = 1)
abline(h = 0.5, lty = "dashed")
abline(v = 0, lty = "dashed")
7.1.2 Logistic回归实例
library(MASS)
data(birthwt)
str(birthwt)
library(epiDisplay)
tab1(birthwt$ptl)
tab1(birthwt$ftv)
library(dplyr)
birthweight <- birthwt %>%
mutate(race = factor(race, labels = c("white", "black", "other")),
smoke = factor(smoke, labels = c("no", "yes")),
ptl = ifelse(ptl > 0, "1+", ptl),
ptl = factor(ptl),
ht = factor(ht, labels = c("no", "yes")),
ui = factor(ui, labels = c("no", "yes")),
ftv = ifelse(ftv > 1, "2+", ftv),
ftv = factor(ftv)
)
str(birthweight)
glm1 <- glm(low ~ age + lwt + race + smoke + ptl + ht + ui + ftv,
family = binomial,
data = birthweight)
summary(glm1)
pchisq(39.19, df = 188 - 178, lower.tail = FALSE)
drop1(glm1)
glm2 <- step(glm1, trace = FALSE)
summary(glm2)
lnL1 <- as.numeric(logLik(glm1))
lnL2 <- as.numeric(logLik(glm2))
pchisq(2 * (lnL1 - lnL2), df = 3, lower.tail = FALSE)
anova(glm1, glm2, test = "Chisq")
AIC(glm1, glm2)
coef(glm2)
exp(coef(glm2))
confint(glm2)
exp(confint(glm2))
newdata <- data.frame(lwt = 120, race = "black",
smoke = "yes", ptl = "0",
ht = "yes", ui = "no")
logit <- predict(glm2, newdata = newdata)
logit
exp(logit)/(1 + exp(logit))
predict(glm2, newdata = newdata, type = "response")
install.packages("ResourceSelection")
library(ResourceSelection)
hoslem.test(birthweight$low, fitted(glm2))
library(epiDisplay)
logistic.display(glm2)
write.csv(logistic.display(glm2)$table, file = "model.csv")
7.1.3 表格数据的Logistic回归
dat.array <- array(c(136, 57, 107, 151, 63, 44, 63, 265),
dim = c(2, 2, 2),
dimnames = list(smoke = c("no", "yes"),
drink = c("no", "yes"),
outcome = c("control", "case")))
dat.table <- as.table(dat.array)
dat.table
dat <- as.data.frame(dat.table)
dat
mod1 <- glm(outcome ~ smoke + drink, family = binomial,
weights = Freq, data = dat)
summary(mod1)
library(tidyr)
dat.wide <- pivot_wider(dat, names_from = outcome, values_from = Freq)
dat.wide
mod2 <- glm(cbind(case, control) ~ smoke + drink,
family = binomial, data = dat.wide)
summary(mod2)
dat.original <- dat[rep(1:nrow(dat), dat$Freq), -4]
nrow(dat.original)
mod3 <- glm(outcome ~ smoke + drink, family = binomial,
data = dat.original)
summary(mod3)
7.2 条件Logistic回归
library(epiDisplay)
data("VC1to1")
str(VC1to1)
head(VC1to1)
clogit1 <- clogit(case ~ smoking + alcohol + rubber + strata(matset),
data = VC1to1)
clogit1
drop1(clogit1, test = "Chisq")
clogit2 <- clogit(case ~ alcohol + rubber + strata(matset), data = VC1to1)
drop1(clogit2, test = "Chisq")
clogit3 <- clogit(case ~ alcohol + strata(matset), data = VC1to1)
drop1(clogit3, test = "Chisq")
AIC(clogit1, clogit2, clogit3)
summary(clogit3)
clogistic.display(clogit3)
write.csv(clogistic.display(clogit3)$table, file = "clogit.csv")
7.3 无序多分类Logistic回归
library(epiDisplay)
data(Ectopic)
des(Ectopic)
str(Ectopic)
multi1 <- multinom(outc ~ hia, data = Ectopic)
summary(multi1)
st <- summary(multi1)$standard.errors
z <- coef(multi1) / st; z
p.values <- pnorm(abs(z), lower.tail = FALSE) * 2
p.values
confint(multi1)
exp(coef(multi1))
exp(confint(multi1))
mlogit.display(multi1)
multi2 <- multinom(outc ~ hia + gravi, data = Ectopic)
mlogit.display(multi2)
lrtest(multi1, multi2)
AIC(multi1, multi2)
7.4 有序Logistic回归
dat <- array(c(10, 7, 19, 6, 0, 2, 7, 5, 1, 5, 6, 16),
dim = c(2, 2, 3),
dimnames = list(method = c("old", "new"),
sex = c("male", "female"),
outcome = c("effectless", "effective", "recover")))
dat <- as.table(dat)
data1 <- as.data.frame(dat)
data1
str(data1)
data1$outcome <- ordered(data1$outcome)
data1$outcome
library(MASS)
polr1 <- polr(outcome ~ sex + method, weights = Freq, data = data1)
summary(polr1)
data2 <- data1[rep(1:nrow(data1), data1$Freq), -4]
head(data2)
str(data2)
polr2 <- polr(outcome ~ sex + method, data = data2)
summary(polr2)
exp(coef(polr1))
exp(confint(polr1))
install.packages("VGAM")
library(VGAM)
polr.p <- vglm(outcome ~ sex + method,
family = cumulative(parallel = TRUE),
data = data2)
polr.np <- vglm(outcome ~ sex + method,
family = cumulative(parallel = FALSE),
data = data2)
VGAM::lrtest(polr.p, polr.np)
library(epiDisplay)
ordinal.or.display(polr1)
write.csv(ordinal.or.display(polr1), file = "polr.csv")
|