library(DAAG)
## Loading required package: lattice
## Warning: package 'lattice' was built under R version 3.3.2
data(frogs)
plot(northing ~ easting, data=frogs, pch=c(1,16)[frogs$pres.abs+1],
xlab="Meters east of reference point", ylab="Meters north")
library(DAAG)
data(spam7)
head(spam7)
## crl.tot dollar bang money n000 make yesno
## 1 278 0.000 0.778 0.00 0.00 0.00 y
## 2 1028 0.180 0.372 0.43 0.43 0.21 y
## 3 2259 0.184 0.276 0.06 1.16 0.06 y
## 4 191 0.000 0.137 0.00 0.00 0.00 y
## 5 191 0.000 0.135 0.00 0.00 0.00 y
## 6 54 0.000 0.000 0.00 0.00 0.00 y
spam = spam7
m1 = glm(pres.abs ~ altitude + distance + NoOfPools + NoOfSites + avrain + meanmin + meanmax, family = binomial, data = frogs)
summary(m1)
##
## Call:
## glm(formula = pres.abs ~ altitude + distance + NoOfPools + NoOfSites +
## avrain + meanmin + meanmax, family = binomial, data = frogs)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.7215 -0.7590 -0.2237 0.8320 2.6789
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.105e+02 1.388e+02 0.796 0.42587
## altitude -3.086e-02 4.076e-02 -0.757 0.44901
## distance -4.800e-04 2.055e-04 -2.336 0.01949 *
## NoOfPools 2.986e-02 9.276e-03 3.219 0.00129 **
## NoOfSites 4.364e-02 1.061e-01 0.411 0.68077
## avrain -1.140e-02 5.995e-02 -0.190 0.84920
## meanmin 4.899e+00 1.564e+00 3.133 0.00173 **
## meanmax -5.660e+00 5.049e+00 -1.121 0.26224
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 279.99 on 211 degrees of freedom
## Residual deviance: 198.74 on 204 degrees of freedom
## AIC: 214.74
##
## Number of Fisher Scoring iterations: 6
x0 = c(1, 1700, 400, 30, 8, 150, 4, 16)
sum(x0 * m1$coefficients)
## [1] -13.58643
exp(sum(x0 * m1$coefficients))
## [1] 1.257443e-06
exp(sum(x0 * m1$coefficients))/(1 + exp(sum(x0 * m1$coefficients)))
## [1] 1.257441e-06
m1$fitted.values
## 2 3 4 5 6
## 9.994421e-01 9.391188e-01 8.683363e-01 7.443973e-01 9.427198e-01
## 7 8 9 10 11
## 7.107780e-01 7.940785e-01 7.134759e-01 6.791648e-01 7.487912e-01
## 12 13 14 15 16
## 7.031597e-01 7.081588e-01 7.051631e-01 7.895711e-01 7.724424e-01
## 17 18 19 20 21
## 5.410053e-01 4.002440e-01 5.520545e-01 6.308968e-01 6.015028e-01
## 22 23 24 25 26
## 5.282532e-01 4.096861e-01 5.839915e-01 7.693920e-01 5.815080e-01
## 27 28 29 30 31
## 4.784519e-01 6.059715e-01 7.600451e-01 7.538528e-01 7.706771e-01
## 32 33 34 35 36
## 6.434393e-01 6.079763e-01 3.708934e-01 6.463018e-01 5.586491e-01
## 37 38 39 40 41
## 5.183736e-01 8.117926e-01 6.602267e-01 5.538921e-01 5.806614e-01
## 42 43 44 45 46
## 7.341727e-01 6.316477e-01 9.355365e-01 7.829397e-01 5.807378e-01
## 47 48 49 50 51
## 6.410415e-01 7.011278e-01 6.990184e-01 7.945711e-01 5.983744e-01
## 52 53 54 55 56
## 5.464413e-01 6.694340e-01 9.160177e-01 6.497045e-01 7.793881e-01
## 57 58 59 60 61
## 5.989858e-01 8.742394e-01 4.269678e-01 5.230343e-01 4.696082e-01
## 62 63 64 65 66
## 9.058378e-01 8.310582e-01 2.065644e-01 6.630455e-01 2.304770e-01
## 67 68 69 70 71
## 2.055509e-01 4.103215e-01 3.107167e-01 2.173230e-01 6.849344e-02
## 72 73 74 75 76
## 4.689979e-01 1.964217e-01 3.039997e-01 3.140598e-02 2.960040e-02
## 77 78 79 80 81
## 2.764354e-02 9.572332e-02 2.708695e-02 3.894189e-01 4.384454e-01
## 82 83 84 85 86
## 1.801431e-02 2.958477e-02 2.245579e-02 1.716036e-02 3.869230e-01
## 87 88 89 90 91
## 1.422145e-01 8.723242e-02 1.258070e-01 2.402273e-01 4.425048e-02
## 92 93 94 95 96
## 1.245244e-01 6.044002e-02 8.657481e-02 3.093076e-01 7.608919e-01
## 97 98 99 100 101
## 7.581414e-02 2.519808e-01 2.299453e-01 1.333157e-01 3.224717e-01
## 102 103 104 105 106
## 1.382807e-01 6.569506e-01 2.173365e-02 1.894038e-02 1.502382e-02
## 107 108 109 110 111
## 1.980114e-02 1.768114e-02 1.294026e-02 1.940694e-01 9.026678e-02
## 112 113 114 115 116
## 2.401582e-01 1.801129e-01 1.950594e-01 1.012234e-01 1.100011e-01
## 117 118 119 120 121
## 7.922404e-02 4.383750e-01 1.381195e-01 1.202370e-03 7.190976e-03
## 122 123 124 125 126
## 5.285983e-03 1.501354e-02 6.798822e-03 5.557685e-01 3.414967e-01
## 127 128 129 130 131
## 3.538713e-01 1.215839e-02 3.587468e-03 4.820373e-01 6.801676e-01
## 132 133 134 135 136
## 4.905071e-01 5.450916e-01 6.139790e-01 5.265536e-01 5.113742e-01
## 137 138 139 140 141
## 1.487969e-03 1.537802e-03 9.498472e-02 1.584109e-01 5.019242e-01
## 142 143 144 145 146
## 2.217258e-01 5.499423e-01 8.072697e-02 2.867668e-01 5.892181e-01
## 147 148 149 150 151
## 4.793328e-01 3.172925e-01 2.456935e-01 3.648957e-01 4.426478e-01
## 152 153 154 155 156
## 3.659800e-01 2.737078e-02 1.261571e-03 8.908085e-04 8.917574e-04
## 157 158 159 160 161
## 1.564091e-03 4.022912e-01 3.554192e-01 3.771886e-01 6.108751e-01
## 162 163 164 165 166
## 1.135652e-01 8.305509e-02 3.248273e-01 2.027767e-01 1.320571e-01
## 167 168 169 170 171
## 2.998112e-01 4.705620e-01 4.664971e-01 1.893874e-01 3.202811e-01
## 172 173 174 175 176
## 2.350189e-01 2.264083e-01 1.720698e-01 3.624017e-01 2.497301e-01
## 177 178 179 180 181
## 7.719488e-01 7.008708e-01 7.727554e-01 1.249298e-01 7.968601e-04
## 182 183 184 185 186
## 9.646891e-05 6.891946e-01 3.317216e-01 6.494793e-01 6.489840e-01
## 187 188 189 190 191
## 6.574312e-01 5.476705e-01 4.789413e-02 7.693637e-02 6.853117e-02
## 192 193 194 195 196
## 1.346704e-01 4.074532e-01 3.981571e-02 7.631064e-02 1.221353e-01
## 197 198 199 200 201
## 6.273025e-01 5.747749e-01 4.664689e-01 2.265998e-01 1.467408e-01
## 202 203 204 205 206
## 1.589058e-01 4.219191e-01 3.164506e-01 2.998835e-01 1.540243e-01
## 207 208 209 210 211
## 6.556601e-02 9.745198e-02 2.134439e-01 8.078311e-04 9.752315e-04
## 212 213
## 2.536105e-03 3.889125e-01
i = 45
rrg = c(1, frogs$altitude[i], frogs$distance[i], frogs$NoOfPools[i], frogs$NoOfSites[i], frogs$avrain[i], frogs$meanmin[i], frogs$meanmax[i])
#or use model.matrix(m1)[i,]
eta = sum(rrg*m1$coefficients)
prr = exp(eta)/(1 + exp(eta))
#This agrees with m1$fitted.values[i]
c(prr, m1$fitted.values[i])
## 46
## 0.5807378 0.5807378
plot(frogs$pres.abs, m1$fitted.values)
Checking the unusual points:
sel <- frogs$pres.abs==0 & m1$fitted >.7
plot(northing ~ easting, data=frogs, pch=c(1,16)[frogs$pres.abs+1], col=sel+1, xlab="Meters east of reference point", ylab="Meters north")
sel <- frogs$pres.abs==1 & m1$fitted <.2
plot(northing ~ easting, data=frogs, pch=c(1,16)[frogs$pres.abs+1], col=sel+1, xlab="Meters east of reference point", ylab="Meters north")
thr = 0.5
y = frogs$pres.abs
yhat = m1$fitted.values
a <- sum((!y) & (yhat<=thr))
b <- sum((!y) & (yhat>thr))
c <- sum((y) & (yhat<=thr))
d <- sum((y) & (yhat>thr))
c(a, b, c, d)
## [1] 112 21 21 58
thr = 0.3
y = frogs$pres.abs
yhat = m1$fitted.values
a <- sum((!y) & (yhat<=thr))
b <- sum((!y) & (yhat>thr))
c <- sum((y) & (yhat<=thr))
d <- sum((y) & (yhat>thr))
c(a, b, c, d)
## [1] 84 49 10 69
conf <- matrix(0,nrow=21,ncol=5)
colnames(conf) <- c("thr","a","b","c","d")
conf[,1] <- seq(0,1,by=.05)
y <- frogs$pres.abs
yhat <- m1$fitted.values
for ( i in 1:21)
{
a <- sum((!y) & (yhat<=conf[i,1]))
b <- sum((!y) & (yhat>conf[i,1]))
c <- sum((y) & (yhat<=conf[i,1]))
d <- sum((y) & (yhat>conf[i,1]))
conf[i,2:5] <- c(a,b,c,d)
}
conf
## thr a b c d
## [1,] 0.00 0 133 0 79
## [2,] 0.05 33 100 3 76
## [3,] 0.10 47 86 5 74
## [4,] 0.15 61 72 5 74
## [5,] 0.20 69 64 6 73
## [6,] 0.25 80 53 10 69
## [7,] 0.30 84 49 10 69
## [8,] 0.35 91 42 13 66
## [9,] 0.40 100 33 14 65
## [10,] 0.45 106 27 18 61
## [11,] 0.50 112 21 21 58
## [12,] 0.55 118 15 26 53
## [13,] 0.60 121 12 35 44
## [14,] 0.65 126 7 44 35
## [15,] 0.70 129 4 50 29
## [16,] 0.75 130 3 59 20
## [17,] 0.80 133 0 69 10
## [18,] 0.85 133 0 71 8
## [19,] 0.90 133 0 73 6
## [20,] 0.95 133 0 78 1
## [21,] 1.00 133 0 79 0
plot(conf[,1],conf[,3]+conf[,4], xlab="threshold", ylab="b+c")
library(DAAG)
data(spam7)
head(spam7)
## crl.tot dollar bang money n000 make yesno
## 1 278 0.000 0.778 0.00 0.00 0.00 y
## 2 1028 0.180 0.372 0.43 0.43 0.21 y
## 3 2259 0.184 0.276 0.06 1.16 0.06 y
## 4 191 0.000 0.137 0.00 0.00 0.00 y
## 5 191 0.000 0.135 0.00 0.00 0.00 y
## 6 54 0.000 0.000 0.00 0.00 0.00 y
summary(spam)
## crl.tot dollar bang money
## Min. : 1.0 Min. :0.00000 Min. : 0.0000 Min. : 0.00000
## 1st Qu.: 35.0 1st Qu.:0.00000 1st Qu.: 0.0000 1st Qu.: 0.00000
## Median : 95.0 Median :0.00000 Median : 0.0000 Median : 0.00000
## Mean : 283.3 Mean :0.07581 Mean : 0.2691 Mean : 0.09427
## 3rd Qu.: 266.0 3rd Qu.:0.05200 3rd Qu.: 0.3150 3rd Qu.: 0.00000
## Max. :15841.0 Max. :6.00300 Max. :32.4780 Max. :12.50000
## n000 make yesno
## Min. :0.0000 Min. :0.0000 n:2788
## 1st Qu.:0.0000 1st Qu.:0.0000 y:1813
## Median :0.0000 Median :0.0000
## Mean :0.1016 Mean :0.1046
## 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :5.4500 Max. :4.5400
par(mfrow=c(3,2))
for (i in 1:5)
hist(spam[,i],main="",xlab=names(spam)[i],breaks=10000)
par(mfrow=c(1,1))
pairs(spam, cex=0.5)
s = 0.001 #correction for zero as log(0) is infinity.
pairs(~log(crl.tot)+log(dollar+s) +log(bang+s)+log(money+s)+log(n000+s)+log(make+s) + yesno, data=spam, cex=.5)
spam.glm <- glm(yesno~ log(crl.tot) + log(dollar+s) + log(bang+s)
+log(money+s) + log(n000+s) + log(make+s),
family=binomial, data=spam)
summary(spam.glm)
##
## Call:
## glm(formula = yesno ~ log(crl.tot) + log(dollar + s) + log(bang +
## s) + log(money + s) + log(n000 + s) + log(make + s), family = binomial,
## data = spam)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.1657 -0.4367 -0.2863 0.3609 2.7152
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.11947 0.36342 11.335 < 2e-16 ***
## log(crl.tot) 0.30228 0.03693 8.185 2.71e-16 ***
## log(dollar + s) 0.32586 0.02365 13.777 < 2e-16 ***
## log(bang + s) 0.40984 0.01597 25.661 < 2e-16 ***
## log(money + s) 0.34563 0.02800 12.345 < 2e-16 ***
## log(n000 + s) 0.18947 0.02931 6.463 1.02e-10 ***
## log(make + s) -0.11418 0.02206 -5.177 2.25e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6170.2 on 4600 degrees of freedom
## Residual deviance: 3245.1 on 4594 degrees of freedom
## AIC: 3259.1
##
## Number of Fisher Scoring iterations: 6
spam.lm <- lm(as.numeric(yesno=="y")~ log(crl.tot) + log(dollar+s)
+ log(bang+s) +log(money+s) + log(n000+s) + log(make+s)
,data=spam)
summary(spam.lm)
##
## Call:
## lm(formula = as.numeric(yesno == "y") ~ log(crl.tot) + log(dollar +
## s) + log(bang + s) + log(money + s) + log(n000 + s) + log(make +
## s), data = spam)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.10937 -0.13830 -0.05674 0.15262 1.05619
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.078531 0.034188 31.547 < 2e-16 ***
## log(crl.tot) 0.028611 0.003978 7.193 7.38e-13 ***
## log(dollar + s) 0.054878 0.002934 18.703 < 2e-16 ***
## log(bang + s) 0.064522 0.001919 33.619 < 2e-16 ***
## log(money + s) 0.039776 0.002751 14.457 < 2e-16 ***
## log(n000 + s) 0.018530 0.002815 6.582 5.16e-11 ***
## log(make + s) -0.017380 0.002370 -7.335 2.61e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3391 on 4594 degrees of freedom
## Multiple R-squared: 0.5193, Adjusted R-squared: 0.5186
## F-statistic: 827.1 on 6 and 4594 DF, p-value: < 2.2e-16
plot(spam.lm$fitted.values, spam.glm$fitted.values)
abline(c(0, 1), col = "red")
confusion <- function (y, yhat, thres)
{
n <- length(thres)
conf <- matrix(0,length(thres),ncol=4)
colnames(conf) <- c("a","b","c","d")
for ( i in 1:n)
{
a <- sum((!y) & (yhat<=thres[i]))
b <- sum((!y) & (yhat>thres[i]))
c <- sum((y) & (yhat<=thres[i]))
d <- sum((y) & (yhat>thres[i]))
conf[i,] <- c(a,b,c,d)
}
return(conf)
}
v <- seq(0.05,0.95,by=0.05)
y <- as.numeric(spam$yesno=="y")
glm.conf <- confusion(y,spam.glm$fitted,v)
glm.conf
## a b c d
## [1,] 769 2019 32 1781
## [2,] 1709 1079 151 1662
## [3,] 1825 963 164 1649
## [4,] 1896 892 174 1639
## [5,] 1996 792 189 1624
## [6,] 2079 709 210 1603
## [7,] 2231 557 247 1566
## [8,] 2382 406 295 1518
## [9,] 2536 252 351 1462
## [10,] 2607 181 406 1407
## [11,] 2642 146 478 1335
## [12,] 2671 117 546 1267
## [13,] 2685 103 623 1190
## [14,] 2700 88 667 1146
## [15,] 2714 74 704 1109
## [16,] 2735 53 771 1042
## [17,] 2747 41 901 912
## [18,] 2767 21 1027 786
## [19,] 2775 13 1235 578
lm.conf <- confusion(y,spam.lm$fitted,v)
lm.conf
## a b c d
## [1,] 683 2105 22 1791
## [2,] 1400 1388 97 1716
## [3,] 1798 990 163 1650
## [4,] 1833 955 166 1647
## [5,] 1876 912 171 1642
## [6,] 1972 816 178 1635
## [7,] 2058 730 195 1618
## [8,] 2235 553 257 1556
## [9,] 2450 338 330 1483
## [10,] 2633 155 441 1372
## [11,] 2676 112 565 1248
## [12,] 2697 91 673 1140
## [13,] 2722 66 752 1061
## [14,] 2738 50 844 969
## [15,] 2755 33 928 885
## [16,] 2767 21 1057 756
## [17,] 2774 14 1158 655
## [18,] 2780 8 1249 564
## [19,] 2782 6 1361 452
matplot(v,cbind((glm.conf[,"b"]+glm.conf[,"c"])/4601, (lm.conf[,"b"]+lm.conf[,"c"])/4601), xlab="threshold", ylab="b+c",type="l")
legend(.8, .4, lty=1:2, col=1:2,c("glm","lm"))
spam.lm1 = lm(as.numeric(yesno=="y")~ crl.tot + dollar
+ bang + money + n000 + make, data=spam)
summary(spam.lm1)
##
## Call:
## lm(formula = as.numeric(yesno == "y") ~ crl.tot + dollar + bang +
## money + n000 + make, data = spam)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.8650 -0.2758 -0.2519 0.4459 0.7499
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.498e-01 7.488e-03 33.365 <2e-16 ***
## crl.tot 1.241e-04 1.058e-05 11.734 <2e-16 ***
## dollar 3.481e-01 2.733e-02 12.740 <2e-16 ***
## bang 1.113e-01 7.725e-03 14.407 <2e-16 ***
## money 1.765e-01 1.440e-02 12.262 <2e-16 ***
## n000 3.218e-01 1.891e-02 17.014 <2e-16 ***
## make 3.212e-02 2.101e-02 1.529 0.126
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4223 on 4594 degrees of freedom
## Multiple R-squared: 0.2543, Adjusted R-squared: 0.2533
## F-statistic: 261.1 on 6 and 4594 DF, p-value: < 2.2e-16
summary(spam.lm)
##
## Call:
## lm(formula = as.numeric(yesno == "y") ~ log(crl.tot) + log(dollar +
## s) + log(bang + s) + log(money + s) + log(n000 + s) + log(make +
## s), data = spam)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.10937 -0.13830 -0.05674 0.15262 1.05619
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.078531 0.034188 31.547 < 2e-16 ***
## log(crl.tot) 0.028611 0.003978 7.193 7.38e-13 ***
## log(dollar + s) 0.054878 0.002934 18.703 < 2e-16 ***
## log(bang + s) 0.064522 0.001919 33.619 < 2e-16 ***
## log(money + s) 0.039776 0.002751 14.457 < 2e-16 ***
## log(n000 + s) 0.018530 0.002815 6.582 5.16e-11 ***
## log(make + s) -0.017380 0.002370 -7.335 2.61e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3391 on 4594 degrees of freedom
## Multiple R-squared: 0.5193, Adjusted R-squared: 0.5186
## F-statistic: 827.1 on 6 and 4594 DF, p-value: < 2.2e-16
Precision and recall for the GLM:
cbind(glm.conf[,"d"]/(glm.conf[,"b"]+glm.conf[,"d"]),glm.conf[,"d"]/(glm.conf[,"c"]+glm.conf[,"d"]) )
## [,1] [,2]
## [1,] 0.4686842 0.9823497
## [2,] 0.6063480 0.9167126
## [3,] 0.6313170 0.9095422
## [4,] 0.6475701 0.9040265
## [5,] 0.6721854 0.8957529
## [6,] 0.6933391 0.8841699
## [7,] 0.7376354 0.8637617
## [8,] 0.7889813 0.8372863
## [9,] 0.8529755 0.8063982
## [10,] 0.8860202 0.7760618
## [11,] 0.9014180 0.7363486
## [12,] 0.9154624 0.6988417
## [13,] 0.9203403 0.6563707
## [14,] 0.9286872 0.6321015
## [15,] 0.9374472 0.6116933
## [16,] 0.9515982 0.5747380
## [17,] 0.9569780 0.5030336
## [18,] 0.9739777 0.4335356
## [19,] 0.9780034 0.3188086
plot(glm.conf[,"d"]/(glm.conf[,"b"]+glm.conf[,"d"]),glm.conf[,"d"]/(glm.conf[,"c"]+glm.conf[,"d"]) )
matplot(x = cbind(glm.conf[,"d"]/(glm.conf[,"b"]+glm.conf[,"d"]),
lm.conf[,"d"]/(lm.conf[,"b"]+lm.conf[,"d"])),
y = cbind(glm.conf[,"d"]/(glm.conf[,"c"]+glm.conf[,"d"]),
lm.conf[,"d"]/(lm.conf[,"c"]+lm.conf[,"d"])),
xlab="Precision", ylab="Recall",type="l")
legend(.8, .4, lty=1:2, col=1:2,c("glm","lm"))
library(DAAG)
data(frogs)
m1 = glm(pres.abs ~ altitude + distance + NoOfPools + NoOfSites + avrain + meanmin + meanmax, family = binomial, data = frogs)
summary(m1)
##
## Call:
## glm(formula = pres.abs ~ altitude + distance + NoOfPools + NoOfSites +
## avrain + meanmin + meanmax, family = binomial, data = frogs)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.7215 -0.7590 -0.2237 0.8320 2.6789
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.105e+02 1.388e+02 0.796 0.42587
## altitude -3.086e-02 4.076e-02 -0.757 0.44901
## distance -4.800e-04 2.055e-04 -2.336 0.01949 *
## NoOfPools 2.986e-02 9.276e-03 3.219 0.00129 **
## NoOfSites 4.364e-02 1.061e-01 0.411 0.68077
## avrain -1.140e-02 5.995e-02 -0.190 0.84920
## meanmin 4.899e+00 1.564e+00 3.133 0.00173 **
## meanmax -5.660e+00 5.049e+00 -1.121 0.26224
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 279.99 on 211 degrees of freedom
## Residual deviance: 198.74 on 204 degrees of freedom
## AIC: 214.74
##
## Number of Fisher Scoring iterations: 6
deviance(m1)
## [1] 198.7384
rd = sum(frogs$pres.abs * (-2) * log(m1$fitted.values)) + sum((1 - frogs$pres.abs)*(-2) * log(1 - m1$fitted.values))
rd
## [1] 198.7384
m2 = glm(pres.abs ~ altitude + distance + NoOfSites + avrain + meanmin + meanmax, family = binomial, data = frogs)
summary(m2)
##
## Call:
## glm(formula = pres.abs ~ altitude + distance + NoOfSites + avrain +
## meanmin + meanmax, family = binomial, data = frogs)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.7814 -0.7902 -0.2938 0.9300 3.3822
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.496e+01 1.376e+02 0.254 0.79938
## altitude -8.375e-03 4.034e-02 -0.208 0.83552
## distance -5.350e-04 2.066e-04 -2.589 0.00962 **
## NoOfSites 5.748e-02 1.025e-01 0.561 0.57481
## avrain 1.506e-02 5.993e-02 0.251 0.80154
## meanmin 4.358e+00 1.491e+00 2.922 0.00347 **
## meanmax -2.782e+00 4.986e+00 -0.558 0.57680
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 279.99 on 211 degrees of freedom
## Residual deviance: 210.84 on 205 degrees of freedom
## AIC: 224.84
##
## Number of Fisher Scoring iterations: 6
AIC
m1$aic
## [1] 214.7384
m1$deviance + 2*(7+1)
## [1] 214.7384
step(m1)
## Start: AIC=214.74
## pres.abs ~ altitude + distance + NoOfPools + NoOfSites + avrain +
## meanmin + meanmax
##
## Df Deviance AIC
## - avrain 1 198.78 212.78
## - NoOfSites 1 198.91 212.91
## - altitude 1 199.30 213.30
## - meanmax 1 199.97 213.97
## <none> 198.74 214.74
## - distance 1 206.39 220.39
## - meanmin 1 209.60 223.60
## - NoOfPools 1 210.84 224.84
##
## Step: AIC=212.77
## pres.abs ~ altitude + distance + NoOfPools + NoOfSites + meanmin +
## meanmax
##
## Df Deviance AIC
## - NoOfSites 1 198.94 210.94
## - altitude 1 199.58 211.58
## <none> 198.78 212.78
## - meanmax 1 201.76 213.76
## - distance 1 206.39 218.39
## - meanmin 1 210.75 222.75
## - NoOfPools 1 210.90 222.90
##
## Step: AIC=210.94
## pres.abs ~ altitude + distance + NoOfPools + meanmin + meanmax
##
## Df Deviance AIC
## - altitude 1 199.63 209.63
## <none> 198.94 210.94
## - meanmax 1 201.76 211.76
## - distance 1 209.68 219.68
## - meanmin 1 210.83 220.83
## - NoOfPools 1 211.22 221.22
##
## Step: AIC=209.63
## pres.abs ~ distance + NoOfPools + meanmin + meanmax
##
## Df Deviance AIC
## <none> 199.63 209.63
## - distance 1 209.73 217.73
## - NoOfPools 1 211.43 219.43
## - meanmax 1 216.10 224.10
## - meanmin 1 226.94 234.94
##
## Call: glm(formula = pres.abs ~ distance + NoOfPools + meanmin + meanmax,
## family = binomial, data = frogs)
##
## Coefficients:
## (Intercept) distance NoOfPools meanmin meanmax
## 14.0074032 -0.0005138 0.0285643 5.6230647 -2.3717579
##
## Degrees of Freedom: 211 Total (i.e. Null); 207 Residual
## Null Deviance: 280
## Residual Deviance: 199.6 AIC: 209.6
step(m1, direction = "both")
## Start: AIC=214.74
## pres.abs ~ altitude + distance + NoOfPools + NoOfSites + avrain +
## meanmin + meanmax
##
## Df Deviance AIC
## - avrain 1 198.78 212.78
## - NoOfSites 1 198.91 212.91
## - altitude 1 199.30 213.30
## - meanmax 1 199.97 213.97
## <none> 198.74 214.74
## - distance 1 206.39 220.39
## - meanmin 1 209.60 223.60
## - NoOfPools 1 210.84 224.84
##
## Step: AIC=212.77
## pres.abs ~ altitude + distance + NoOfPools + NoOfSites + meanmin +
## meanmax
##
## Df Deviance AIC
## - NoOfSites 1 198.94 210.94
## - altitude 1 199.58 211.58
## <none> 198.78 212.78
## - meanmax 1 201.76 213.76
## + avrain 1 198.74 214.74
## - distance 1 206.39 218.39
## - meanmin 1 210.75 222.75
## - NoOfPools 1 210.90 222.90
##
## Step: AIC=210.94
## pres.abs ~ altitude + distance + NoOfPools + meanmin + meanmax
##
## Df Deviance AIC
## - altitude 1 199.63 209.63
## <none> 198.94 210.94
## - meanmax 1 201.76 211.76
## + NoOfSites 1 198.78 212.78
## + avrain 1 198.91 212.91
## - distance 1 209.68 219.68
## - meanmin 1 210.83 220.83
## - NoOfPools 1 211.22 221.22
##
## Step: AIC=209.63
## pres.abs ~ distance + NoOfPools + meanmin + meanmax
##
## Df Deviance AIC
## <none> 199.63 209.63
## + altitude 1 198.94 210.94
## + avrain 1 199.39 211.39
## + NoOfSites 1 199.58 211.58
## - distance 1 209.73 217.73
## - NoOfPools 1 211.43 219.43
## - meanmax 1 216.10 224.10
## - meanmin 1 226.94 234.94
##
## Call: glm(formula = pres.abs ~ distance + NoOfPools + meanmin + meanmax,
## family = binomial, data = frogs)
##
## Coefficients:
## (Intercept) distance NoOfPools meanmin meanmax
## 14.0074032 -0.0005138 0.0285643 5.6230647 -2.3717579
##
## Degrees of Freedom: 211 Total (i.e. Null); 207 Residual
## Null Deviance: 280
## Residual Deviance: 199.6 AIC: 209.6