#Chapitre 4

#page 248
#Si le package n'est pas installe, enlever le commentaire
#puis executer la commande ci-dessous.
#if(!require("BioStatR")){install.packages("BioStatR")}
library(BioStatR)
str(Mesures)
## 'data.frame':    252 obs. of  3 variables:
##  $ masse : num  28.6 20.6 29.2 32 24.5 29 28.9 18.2 7.9 15.5 ...
##  $ taille: num  19.1 14.8 19.7 21.1 19.4 19.5 18.9 14.6 10.2 14.6 ...
##  $ espece: Factor w/ 4 levels "bignone","glycine blanche",..: 2 2 2 2 2 2 2 2 2 2 ...
summary(Mesures)
##      masse           taille                   espece  
##  Min.   : 1.00   Min.   : 4.80   bignone         :70  
##  1st Qu.: 4.50   1st Qu.:11.00   glycine blanche :54  
##  Median : 8.40   Median :13.20   glycine violette:56  
##  Mean   :11.13   Mean   :13.37   laurier rose    :72  
##  3rd Qu.:14.60   3rd Qu.:15.30                        
##  Max.   :49.20   Max.   :27.00
#page 249
#Si le package n'est pas installe, enlever le commentaire
#puis executer la commande ci-dessous.
#if(!require("ISLR")){install.packages("ISLR")}
library(ISLR)
summary(Hitters[,17:20])
##     Assists          Errors          Salary       NewLeague
##  Min.   :  0.0   Min.   : 0.00   Min.   :  67.5   A:176    
##  1st Qu.:  7.0   1st Qu.: 3.00   1st Qu.: 190.0   N:146    
##  Median : 39.5   Median : 6.00   Median : 425.0            
##  Mean   :106.9   Mean   : 8.04   Mean   : 535.9            
##  3rd Qu.:166.0   3rd Qu.:11.00   3rd Qu.: 750.0            
##  Max.   :492.0   Max.   :32.00   Max.   :2460.0            
##                                  NA's   :59
Mes.B = subset(Mesures,Mesures$espece=="bignone")
model<-lm(masse~taille,data=Mes.B)
summary(model)
## 
## Call:
## lm(formula = masse ~ taille, data = Mes.B)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -4.137 -1.551 -0.455  1.529 10.699 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -10.01645    1.06941  -9.366 7.39e-14 ***
## taille        1.53572    0.09002  17.059  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.463 on 68 degrees of freedom
## Multiple R-squared:  0.8106, Adjusted R-squared:  0.8078 
## F-statistic:   291 on 1 and 68 DF,  p-value: < 2.2e-16
Mes.B$masse
##  [1] 10.9  6.6 22.5 33.7 20.6 16.6 14.2 13.8 14.0  8.7 14.2 10.6 10.9  3.3
## [15]  9.7  9.3 17.2 10.1  9.0  7.1  7.1  1.5  4.1  8.0  7.4  7.2  6.9  2.9
## [29]  2.4 10.7 13.8 10.9 10.3  8.8  9.0  8.2  9.6  9.0  5.3  1.5  6.7  2.9
## [43]  2.9  3.5  3.4  4.9  4.7  4.7  5.2  2.1  2.2  1.4  2.7  1.0  2.5  5.5
## [57]  2.7  6.7  7.3  2.9  3.8  7.6  3.6  3.0  5.8  5.3  3.2  4.4  3.4  2.9
#page 250
round(fitted(model), 2)
##   111   112   113   114   115   116   117   118   119   120   121   122 
##  9.64  6.11 17.63 23.00 17.32 15.02 16.71 14.09 16.55 10.56 11.33 12.41 
##   123   124   125   126   127   128   129   130   131   132   133   134 
## 11.79  3.65  9.95  8.72 15.32 12.56 10.56  7.80  9.79 -0.03  4.57  8.11 
##   135   136   137   138   139   140   141   142   143   144   145   146 
## 10.87  9.79  7.80  4.42  4.57 11.48 10.72  8.57  7.80 10.56  6.72  8.72 
##   147   148   149   150   151   152   153   154   155   156   157   158 
## 10.56  9.18  6.11  0.73 10.26  4.27  2.12  5.80  1.50  3.04  5.49  2.73 
##   159   160   161   162   163   164   165   166   167   168   169   170 
##  6.57  2.73  0.89 -0.19 -0.03 -2.65  1.35  4.27  3.19  5.19 11.33  3.34 
##   171   172   173   174   175   176   177   178   179   180 
##  4.27 11.02  2.73  2.42  7.18  8.41  7.34  0.27  6.26  3.65
round(residuals(model), 2)
##   111   112   113   114   115   116   117   118   119   120   121   122 
##  1.26  0.49  4.87 10.70  3.28  1.58 -2.51 -0.29 -2.55 -1.86  2.87 -1.81 
##   123   124   125   126   127   128   129   130   131   132   133   134 
## -0.89 -0.35 -0.25  0.58  1.88 -2.46 -1.56 -0.70 -2.69  1.53 -0.47 -0.11 
##   135   136   137   138   139   140   141   142   143   144   145   146 
## -3.47 -2.59 -0.90 -1.52 -2.17 -0.78  3.08  2.33  2.50 -1.76  2.28 -0.52 
##   147   148   149   150   151   152   153   154   155   156   157   158 
## -0.96 -0.18 -0.81  0.77 -3.56 -1.37  0.78 -2.30  1.90  1.86 -0.79  1.97 
##   159   160   161   162   163   164   165   166   167   168   169   170 
## -1.37 -0.63  1.31  1.59  2.73  3.65  1.15  1.23 -0.49  1.51 -4.03 -0.44 
##   171   172   173   174   175   176   177   178   179   180 
## -0.47 -3.42  0.87  0.58 -1.38 -3.11 -4.14  4.13 -2.86 -0.75
#Si le package n'est pas installe, enlever le commentaire
#puis executer la commande ci-dessous.
#if(!require("broom")){install.packages("broom")}
library(broom)
(model.diag.metrics<-augment(model))
## # A tibble: 70 x 10
##    .rownames masse taille .fitted .se.fit .resid   .hat .sigma .cooksd
##    <chr>     <dbl>  <dbl>   <dbl>   <dbl>  <dbl>  <dbl>  <dbl>   <dbl>
##  1 111        10.9   12.8    9.64   0.320  1.26  0.0168   2.48 2.27e-3
##  2 112         6.6   10.5    6.11   0.306  0.491 0.0154   2.48 3.16e-4
##  3 113        22.5   18     17.6    0.661  4.87  0.0721   2.40 1.64e-1
##  4 114        33.7   21.5   23.0    0.954 10.7   0.150    2.04 1.96e+0
##  5 115        20.6   17.8   17.3    0.645  3.28  0.0687   2.45 7.02e-2
##  6 116        16.6   16.3   15.0    0.529  1.58  0.0461   2.47 1.05e-2
##  7 117        14.2   17.4   16.7    0.614 -2.51  0.0620   2.46 3.65e-2
##  8 118        13.8   15.7   14.1    0.485 -0.294 0.0388   2.48 2.99e-4
##  9 119        14     17.3   16.6    0.606 -2.55  0.0605   2.46 3.67e-2
## 10 120         8.7   13.4   10.6    0.344 -1.86  0.0195   2.47 5.80e-3
## # … with 60 more rows, and 1 more variable: .std.resid <dbl>
#page 252
tidy(model)
## # A tibble: 2 x 5
##   term        estimate std.error statistic  p.value
##   <chr>          <dbl>     <dbl>     <dbl>    <dbl>
## 1 (Intercept)   -10.0     1.07       -9.37 7.39e-14
## 2 taille          1.54    0.0900     17.1  2.89e-26
glance(model)
## # A tibble: 1 x 11
##   r.squared adj.r.squared sigma statistic  p.value    df logLik   AIC   BIC
##       <dbl>         <dbl> <dbl>     <dbl>    <dbl> <int>  <dbl> <dbl> <dbl>
## 1     0.811         0.808  2.46      291. 2.89e-26     2  -161.  329.  336.
## # … with 2 more variables: deviance <dbl>, df.residual <int>
with(Mes.B,plot(taille,masse,pch=20,xlab="Taille",ylab="Masse"))
abline(model, lwd=2, col="red")
with(Mes.B,lines(lowess(taille, masse), lty=2, lwd=2, col="blue"))
with(Mes.B,lines(smooth.spline(taille,masse),col="orange",lwd=2, 
                 lty=4))
with(Mes.B,segments(taille, masse, taille, fitted(model), lty=2, 
                    col="red"))
legend("topleft",lty=c(1,2,4),legend = c("lm","lowess", 
                                         "smooth.spline"),lwd=2,col=c("red","blue","orange"))

#page 253
#Si le package n'est pas installe, enlever le commentaire
#puis executer la commande ci-dessous.
#if(!require("ggiraphExtra")){install.packages("ggiraphExtra")}
library(ggiraphExtra)
ggPredict(model)

shapiro.test(residuals(model))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(model)
## W = 0.92886, p-value = 0.0006601
with(Mes.B,plot(fitted(model),masse-fitted(model),pch=20,xlab= 
                  "Valeurs pr\u00e9dites",main="R\u00e9sidus en fonction de la valeur pr\u00e9dite", 
                ylab="R\u00e9sidus = valeurs observ\u00e9es - valeurs pr\u00e9dites"))
abline(h=0)
with(Mes.B,lines(lowess(fitted(model),masse-fitted(model)),col= 
                   "red",lwd=2))

#page 254
library(MASS)
model.r <- lqs(masse ~ taille, data=Mes.B)
summary(model.r)
##               Length Class      Mode     
## crit           1     -none-     numeric  
## sing           1     -none-     character
## coefficients   2     -none-     numeric  
## bestone        2     -none-     numeric  
## fitted.values 70     -none-     numeric  
## residuals     70     -none-     numeric  
## scale          2     -none-     numeric  
## terms          3     terms      call     
## call           3     -none-     call     
## xlevels        0     -none-     list     
## model          2     data.frame list
#page 255
coefficients(model.r)
## (Intercept)      taille 
##   -7.070691    1.217949
model.r$bestone
## [1] 13 52
coef(lm(masse ~ taille, data=Mes.B[model.r$bestone,]))
## (Intercept)      taille 
##   -6.394872    1.217949
with(Mes.B,1-sum(residuals(model.r)^2)/sum((masse-mean(masse))^2))
## [1] 0.760885
#page 256
with(Mes.B,1-sum(residuals(model)^2)/sum((masse-mean(masse))^2))
## [1] 0.8105863
plot(masse ~ taille, data=Mes.B, xlab="Taille", ylab="Masse")
abline(model.r, lty=1)
abline(model, lty=2)
with(Mes.B,points(taille[model.r$bestone],masse[model.r$bestone],
                  pch=19,col="red"))
abline(lm(masse ~ taille, data=Mes.B[model.r$bestone,]), lty=3)
legend("topleft", legend=c("R\u00e9sistante : moindres carr\u00e9s tronqu\u00e9s",
       "Moindres carr\u00e9s ordinaires",
       "Droite ajust\u00e9e entre les deux observations"), lty=1:3)