Commit 2a53517291fe08b8322505d4ba7757715570a364

Authored by Jordi Inglada
1 parent f470b3ca
Exists in master

Modele de regression

Showing 1 changed file with 140 additions and 0 deletions   Show diff stats
notebook/exploration.org
... ... @@ -576,6 +576,146 @@ matplot_lib_filename
576 576  
577 577 *** Différence entre surface admin et vraie surface
578 578 *** Latitude
  579 +*** Un modèle de régression
  580 +**** OLS
  581 +#+begin_src python :results output :session :exports both
  582 +import statsmodels.api as sm
  583 +import statsmodels.formula.api as smf  # formula interface: models are written as 'y ~ x1 + x2' strings
  584 +
  585 +rend31lm = smf.ols('RENDNORME ~ SURF_ADM + MMEAU + BIO + LIBCULTURE', data=df31par).fit()  # OLS fit of RENDNORME on four df31par columns; LIBCULTURE shows up as one [T.<level>] term per level in the summary
  586 +print(rend31lm.summary())  # printed so that org-babel (:results output) captures the summary table
  587 +#+end_src
  588 +
  589 +#+RESULTS:
  590 +#+begin_example
  591 +OLS Regression Results
  592 +==============================================================================
  593 +Dep. Variable: RENDNORME R-squared: 0.812
  594 +Model: OLS Adj. R-squared: 0.790
  595 +Method: Least Squares F-statistic: 36.70
  596 +Date: Thu, 06 Dec 2018 Prob (F-statistic): 2.68e-46
  597 +Time: 16:17:06 Log-Likelihood: -623.34
  598 +No. Observations: 172 AIC: 1285.
  599 +Df Residuals: 153 BIC: 1344.
  600 +Df Model: 18
  601 +Covariance Type: nonrobust
  602 +=========================================================================================================
  603 + coef std err t P>|t| [0.025 0.975]
  604 +---------------------------------------------------------------------------------------------------------
  605 +Intercept 49.8717 2.239 22.276 0.000 45.449 54.295
  606 +LIBCULTURE[T.02_BLE_DUR] -8.5714 4.756 -1.802 0.073 -17.968 0.825
  607 +LIBCULTURE[T.03_ORGE_HIVER] 2.8211 2.958 0.954 0.342 -3.023 8.665
  608 +LIBCULTURE[T.04_ORGE_PRINTEMPS] -2.9000 5.279 -0.549 0.584 -13.330 7.530
  609 +LIBCULTURE[T.05_AVOINE] -4.4867 6.042 -0.743 0.459 -16.423 7.449
  610 +LIBCULTURE[T.06_SEIGLE] 7.4260 7.151 1.038 0.301 -6.702 21.554
  611 +LIBCULTURE[T.07_TRITICALE] -4.0144 4.186 -0.959 0.339 -12.285 4.256
  612 +LIBCULTURE[T.09_COLZA] -15.6105 3.803 -4.104 0.000 -23.124 -8.096
  613 +LIBCULTURE[T.11_POIS_PROTEAGINEUX] -7.8701 3.196 -2.462 0.015 -14.184 -1.556
  614 +LIBCULTURE[T.12_FEVE_FEVEROLE] -19.0013 4.304 -4.414 0.000 -27.505 -10.498
  615 +LIBCULTURE[T.13_TOURNESOL] -14.8190 3.298 -4.493 0.000 -21.335 -8.303
  616 +LIBCULTURE[T.14_SOJA] -9.9723 3.295 -3.026 0.003 -16.482 -3.463
  617 +LIBCULTURE[T.15_SORGHO] 0.7608 3.627 0.210 0.834 -6.405 7.926
  618 +LIBCULTURE[T.17_MAIS_GRAIN] 6.5508 3.221 2.034 0.044 0.188 12.914
  619 +LIBCULTURE[T.20_MAIS_FOURRAGE] 13.4571 9.847 1.367 0.174 -5.996 32.910
  620 +LIBCULTURE[T.31_POMME_DE_TERRE_CONSO] 59.8367 9.874 6.060 0.000 40.330 79.343
  621 +SURF_ADM -2.3181 0.281 -8.244 0.000 -2.874 -1.763
  622 +MMEAU 0.5421 0.052 10.330 0.000 0.438 0.646
  623 +BIO -13.4732 2.507 -5.375 0.000 -18.426 -8.521
  624 +==============================================================================
  625 +Omnibus: 3.507 Durbin-Watson: 1.875
  626 +Prob(Omnibus): 0.173 Jarque-Bera (JB): 4.106
  627 +Skew: 0.022 Prob(JB): 0.128
  628 +Kurtosis: 3.756 Cond. No. 842.
  629 +==============================================================================
  630 +
  631 +Warnings:
  632 +[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
  633 +#+end_example
  634 +
  635 +#+begin_src python :results file :session :var matplot_lib_filename=(org-babel-temp-file "figure" ".png") :exports both
  636 +
  637 +pred_lm = rend31lm.predict()
  638 +plt.figure(figsize=(15,10))
  639 +g= sns.jointplot(df31par.RENDNORME, pred_lm, kind='reg').set_axis_labels('REN', 'PREDICTION')
  640 +rsquare = lambda a, b: stats.pearsonr(a, b)[0] ** 2
  641 +g.annotate(rsquare, template="{stat}: {val:.2f}", stat="$R^2$", loc="upper left", fontsize=12)
  642 +plt.savefig(matplot_lib_filename)
  643 +matplot_lib_filename
  644 +#+end_src
  645 +
  646 +#+RESULTS:
  647 +[[file:/tmp/babel-X17w0V/figureWkaF0Q.png]]
  648 +
  649 +**** Logit
  650 +Il faut que la variable endogène (cible) soit dans [0,1] : RENDNORME est donc divisé par 150 avant l'ajustement.
  651 +#+begin_src python :results output :session :exports both
  652 +import statsmodels.api as sm
  653 +import statsmodels.formula.api as smf
  654 +df31par['RENDN150'] = df31par.RENDNORME/150
  655 +rend31logit = smf.logit('RENDN150 ~ SURF_ADM + MMEAU + BIO + LIBCULTURE', data=df31par).fit()
  656 +print(rend31logit.summary())
  657 +#+end_src
  658 +
  659 +#+RESULTS:
  660 +#+begin_example
  661 +__main__:3: SettingWithCopyWarning:
  662 +A value is trying to be set on a copy of a slice from a DataFrame.
  663 +Try using .loc[row_indexer,col_indexer] = value instead
  664 +
  665 +See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  666 +Optimization terminated successfully.
  667 + Current function value: 0.494043
  668 + Iterations 6
  669 + Logit Regression Results
  670 +==============================================================================
  671 +Dep. Variable: RENDN150 No. Observations: 172
  672 +Model: Logit Df Residuals: 153
  673 +Method: MLE Df Model: 18
  674 +Date: Thu, 06 Dec 2018 Pseudo R-squ.: -1.315
  675 +Time: 16:29:13 Log-Likelihood: -84.975
  676 +converged: True LL-Null: -36.706
  677 + LLR p-value: 1.000
  678 +=========================================================================================================
  679 + coef std err z P>|z| [0.025 0.975]
  680 +---------------------------------------------------------------------------------------------------------
  681 +Intercept -0.6816 0.499 -1.365 0.172 -1.660 0.297
  682 +LIBCULTURE[T.02_BLE_DUR] -0.2568 1.061 -0.242 0.809 -2.336 1.823
  683 +LIBCULTURE[T.03_ORGE_HIVER] 0.0697 0.646 0.108 0.914 -1.197 1.336
  684 +LIBCULTURE[T.04_ORGE_PRINTEMPS] -0.0713 1.248 -0.057 0.954 -2.517 2.374
  685 +LIBCULTURE[T.05_AVOINE] -0.1482 1.503 -0.099 0.921 -3.094 2.798
  686 +LIBCULTURE[T.06_SEIGLE] 0.2715 1.598 0.170 0.865 -2.860 3.403
  687 +LIBCULTURE[T.07_TRITICALE] -0.1247 0.951 -0.131 0.896 -1.989 1.739
  688 +LIBCULTURE[T.09_COLZA] -0.6254 0.974 -0.642 0.521 -2.534 1.284
  689 +LIBCULTURE[T.11_POIS_PROTEAGINEUX] -0.2478 0.742 -0.334 0.738 -1.701 1.206
  690 +LIBCULTURE[T.12_FEVE_FEVEROLE] -0.8365 1.257 -0.665 0.506 -3.300 1.628
  691 +LIBCULTURE[T.13_TOURNESOL] -0.5624 0.827 -0.680 0.497 -2.184 1.059
  692 +LIBCULTURE[T.14_SOJA] -0.3340 0.795 -0.420 0.674 -1.892 1.224
  693 +LIBCULTURE[T.15_SORGHO] -0.0042 0.782 -0.005 0.996 -1.537 1.529
  694 +LIBCULTURE[T.17_MAIS_GRAIN] 0.1378 0.696 0.198 0.843 -1.226 1.501
  695 +LIBCULTURE[T.20_MAIS_FOURRAGE] 0.3436 2.055 0.167 0.867 -3.684 4.372
  696 +LIBCULTURE[T.31_POMME_DE_TERRE_CONSO] 1.6842 2.315 0.727 0.467 -2.854 6.222
  697 +SURF_ADM -0.0771 0.071 -1.092 0.275 -0.215 0.061
  698 +MMEAU 0.0173 0.013 1.368 0.171 -0.007 0.042
  699 +BIO -0.5137 0.658 -0.781 0.435 -1.803 0.775
  700 +=========================================================================================================
  701 +#+end_example
  702 +
  703 +#+begin_src python :results file :session :var matplot_lib_filename=(org-babel-temp-file "figure" ".png") :exports both
  704 +
  705 +pred_logit = rend31logit.predict() *150
  706 +plt.figure(figsize=(25,10))
  707 +g= sns.jointplot(df31par.RENDNORME, pred_logit, kind='reg').set_axis_labels('REN', 'PREDICTION')
  708 +rsquare = lambda a, b: stats.pearsonr(a, b)[0] ** 2
  709 +g.annotate(rsquare, template="{stat}: {val:.2f}", stat="$R^2$", loc="upper left", fontsize=12)
  710 +plt.savefig(matplot_lib_filename)
  711 +matplot_lib_filename
  712 +#+end_src
  713 +
  714 +#+RESULTS:
  715 +[[file:/tmp/babel-X17w0V/figureQYHSMd.png]]
  716 +
  717 +
  718 +
579 719 * Quels sont les départements contenant des informations de rendement?
580 720 * Quelles sont les cultures pour lesquelles les informations de rendement sont les plus nombreuses?
581 721 * Peut-on prédire le rendement à partir des données TERLAB seules?
... ...