Commit 6c7638dfb0423f2042adcf9d93462cb54afd4575

Authored by Jordi Inglada
1 parent 01b3a1ae
Exists in master

Adaptations HAL

Showing 1 changed file with 45 additions and 41 deletions   Show diff stats
notebook/exploration.org
... ... @@ -33,6 +33,11 @@ def find_terlab_file(dpt):
33 33 #+end_src
34 34  
35 35 #+RESULTS:
  36 +: Python 3.7.1 (default, Oct 23 2018, 19:19:42)
  37 +: [GCC 7.3.0] :: Anaconda, Inc. on linux
  38 +: Type "help", "copyright", "credits" or "license" for more information.
  39 +: python.el: native completion setup loaded
  40 +
36 41  
37 42 #+begin_src python :results output :session :exports both
38 43 print(terlabdir)
... ... @@ -42,6 +47,7 @@ print(terlabdir)
42 47 : /work/OT/theia/oso/shapes/TERLAB/
43 48  
44 49  
  50 +
45 51 #+begin_src python :results output :session :exports both
46 52 assert terlabdir+'RPG_TERLAB_DEP90-91-93-94-95_2017.7z'==find_terlab_file('94')
47 53 assert terlabdir+'RPG_TERLAB_DEP90-91-93-94-95_2017.7z'==find_terlab_file('95')
... ... @@ -69,7 +75,6 @@ def shape2df(departement):
69 75 #+RESULTS:
70 76  
71 77 * Quelles sont les variables intéressantes dans les données TERLAB
72   -
73 78 ** On vérifie que le fichier est bien lu
74 79 #+begin_src python :results output :session :exports both
75 80 df31 = shape2df(31)
... ... @@ -77,12 +82,12 @@ print(df31.head())
77 82 #+end_src
78 83  
79 84 #+RESULTS:
80   -: CODE_CULTU SURF_ADM PRECISION ... RENDNORME MMEAU geometry
81   -: 0 ORH 1.42 None ... NaN NaN POLYGON ((506016.382600002 6234988.195600003, ...
82   -: 1 PPH 0.00 None ... NaN NaN POLYGON ((506032.0680000037 6235002.251000002,...
83   -: 2 PPH 0.00 None ... NaN NaN POLYGON ((506108.3540000021 6234766.824000001,...
84   -: 3 ORH 1.87 None ... NaN NaN POLYGON ((506107.6996000037 6234765.226100001,...
85   -: 4 PPH 0.00 None ... NaN NaN POLYGON ((506874.6119000018 6234495.103300001,...
  85 +: CODE_CULTU SURF_ADM PRECISION SEMENCE DEST_ICHN CULTURE_D1 CULTURE_D2 ... TLENQ CODUTISOL LIBCULTURE SURUTISOL RENDNORME MMEAU geometry
  86 +: 0 ORH 1.42 None 0 None None None ... 1 03 03_ORGE_HIVER NaN NaN NaN POLYGON ((506016.382600002 6234988.195600003, ...
  87 +: 1 PPH 0.00 None 0 None None None ... 0 00 00_XXXXXXXXXX NaN NaN NaN POLYGON ((506032.0680000037 6235002.251000002,...
  88 +: 2 PPH 0.00 None 0 None None None ... 0 00 00_XXXXXXXXXX NaN NaN NaN POLYGON ((506108.3540000021 6234766.824000001,...
  89 +: 3 ORH 1.87 None 0 None None None ... 1 03 03_ORGE_HIVER NaN NaN NaN POLYGON ((506107.6996000037 6234765.226100001,...
  90 +: 4 PPH 0.00 None 0 None None None ... 0 00 00_XXXXXXXXXX NaN NaN NaN POLYGON ((506874.6119000018 6234495.103300001,...
86 91 :
87 92 : [5 rows x 18 columns]
88 93  
... ... @@ -137,7 +142,7 @@ print(codes_cultures)
137 142  
138 143 #+RESULTS:
139 144 : 167
140   -: {'PCL', 'LO7', 'PH7', 'AGR', 'SAI', 'VE7', 'MCT', 'DTY', 'PHI', 'MOT', 'PPO', 'J6S', 'BTN', 'LIH', 'FLP', 'MPA', 'FNU', 'NOX', 'LUZ', 'CHT', 'OIG', 'CCT', 'MCR', 'POR', 'ME7', 'VRC', 'PAS', 'PVP', 'HBL', 'LOT', 'MLT', 'BTA', 'ML6', 'CEL', 'PH6', 'ANE', 'AUB', 'SGH', 'LIP', 'ORT', 'J5M', 'TRN', 'FAG', 'HAR', 'PFR', 'SRS', 'CHU', 'FEV', 'TR6', 'CAR', 'VES', 'BDH', 'ART', 'TOM', 'PPP', 'MH7', 'LEC', 'FRA', 'SA6', 'TTP', 'NOS', 'RGA', 'TR5', 'PFP', 'LH6', 'PCH', 'ORH', 'LH7', 'RDI', 'PAN', 'BFS', 'FET', 'EPE', 'MPC', 'ME6', 'BDP', 'SA7', 'SOJ', 'CPL', 'PAG', 'J6P', 'GFP', 'RVI', 'LBF', 'CPH', 'MH6', 'LU6', 'LU5', 'BOR', 'CZP', 'MC5', 'LUD', 'TCR', 'PPR', 'SA5', 'TRU', 'PP7', 'PPA', 'FNO', 'CGO', 'AVP', 'TR7', 'LAV', 'VRG', 'BTP', 'TRE', 'CML', 'PTC', 'MIE', 'ML5', 'SNE', 'MH5', 'PTR', 'CZH', 'LU7', 'FF7', 'CCN', 'LDH', 'CRD', 'MC6', 'CHS', 'OAG', 'OLI', 'VRT', 'BOP', 'SGE', 'GES', 'PPH', 'SPH', 'BVF', 'CTG', 'AIL', 'FVL', 'CID', 'NVT', 'MLG', 'BRO', 'PRL', 'FLA', 'SPL', 'SBO', 'ORP', 'BTH', 'MC7', 'TTH', 'CAG', 'AVH', 'BFP', 'ML7', 'POT', 'TOP', 'PSL', 'FSG', 'CIT', 'MID', 'LP7', 'PFH', 'MIS', 'CES', 'CMB', 'PEP', 'TAB', 'VE6', 'MLO', 'MOH', 'SOG', 'ROQ'}
  145 +: {'LBF', 'PAS', 'LUD', 'MIS', 'CGO', 'RVI', 'SGE', 'MOH', 'VRT', 'PAN', 'LOT', 'RDI', 'MC5', 'FVL', 'LH7', 'J5M', 'NOS', 'SRS', 'PEP', 'PH7', 'GFP', 'CZP', 'LU7', 'HBL', 'EPE', 'CHT', 'RGA', 'ORT', 'MH5', 'GES', 'PCH', 'PHI', 'OLI', 'BVF', 'NOX', 'CAG', 'FLA', 'TAB', 'SBO', 'BOR', 'FNO', 'AVH', 'BTN', 'CRD', 'MPA', 'J6P', 'SPL', 'MC6', 'ROQ', 'ML7', 'PFR', 'SGH', 'FAG', 'SOJ', 'TR6', 'TTP', 'CHS', 'VE7', 'BDP', 'PCL', 'MOT', 'SAI', 'BTP', 'CML', 'FEV', 'TTH', 'ANE', 'VRG', 'CPL', 'TCR', 'PAG', 'LU5', 'CES', 'PPO', 'ART', 'PTR', 'PPP', 'TOP', 'LP7', 'ORP', 'PTC', 'AUB', 'TR5', 'BRO', 'HAR', 'BTH', 'BFS', 'ML5', 'CZH', 'FRA', 'LAV', 'MH7', 'CTG', 'CCT', 'FET', 'PFH', 'POR', 'CCN', 'PRL', 'MLG', 'OIG', 'CMB', 'ORH', 'LIP', 'SA5', 'CAR', 'PP7', 'VRC', 'AGR', 'SPH', 'SOG', 'BTA', 'ME6', 'PFP', 'FSG', 'PPR', 'PSL', 'J6S', 'NVT', 'AVP', 'MID', 'LO7', 'BDH', 'TRE', 'LIH', 'LU6', 'VE6', 'PVP', 'DTY', 'TR7', 'LDH', 'PPH', 'FF7', 'CHU', 'ME7', 'LEC', 'MCT', 'FNU', 'FLP', 'PH6', 'POT', 'ML6', 'TOM', 'CPH', 'MCR', 'MLO', 'MH6', 'OAG', 'MIE', 'PPA', 'LH6', 'MPC', 'SA6', 'MC7', 'VES', 'CID', 'MLT', 'SNE', 'TRN', 'LUZ', 'TRU', 'CIT', 'BFP', 'SA7', 'AIL', 'CEL', 'BOP'}
141 146  
142 147 Mais il est plus lisible de regarder ~LIBCULTURE~
143 148 #+begin_src python :results output :session :exports both
... ... @@ -148,7 +153,7 @@ print(libs_cultures)
148 153  
149 154 #+RESULTS:
150 155 : 20
151   -: {'11_POIS_PROTEAGINEUX', '62_MELANGE_PROTEAGINEUX', '12_FEVE_FEVEROLE', '00_XXXXXXXXXX', '31_POMME_DE_TERRE_CONSO', '14_SOJA', '03_ORGE_HIVER', '01_BLE_TENDRE', '13_TOURNESOL', '21_BETTERAVE_INDUSTRIELLE', '04_ORGE_PRINTEMPS', '07_TRITICALE', '09_COLZA', '05_AVOINE', '15_SORGHO', '06_SEIGLE', '61_MELANGE_CEREALES', '02_BLE_DUR', '17_MAIS_GRAIN', '20_MAIS_FOURRAGE'}
  156 +: {'21_BETTERAVE_INDUSTRIELLE', '00_XXXXXXXXXX', '11_POIS_PROTEAGINEUX', '01_BLE_TENDRE', '62_MELANGE_PROTEAGINEUX', '06_SEIGLE', '12_FEVE_FEVEROLE', '14_SOJA', '04_ORGE_PRINTEMPS', '15_SORGHO', '20_MAIS_FOURRAGE', '05_AVOINE', '03_ORGE_HIVER', '17_MAIS_GRAIN', '09_COLZA', '07_TRITICALE', '02_BLE_DUR', '13_TOURNESOL', '61_MELANGE_CEREALES', '31_POMME_DE_TERRE_CONSO'}
152 157  
153 158 Mais il y a beaucoup plus de ~CODE_CULTU~ que de ~LIBCULTURE~. Peut-être que le 2è n'existe que pour les parcelles avec rendement?
154 159  
... ... @@ -218,7 +223,7 @@ matplot_lib_filename
218 223 #+end_src
219 224  
220 225 #+RESULTS:
221   -[[file:/tmp/babel-X17w0V/figureR5oWbq.png]]
  226 +[[file:/tmp/babel-dHk8X9/figureym1xLQ.png]]
222 227  
223 228 Valeurs manquantes de rendement?
224 229 #+begin_src python :results output :session :exports both
... ... @@ -266,7 +271,7 @@ matplot_lib_filename
266 271 #+end_src
267 272  
268 273 #+RESULTS:
269   -[[file:/tmp/babel-X17w0V/figureJIfLg1.png]]
  274 +[[file:/tmp/babel-dHk8X9/figureLFYhfU.png]]
270 275  
271 276 ** Distribution statistique des rendements par culture
272 277  
... ... @@ -285,7 +290,7 @@ matplot_lib_filename
285 290 #+end_src
286 291  
287 292 #+RESULTS:
288   -[[file:/tmp/babel-X17w0V/figureukzYe2.png]]
  293 +[[file:/tmp/babel-dHk8X9/figure7FPnTM.png]]
289 294  
290 295  
291 296  
... ... @@ -313,8 +318,8 @@ SURF_ADM SURUTISOL
313 318 03_ORGE_HIVER 20
314 319 14_SOJA 20
315 320 11_POIS_PROTEAGINEUX 19
316   -17_MAIS_GRAIN 18
317 321 13_TOURNESOL 18
  322 +17_MAIS_GRAIN 18
318 323 09_COLZA 12
319 324 15_SORGHO 11
320 325 07_TRITICALE 7
... ... @@ -323,8 +328,8 @@ SURF_ADM SURUTISOL
323 328 04_ORGE_PRINTEMPS 4
324 329 05_AVOINE 3
325 330 06_SEIGLE 2
326   -20_MAIS_FOURRAGE 1
327 331 31_POMME_DE_TERRE_CONSO 1
  332 +20_MAIS_FOURRAGE 1
328 333 Name: LIBCULTURE, dtype: int64
329 334 #+end_example
330 335 ** Histogramme des rendements par culture sur les parcelles pures
... ... @@ -343,7 +348,7 @@ matplot_lib_filename
343 348 #+end_src
344 349  
345 350 #+RESULTS:
346   -[[file:/tmp/babel-X17w0V/figurevurUbh.png]]
  351 +[[file:/tmp/babel-dHk8X9/figure4znAK6.png]]
347 352  
348 353 ** Ajouter une colonne avec le centroïde de la parcelle
349 354 #+begin_src python :results output :session :exports both
... ... @@ -352,13 +357,19 @@ print(df31par.centroid.head())
352 357 #+end_src
353 358  
354 359 #+RESULTS:
355   -: 78 POINT (576943.0567814494 6248166.951789126)
356   -: 644 POINT (573025.7548245641 6264003.533300648)
357   -: 937 POINT (572496.3795754968 6266017.564098726)
358   -: 5115 POINT (560246.9060276946 6236853.034810145)
359   -: 5303 POINT (583985.3814635368 6245950.934973895)
360   -: dtype: object
361   -: 576943.0567814494
  360 +#+begin_example
  361 +__main__:1: SettingWithCopyWarning:
  362 +A value is trying to be set on a copy of a slice from a DataFrame.
  363 +Try using .loc[row_indexer,col_indexer] = value instead
  364 +
  365 +See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  366 +78 POINT (576943.0567814494 6248166.951789126)
  367 +644 POINT (573025.7548245641 6264003.533300648)
  368 +937 POINT (572496.3795754968 6266017.564098726)
  369 +5115 POINT (560246.9060276946 6236853.034810145)
  370 +5303 POINT (583985.3814635368 6245950.934973895)
  371 +dtype: object
  372 +#+end_example
362 373  
363 374 #+begin_src python :results file :session :var matplot_lib_filename=(org-babel-temp-file "figure" ".png") :exports both
364 375 import matplotlib.pyplot as plt
... ... @@ -370,7 +381,7 @@ matplot_lib_filename
370 381 #+end_src
371 382  
372 383 #+RESULTS:
373   -[[file:/tmp/babel-X17w0V/figure4XvQFY.png]]
  384 +[[file:/tmp/babel-dHk8X9/figurek3Dtu3.png]]
374 385  
375 386  
376 387  
... ... @@ -384,7 +395,7 @@ colors = [colordict[c] for c in df31par['LIBCULTURE']]
384 395 #+end_src
385 396  
386 397 #+RESULTS:
387   -: {'14_SOJA': 0, '02_BLE_DUR': 1, '03_ORGE_HIVER': 2, '01_BLE_TENDRE': 3, '11_POIS_PROTEAGINEUX': 4, '13_TOURNESOL': 5, '04_ORGE_PRINTEMPS': 6, '07_TRITICALE': 7, '15_SORGHO': 8, '09_COLZA': 9, '12_FEVE_FEVEROLE': 10, '17_MAIS_GRAIN': 11, '20_MAIS_FOURRAGE': 12, '31_POMME_DE_TERRE_CONSO': 13, '05_AVOINE': 14, '06_SEIGLE': 15}
  398 +: {'06_SEIGLE': 0, '12_FEVE_FEVEROLE': 1, '14_SOJA': 2, '04_ORGE_PRINTEMPS': 3, '15_SORGHO': 4, '20_MAIS_FOURRAGE': 5, '01_BLE_TENDRE': 6, '11_POIS_PROTEAGINEUX': 7, '09_COLZA': 8, '07_TRITICALE': 9, '02_BLE_DUR': 10, '05_AVOINE': 11, '31_POMME_DE_TERRE_CONSO': 12, '03_ORGE_HIVER': 13, '17_MAIS_GRAIN': 14, '13_TOURNESOL': 15}
388 399  
389 400  
390 401  
... ... @@ -404,7 +415,7 @@ matplot_lib_filename
404 415 #+end_src
405 416  
406 417 #+RESULTS:
407   -[[file:/tmp/babel-X17w0V/figureUjRq6C.png]]
  418 +[[file:/tmp/babel-dHk8X9/figureJrlI9h.png]]
408 419  
409 420 Il semble y avoir une corrélation positive entre rendement et taille de la parcelle, conditionnée par la classe
410 421  
... ... @@ -414,13 +425,6 @@ maj6_rnd = rnd_counts.iloc[:6]
414 425 #+end_src
415 426  
416 427 #+RESULTS:
417   -: 01_BLE_TENDRE 23
418   -: 03_ORGE_HIVER 20
419   -: 14_SOJA 20
420   -: 11_POIS_PROTEAGINEUX 19
421   -: 17_MAIS_GRAIN 18
422   -: 13_TOURNESOL 18
423   -: Name: LIBCULTURE, dtype: int64
424 428  
425 429 #+begin_src python :session :results output :exports both
426 430 import matplotlib.pyplot as plt
... ... @@ -511,7 +515,7 @@ matplot_lib_filename
511 515 #+end_src
512 516  
513 517 #+RESULTS:
514   -[[file:/tmp/babel-X17w0V/figureYa6zlX.png]]
  518 +[[file:/tmp/babel-dHk8X9/figureVmQj9r.png]]
515 519  
516 520 La corrélation n'est valable que pour le blé tendre et le tournesol. Il faudra évidemment refaire tout ça avec tous les départements.
517 521  
... ... @@ -544,7 +548,7 @@ matplot_lib_filename
544 548 #+end_src
545 549  
546 550 #+RESULTS:
547   -[[file:/tmp/babel-X17w0V/figureVmssHE.png]]
  551 +[[file:/tmp/babel-dHk8X9/figure8GibKh.png]]
548 552  
549 553 *** Bio
550 554 #+begin_src python :results file :session :var matplot_lib_filename=(org-babel-temp-file "figure" ".png") :exports both
... ... @@ -574,7 +578,7 @@ matplot_lib_filename
574 578 #+end_src
575 579  
576 580 #+RESULTS:
577   -[[file:/tmp/babel-X17w0V/figureE5mY6F.png]]
  581 +[[file:/tmp/babel-dHk8X9/figure2huezh.png]]
578 582  
579 583 *** Différence entre surface admin et vraie surface
580 584 *** Latitude
... ... @@ -595,8 +599,8 @@ OLS Regression Results
595 599 Dep. Variable: RENDNORME R-squared: 0.812
596 600 Model: OLS Adj. R-squared: 0.790
597 601 Method: Least Squares F-statistic: 36.70
598   -Date: Thu, 06 Dec 2018 Prob (F-statistic): 2.68e-46
599   -Time: 16:17:06 Log-Likelihood: -623.34
  602 +Date: Thu, 31 Jan 2019 Prob (F-statistic): 2.68e-46
  603 +Time: 13:44:19 Log-Likelihood: -623.34
600 604 No. Observations: 172 AIC: 1285.
601 605 Df Residuals: 153 BIC: 1344.
602 606 Df Model: 18
... ... @@ -646,7 +650,7 @@ matplot_lib_filename
646 650 #+end_src
647 651  
648 652 #+RESULTS:
649   -[[file:/tmp/babel-X17w0V/figureWkaF0Q.png]]
  653 +[[file:/tmp/babel-dHk8X9/figuremqAIp9.png]]
650 654  
651 655 **** Logit
652 656 Il faut que la variable endogène (cible) soit dans [0,1]
... ... @@ -673,8 +677,8 @@ Optimization terminated successfully.
673 677 Dep. Variable: RENDN150 No. Observations: 172
674 678 Model: Logit Df Residuals: 153
675 679 Method: MLE Df Model: 18
676   -Date: Thu, 06 Dec 2018 Pseudo R-squ.: -1.315
677   -Time: 16:29:13 Log-Likelihood: -84.975
  680 +Date: Thu, 31 Jan 2019 Pseudo R-squ.: -1.315
  681 +Time: 13:44:20 Log-Likelihood: -84.975
678 682 converged: True LL-Null: -36.706
679 683 LLR p-value: 1.000
680 684 =========================================================================================================
... ... @@ -714,7 +718,7 @@ matplot_lib_filename
714 718 #+end_src
715 719  
716 720 #+RESULTS:
717   -[[file:/tmp/babel-X17w0V/figureQYHSMd.png]]
  721 +[[file:/tmp/babel-dHk8X9/figureQcPDrQ.png]]
718 722  
719 723  
720 724  
... ...