Commit f470b3caeb1c963a64e93c823a67e0e2403f92d5

Authored by Jordi Inglada
1 parent e0091d8e
Exists in master

Corrélations entre rendements et vars contenues dans TERLAB

Showing 1 changed file with 306 additions and 81 deletions   Show diff stats
notebook/exploration.org
... ... @@ -38,6 +38,8 @@ def find_terlab_file(dpt):
38 38 : python.el: native completion setup loaded
39 39  
40 40  
  41 +
  42 +
41 43 #+begin_src python :results output :session :exports both
42 44 assert '/home/inglada/stok/DATA/OSO/MAA_SSP/RPG_TERLAB_DEP90-91-93-94-95_2017.7z'==find_terlab_file('94')
43 45 assert '/home/inglada/stok/DATA/OSO/MAA_SSP/RPG_TERLAB_DEP90-91-93-94-95_2017.7z'==find_terlab_file('95')
... ... @@ -64,8 +66,6 @@ def shape2df(departement):
64 66  
65 67 #+RESULTS:
66 68  
67   -
68   -
69 69 * Quelles sont les variables intéressantes dans les données TERLAB
70 70  
71 71 ** On vérifie que le fichier est bien lu
... ... @@ -75,12 +75,12 @@ print(df31.head())
75 75 #+end_src
76 76  
77 77 #+RESULTS:
78   -: CODE_CULTU SURF_ADM PRECISION SEMENCE ... SURUTISOL RENDNORME MMEAU geometry
79   -: 0 ORH 1.42 None 0 ... NaN NaN NaN POLYGON ((506016.382600002 6234988.195600003, ...
80   -: 1 PPH 0.00 None 0 ... NaN NaN NaN POLYGON ((506032.0680000037 6235002.251000002,...
81   -: 2 PPH 0.00 None 0 ... NaN NaN NaN POLYGON ((506108.3540000021 6234766.824000001,...
82   -: 3 ORH 1.87 None 0 ... NaN NaN NaN POLYGON ((506107.6996000037 6234765.226100001,...
83   -: 4 PPH 0.00 None 0 ... NaN NaN NaN POLYGON ((506874.6119000018 6234495.103300001,...
  78 +: CODE_CULTU SURF_ADM PRECISION SEMENCE DEST_ICHN ... LIBCULTURE SURUTISOL RENDNORME MMEAU geometry
  79 +: 0 ORH 1.42 None 0 None ... 03_ORGE_HIVER NaN NaN NaN POLYGON ((506016.382600002 6234988.195600003, ...
  80 +: 1 PPH 0.00 None 0 None ... 00_XXXXXXXXXX NaN NaN NaN POLYGON ((506032.0680000037 6235002.251000002,...
  81 +: 2 PPH 0.00 None 0 None ... 00_XXXXXXXXXX NaN NaN NaN POLYGON ((506108.3540000021 6234766.824000001,...
  82 +: 3 ORH 1.87 None 0 None ... 03_ORGE_HIVER NaN NaN NaN POLYGON ((506107.6996000037 6234765.226100001,...
  83 +: 4 PPH 0.00 None 0 None ... 00_XXXXXXXXXX NaN NaN NaN POLYGON ((506874.6119000018 6234495.103300001,...
84 84 :
85 85 : [5 rows x 18 columns]
86 86  
... ... @@ -96,6 +96,7 @@ print(df31.columns)
96 96 : 'RENDNORME', 'MMEAU', 'geometry'],
97 97 : dtype='object')
98 98  
  99 +
99 100 Une ligne
100 101 #+begin_src python :results output :session :exports both
101 102 print(df31.iloc[0])
... ... @@ -134,7 +135,7 @@ print(codes_cultures)
134 135  
135 136 #+RESULTS:
136 137 : 167
137   -: {'LOT', 'MPC', 'AVH', 'GES', 'PCH', 'TTP', 'DTY', 'LH6', 'PAG', 'FNU', 'PPO', 'SGH', 'MC5', 'TRN', 'MLG', 'CZH', 'PPP', 'SGE', 'LAV', 'ORP', 'BRO', 'OLI', 'ME7', 'POR', 'CML', 'SOJ', 'FEV', 'FLA', 'CHS', 'LH7', 'SNE', 'LU5', 'MIS', 'SPL', 'NOX', 'FVL', 'BVF', 'LUD', 'CID', 'MIE', 'SAI', 'RGA', 'PTR', 'PEP', 'BDH', 'LU6', 'ORH', 'CPL', 'ML6', 'CTG', 'SPH', 'LEC', 'TOP', 'PFP', 'MC7', 'FRA', 'SRS', 'CRD', 'VRT', 'VRG', 'BTP', 'ORT', 'BOR', 'BTA', 'RVI', 'TR6', 'PPR', 'MOT', 'LUZ', 'CAG', 'OIG', 'CHU', 'CIT', 'TAB', 'MC6', 'RDI', 'MLO', 'CCT', 'LU7', 'FF7', 'MID', 'HBL', 'PH6', 'MLT', 'TR5', 'SBO', 'ME6', 'EPE', 'PHI', 'CEL', 'SOG', 'SA5', 'ANE', 'BTH', 'PSL', 'VE7', 'LDH', 'ML5', 'SA6', 'PCL', 'CAR', 'POT', 'PRL', 'PAS', 'MCT', 'NVT', 'OAG', 'CPH', 'MOH', 'BFS', 'J6S', 'TCR', 'CCN', 'AUB', 'J5M', 'AIL', 'FAG', 'TTH', 'MCR', 'TRE', 'LBF', 'TOM', 'PAN', 'CZP', 'LIH', 'J6P', 'AGR', 'MH7', 'GFP', 'TRU', 'LO7', 'CGO', 'PVP', 'AVP', 'FNO', 'SA7', 'PFR', 'PPH', 'ML7', 'PFH', 'FET', 'BDP', 'BOP', 'NOS', 'MH5', 'PTC', 'MPA', 'TR7', 'PH7', 'CES', 'FLP', 'CHT', 'LP7', 'HAR', 'VE6', 'CMB', 'BTN', 'MH6', 'FSG', 'BFP', 'PPA', 'VES', 'ROQ', 'PP7', 'LIP', 'VRC', 'ART'}
  138 +: {'PCL', 'LO7', 'PH7', 'AGR', 'SAI', 'VE7', 'MCT', 'DTY', 'PHI', 'MOT', 'PPO', 'J6S', 'BTN', 'LIH', 'FLP', 'MPA', 'FNU', 'NOX', 'LUZ', 'CHT', 'OIG', 'CCT', 'MCR', 'POR', 'ME7', 'VRC', 'PAS', 'PVP', 'HBL', 'LOT', 'MLT', 'BTA', 'ML6', 'CEL', 'PH6', 'ANE', 'AUB', 'SGH', 'LIP', 'ORT', 'J5M', 'TRN', 'FAG', 'HAR', 'PFR', 'SRS', 'CHU', 'FEV', 'TR6', 'CAR', 'VES', 'BDH', 'ART', 'TOM', 'PPP', 'MH7', 'LEC', 'FRA', 'SA6', 'TTP', 'NOS', 'RGA', 'TR5', 'PFP', 'LH6', 'PCH', 'ORH', 'LH7', 'RDI', 'PAN', 'BFS', 'FET', 'EPE', 'MPC', 'ME6', 'BDP', 'SA7', 'SOJ', 'CPL', 'PAG', 'J6P', 'GFP', 'RVI', 'LBF', 'CPH', 'MH6', 'LU6', 'LU5', 'BOR', 'CZP', 'MC5', 'LUD', 'TCR', 'PPR', 'SA5', 'TRU', 'PP7', 'PPA', 'FNO', 'CGO', 'AVP', 'TR7', 'LAV', 'VRG', 'BTP', 'TRE', 'CML', 'PTC', 'MIE', 'ML5', 'SNE', 'MH5', 'PTR', 'CZH', 'LU7', 'FF7', 'CCN', 'LDH', 'CRD', 'MC6', 'CHS', 'OAG', 'OLI', 'VRT', 'BOP', 'SGE', 'GES', 'PPH', 'SPH', 'BVF', 'CTG', 'AIL', 'FVL', 'CID', 'NVT', 'MLG', 'BRO', 'PRL', 'FLA', 'SPL', 'SBO', 'ORP', 'BTH', 'MC7', 'TTH', 'CAG', 'AVH', 'BFP', 'ML7', 'POT', 'TOP', 'PSL', 'FSG', 'CIT', 'MID', 'LP7', 'PFH', 'MIS', 'CES', 'CMB', 'PEP', 'TAB', 'VE6', 'MLO', 'MOH', 'SOG', 'ROQ'}
138 139  
139 140 Mais il est plus lisible de regarder ~LIBCULTURE~
140 141 #+begin_src python :results output :session :exports both
... ... @@ -145,7 +146,7 @@ print(libs_cultures)
145 146  
146 147 #+RESULTS:
147 148 : 20
148   -: {'07_TRITICALE', '00_XXXXXXXXXX', '21_BETTERAVE_INDUSTRIELLE', '20_MAIS_FOURRAGE', '31_POMME_DE_TERRE_CONSO', '17_MAIS_GRAIN', '11_POIS_PROTEAGINEUX', '15_SORGHO', '02_BLE_DUR', '13_TOURNESOL', '05_AVOINE', '09_COLZA', '06_SEIGLE', '01_BLE_TENDRE', '61_MELANGE_CEREALES', '62_MELANGE_PROTEAGINEUX', '12_FEVE_FEVEROLE', '03_ORGE_HIVER', '14_SOJA', '04_ORGE_PRINTEMPS'}
  149 +: {'11_POIS_PROTEAGINEUX', '62_MELANGE_PROTEAGINEUX', '12_FEVE_FEVEROLE', '00_XXXXXXXXXX', '31_POMME_DE_TERRE_CONSO', '14_SOJA', '03_ORGE_HIVER', '01_BLE_TENDRE', '13_TOURNESOL', '21_BETTERAVE_INDUSTRIELLE', '04_ORGE_PRINTEMPS', '07_TRITICALE', '09_COLZA', '05_AVOINE', '15_SORGHO', '06_SEIGLE', '61_MELANGE_CEREALES', '02_BLE_DUR', '17_MAIS_GRAIN', '20_MAIS_FOURRAGE'}
149 150  
150 151 Mais il y a beaucoup plus de ~CODE_CULTU~ que de ~LIBCULTURE~. Peut-être que le 2è n'existe que pour les parcelles avec rendement?
151 152  
... ... @@ -158,6 +159,7 @@ print(len(codes_libs))
158 159 #+RESULTS:
159 160 : 167
160 161  
  162 +
161 163 ** Répartition des cultures
162 164 #+begin_src python :results output :session :exports both
163 165 df31.LIBCULTURE.value_counts()
... ... @@ -214,7 +216,7 @@ matplot_lib_filename
214 216 #+end_src
215 217  
216 218 #+RESULTS:
217   -[[file:/tmp/babel-QEj3SP/figureDH9QA6.png]]
  219 +[[file:/tmp/babel-X17w0V/figureR5oWbq.png]]
218 220  
219 221 Valeurs manquantes de rendement?
220 222 #+begin_src python :results output :session :exports both
... ... @@ -262,77 +264,9 @@ matplot_lib_filename
262 264 #+end_src
263 265  
264 266 #+RESULTS:
265   -[[file:/tmp/babel-QEj3SP/figureaXXL2p.png]]
  267 +[[file:/tmp/babel-X17w0V/figureJIfLg1.png]]
266 268  
267 269 ** Distribution statistique des rendements par culture
268   -#+RESULTS:
269   -#+begin_example
270   -TRN
271   - CODE_CULTU SURF_ADM PRECISION SEMENCE ... SURUTISOL RENDNORME MMEAU geometry
272   -24 TRN 0.60 None 0 ... NaN NaN NaN POLYGON ((555017.3030000031 6288487.261, 55503...
273   -30 TRN 3.65 None 0 ... NaN NaN NaN POLYGON ((581263.6560000032 6252087.490000002,...
274   -31 TRN 6.69 None 0 ... NaN NaN NaN POLYGON ((580446.200000003 6251350.913000003, ...
275   -33 TRN 14.25 None 0 ... NaN NaN NaN POLYGON ((580111.6431000009 6250744.730800003,...
276   -45 TRN 1.05 None 0 ... NaN NaN NaN POLYGON ((581790.2880000025 6251575.461000003,...
277   -48 TRN 12.44 None 0 ... NaN NaN NaN POLYGON ((581401.3687999994 6250285.316800002,...
278   -53 TRN 2.16 None 0 ... NaN NaN NaN POLYGON ((554394.9580000043 6288677.411000002,...
279   -54 TRN 8.48 None 0 ... NaN NaN NaN POLYGON ((554866.626000002 6288673.861000001, ...
280   -58 TRN 3.52 None 0 ... NaN NaN NaN POLYGON ((553234.800999999 6287448.078000002, ...
281   -72 TRN 0.44 None 0 ... 25.00 24.0 60.00 POLYGON ((575801.138000004 6245039.913000003, ...
282   -81 TRN 9.79 None 0 ... 25.00 24.0 60.00 POLYGON ((575622.563000001 6245093.604600001, ...
283   -85 TRN 0.60 None 0 ... 25.00 24.0 60.00 POLYGON ((576226.2412 6245008.710800003, 57621...
284   -86 TRN 3.60 None 0 ... 25.00 24.0 60.00 POLYGON ((576052.189199999 6245216.165200002, ...
285   -88 TRN 6.77 None 0 ... 25.00 24.0 60.00 POLYGON ((577236.5139000043 6244098.652800001,...
286   -90 TRN 2.04 None 0 ... 25.00 24.0 60.00 POLYGON ((577029.3339999989 6244296.425000001,...
287   -92 TRN 2.49 None 0 ... 25.00 24.0 60.00 POLYGON ((578223.6330000013 6246276.920000002,...
288   -104 TRN 1.00 None 0 ... NaN NaN NaN POLYGON ((555160.8339999989 6288614.073000003,...
289   -111 TRN 5.59 None 0 ... NaN NaN NaN POLYGON ((609866.8835000023 6263464.590600003,...
290   -119 TRN 7.70 None 0 ... NaN NaN NaN POLYGON ((586628.8779999986 6287265.739100002,...
291   -228 TRN 29.52 None 0 ... NaN NaN NaN POLYGON ((586507.767400004 6249033.661900003, ...
292   -240 TRN 4.68 None 0 ... NaN NaN NaN POLYGON ((511739.449000001 6241834.759, 511737...
293   -322 TRN 4.32 None 0 ... NaN NaN NaN POLYGON ((610430.0773999989 6261651.982700001,...
294   -323 TRN 5.69 None 0 ... NaN NaN NaN POLYGON ((614708.3079999983 6277454.932, 61471...
295   -326 TRN 1.67 None 0 ... NaN NaN NaN POLYGON ((615043.6757000014 6277122.2916, 6150...
296   -333 TRN 1.68 None 0 ... NaN NaN NaN POLYGON ((608512.7588 6262650.3046, 608452.698...
297   -334 TRN 5.63 None 0 ... NaN NaN NaN POLYGON ((608843.5855000019 6262790.020500001,...
298   -338 TRN 9.24 None 0 ... NaN NaN NaN POLYGON ((610236.4738000035 6262741.7326, 6102...
299   -341 TRN 0.44 None 1 ... NaN NaN NaN POLYGON ((610038.8352999985 6262889.500300001,...
300   -344 TRN 0.41 None 1 ... NaN NaN NaN POLYGON ((610132.8236000016 6262748.6778, 6101...
301   -346 TRN 2.38 None 0 ... NaN NaN NaN POLYGON ((609944.3612000048 6262859.592700001,...
302   -... ... ... ... ... ... ... ... ...
303   -130858 TRN 0.00 None 0 ... NaN NaN NaN POLYGON ((591821.9479999989 6246798.5711, 5917...
304   -130862 TRN 2.47 None 0 ... NaN NaN NaN POLYGON ((561445.3229999989 6247174.507000003,...
305   -130885 TRN 6.04 None 0 ... NaN NaN NaN POLYGON ((518236.2965000048 6252405.700800002,...
306   -130900 TRN 4.37 None 0 ... NaN NaN NaN POLYGON ((568873.9149999991 6253983.294, 56885...
307   -130922 TRN 5.74 None 0 ... NaN NaN NaN POLYGON ((604401.186999999 6269327.276300002, ...
308   -130923 TRN 2.04 None 0 ... NaN NaN NaN POLYGON ((604401.1928000003 6269327.282900002,...
309   -130927 TRN 0.15 None 0 ... NaN NaN NaN POLYGON ((546528.9905000031 6313053.770800002,...
310   -130952 TRN 7.88 None 0 ... NaN NaN NaN POLYGON ((562847.7470000014 6247395.255000003,...
311   -130957 TRN 1.85 None 0 ... NaN NaN NaN POLYGON ((562305.936999999 6243942.735000003, ...
312   -130958 TRN 1.80 None 0 ... NaN NaN NaN POLYGON ((562131.1979999989 6244150.639700003,...
313   -130959 TRN 6.73 None 0 ... NaN NaN NaN POLYGON ((562298.2080999985 6244368.4877, 5624...
314   -130980 TRN 0.63 None 0 ... NaN NaN NaN POLYGON ((546709.0170000046 6271094.791000001,...
315   -130981 TRN 0.41 None 0 ... NaN NaN NaN POLYGON ((546708.8593000025 6271093.847900003,...
316   -130987 TRN 2.59 None 0 ... 85.00 24.0 204.00 POLYGON ((597935.5368999988 6259834.054000001,...
317   -130994 TRN 1.17 None 0 ... NaN NaN NaN POLYGON ((612614.8017000034 6263528.291700002,...
318   -130995 TRN 3.37 None 0 ... NaN NaN NaN POLYGON ((612437.2829999998 6263578.228300001,...
319   -131012 TRN 3.18 None 0 ... NaN NaN NaN POLYGON ((546528.6845000014 6313043.021100003,...
320   -131023 TRN 0.70 None 0 ... NaN NaN NaN POLYGON ((541206.675999999 6300451.175000001, ...
321   -131095 TRN 0.59 None 0 ... NaN NaN NaN POLYGON ((541208.5989000052 6300274.455000002,...
322   -131096 TRN 1.33 None 0 ... NaN NaN NaN POLYGON ((541206.8114000037 6300275.685600001,...
323   -131120 TRN 0.00 None 0 ... NaN NaN NaN POLYGON ((580034.3561000004 6250682.503200002,...
324   -131134 TRN 0.00 None 0 ... NaN NaN NaN POLYGON ((597674.930399999 6258343.746300001, ...
325   -131136 TRN 0.00 None 0 ... NaN NaN NaN POLYGON ((591802.4574000016 6246689.483200002,...
326   -131146 TRN 4.60 None 0 ... NaN NaN NaN POLYGON ((581203.7241000012 6256713.529000003,...
327   -131152 TRN 0.00 None 0 ... 80.16 24.0 192.38 POLYGON ((598665.1539999992 6259764.382800002,...
328   -131156 TRN 2.49 None 0 ... NaN NaN NaN POLYGON ((602208.8986000046 6260887.712900002,...
329   -131212 TRN 1.09 None 0 ... NaN NaN NaN POLYGON ((595624.1480000019 6250820.873, 59563...
330   -131240 TRN 1.91 None 0 ... NaN NaN NaN POLYGON ((501596.7771000043 6236236.866, 50158...
331   -131241 TRN 1.99 None 0 ... NaN NaN NaN POLYGON ((501600.5104999989 6236239.7509, 5015...
332   -131243 TRN 0.38 None 0 ... NaN NaN NaN POLYGON ((592146.2823000029 6256409.391900003,...
333   -
334   -[10073 rows x 18 columns]
335   -#+end_example
336 270  
337 271 #+begin_src python :results file :session :var matplot_lib_filename=(org-babel-temp-file "figure" ".png") :exports both
338 272 rnd_counts = df31[df31.RENDNORME.notnull()]['CODE_CULTU'].value_counts()
... ... @@ -349,10 +283,301 @@ matplot_lib_filename
349 283 #+end_src
350 284  
351 285 #+RESULTS:
352   -[[file:/tmp/babel-QEj3SP/figureIq8abo.png]]
  286 +[[file:/tmp/babel-X17w0V/figureukzYe2.png]]
  287 +
353 288  
354 289  
  290 +* Distribution spatiale des rendements
  291 +** Ne garder que les parcelles avec une seule culture
  292 +Le champ SURF_ADM est la surface de la parcelle. Le champ SURUTISOL est la surface totale de toutes les parcelles de la même culture dans la même exploitation. S'il n'y a qu'une parcelle de cette culture dans cette exploitation, ces 2 surfaces sont très proches (ou égales). Le rendement moyen pour toutes les parcelles de l'exploitation de même culture est donné par RENDNORME.
  293 +
  294 +#+begin_src python :results output :session :exports both
# Keep the parcels that have both a yield (RENDNORME) and a per-crop farm area
# (SURUTISOL), and whose own area (SURF_ADM) differs from SURUTISOL by less
# than 5 %.
# .copy() makes df31par an independent frame, so columns added to it later
# never write into a view of df31.
df31par = df31.query('RENDNORME.notnull() & SURUTISOL.notnull() & (abs(SURUTISOL - SURF_ADM)/(SURF_ADM) < 0.05)').copy()
print(df31par[['SURF_ADM', 'SURUTISOL']].head())
print('------------')
print(df31par['LIBCULTURE'].value_counts())
  299 +#+end_src
  300 +
  301 +#+RESULTS:
  302 +#+begin_example
  303 +SURF_ADM SURUTISOL
  304 +78 27.69 27.00
  305 +644 3.42 3.50
  306 +937 15.87 16.00
  307 +5115 1.05 1.06
  308 +5303 19.41 20.13
  309 +------------
  310 +01_BLE_TENDRE 23
  311 +03_ORGE_HIVER 20
  312 +14_SOJA 20
  313 +11_POIS_PROTEAGINEUX 19
  314 +17_MAIS_GRAIN 18
  315 +13_TOURNESOL 18
  316 +09_COLZA 12
  317 +15_SORGHO 11
  318 +07_TRITICALE 7
  319 +12_FEVE_FEVEROLE 7
  320 +02_BLE_DUR 6
  321 +04_ORGE_PRINTEMPS 4
  322 +05_AVOINE 3
  323 +06_SEIGLE 2
  324 +20_MAIS_FOURRAGE 1
  325 +31_POMME_DE_TERRE_CONSO 1
  326 +Name: LIBCULTURE, dtype: int64
  327 +#+end_example
  328 +** Histogramme des rendements par culture sur les parcelles pures
  329 +#+begin_src python :results file :session :var matplot_lib_filename=(org-babel-temp-file "figure" ".png") :exports both
# Histograms of RENDNORME for the six most frequent crop codes in df31par,
# laid out on a 3 x 2 grid with shared axes.
rnd_counts = df31par['CODE_CULTU'].value_counts()
maj6_rnd = rnd_counts.head(6)
fig, axes = plt.subplots(nrows=3, ncols=2, sharex=True, sharey=True)
print(axes)
for axis, current_cult in zip(axes.ravel(), maj6_rnd.index):
    parcels = df31par.loc[df31par['CODE_CULTU'] == current_cult]
    axis.set_xlabel(current_cult)
    parcels.hist(column='RENDNORME', ax=axis)
    # Blank the per-panel title; the figure-level title below names the variable once.
    axis.set_title('')
plt.suptitle('RENDNORME')
plt.savefig(matplot_lib_filename)
matplot_lib_filename
  341 +#+end_src
  342 +
  343 +#+RESULTS:
  344 +[[file:/tmp/babel-X17w0V/figurevurUbh.png]]
  345 +
  346 +** Ajouter une colonne avec le centroïde de la parcelle
  347 +#+begin_src python :results output :session :exports both
# df31par is a filtered subset of df31. assign() returns a new frame instead of
# writing a column into what may be a view (avoids SettingWithCopyWarning).
df31par = df31par.assign(centroid=df31par.geometry.centroid)
# Bracket access reads the stored column; on a GeoDataFrame the attribute
# `df31par.centroid` would resolve to the computed-centroid property instead.
print(df31par['centroid'].head())
  350 +#+end_src
  351 +
  352 +#+RESULTS:
  353 +: 78 POINT (576943.0567814494 6248166.951789126)
  354 +: 644 POINT (573025.7548245641 6264003.533300648)
  355 +: 937 POINT (572496.3795754968 6266017.564098726)
  356 +: 5115 POINT (560246.9060276946 6236853.034810145)
  357 +: 5303 POINT (583985.3814635368 6245950.934973895)
  358 +: dtype: object
  359 +: 576943.0567814494
  360 +
  361 +#+begin_src python :results file :session :var matplot_lib_filename=(org-babel-temp-file "figure" ".png") :exports both
import matplotlib.pyplot as plt

# Map of the parcels coloured by RENDNORME.
# GeoDataFrame.plot() opens its own figure unless it is handed an axes, so a
# separately created figure is never drawn on. The axes is created here and
# passed in explicitly; 20 x 10 in keeps the 2:1 shape at a size that can
# actually be rendered.
fig, ax = plt.subplots(figsize=(20, 10))
df31par.plot(column='RENDNORME', cmap='viridis', ax=ax)
fig.savefig(matplot_lib_filename)
matplot_lib_filename
  368 +#+end_src
  369 +
  370 +#+RESULTS:
  371 +[[file:/tmp/babel-X17w0V/figure4XvQFY.png]]
  372 +
  373 +
  374 +
  375 +Il n'y a pas de corrélation visible entre rendement et position dans le département.
  376 +** Corrélation entre rendement et taille de la parcelle
  377 +
  378 +#+begin_src python :results output :session :exports both
# Map every crop label to an integer colour index.
# Labels are sorted first: iteration order of a set of strings changes from
# one interpreter session to the next, which would silently reshuffle the
# colours between runs.
colordict = {lc: i for i, lc in enumerate(sorted(set(df31par['LIBCULTURE'])))}
print(colordict)
# One colour index per parcel, in row order.
colors = [colordict[c] for c in df31par['LIBCULTURE']]
  382 +#+end_src
  383 +
  384 +#+RESULTS:
  385 +: {'14_SOJA': 0, '02_BLE_DUR': 1, '03_ORGE_HIVER': 2, '01_BLE_TENDRE': 3, '11_POIS_PROTEAGINEUX': 4, '13_TOURNESOL': 5, '04_ORGE_PRINTEMPS': 6, '07_TRITICALE': 7, '15_SORGHO': 8, '09_COLZA': 9, '12_FEVE_FEVEROLE': 10, '17_MAIS_GRAIN': 11, '20_MAIS_FOURRAGE': 12, '31_POMME_DE_TERRE_CONSO': 13, '05_AVOINE': 14, '06_SEIGLE': 15}
  386 +
  387 +
  388 +
  389 +#+begin_src python :results file :session :var matplot_lib_filename=(org-babel-temp-file "figure" ".png") :exports both
import matplotlib.pyplot as plt
import matplotlib.cm as cm  # kept: other blocks of the session may rely on `cm`

# Scatter plot of yield against parcel area, one colour per crop label.
n_classes = len(colordict)
plt.figure(figsize=(10, 5))
# vmin/vmax put every integer class index at the centre of its own colour
# band, so the colour-bar ticks can sit exactly on the classes.
plt.scatter(df31par['SURF_ADM'], df31par['RENDNORME'], c=colors,
            cmap=plt.get_cmap('tab20', n_classes),
            vmin=-0.5, vmax=n_classes - 0.5)
cbar = plt.colorbar(ticks=list(range(n_classes)))
# Labels ordered by their colour index so that tick i carries label i.
cbar.set_ticklabels(sorted(colordict, key=colordict.get))
plt.xlabel('SURF_ADM')
plt.ylabel('REND')
plt.savefig(matplot_lib_filename)
matplot_lib_filename
  402 +#+end_src
  403 +
  404 +#+RESULTS:
  405 +[[file:/tmp/babel-X17w0V/figureUjRq6C.png]]
  406 +
  407 +Il semble y avoir une corrélation positive entre rendement et taille de la parcelle, conditionnée par la classe
  408 +
  409 +#+begin_src python :session :results output :exports both
# Six most frequent crop labels in df31par; maj6_rnd.index drives the per-crop
# correlation plots.
rnd_counts = df31par['LIBCULTURE'].value_counts()
maj6_rnd = rnd_counts.iloc[:6]
# The block is evaluated with ":results output": only what is printed is
# captured, so print the selection explicitly.
print(maj6_rnd)
  412 +#+end_src
  413 +
  414 +#+RESULTS:
  415 +: 01_BLE_TENDRE 23
  416 +: 03_ORGE_HIVER 20
  417 +: 14_SOJA 20
  418 +: 11_POIS_PROTEAGINEUX 19
  419 +: 17_MAIS_GRAIN 18
  420 +: 13_TOURNESOL 18
  421 +: Name: LIBCULTURE, dtype: int64
  422 +
  423 +#+begin_src python :session :results output :exports both
  424 +import matplotlib.pyplot as plt
  425 +import matplotlib.gridspec as gridspec
  426 +import seaborn as sns
  427 +import numpy as np
  428 +
class SeabornFig2Grid():
    """Move the axes of a seaborn grid into one cell of an existing figure.

    seaborn's FacetGrid, PairGrid and JointGrid each create their own figure.
    This helper detaches their axes and re-attaches them to ``fig`` inside the
    area described by ``subplot_spec``, so several grids can share one figure.

    Parameters
    ----------
    seaborngrid : FacetGrid, PairGrid or JointGrid
        The seaborn grid whose axes are moved. Any other type is accepted but
        nothing is moved (only the finalisation step runs).
    fig : matplotlib figure
        Destination figure.
    subplot_spec : matplotlib SubplotSpec
        Cell of a GridSpec of ``fig`` that receives the grid.
    """

    def __init__(self, seaborngrid, fig, subplot_spec):
        self.fig = fig
        self.sg = seaborngrid
        self.subplot = subplot_spec
        if isinstance(self.sg, (sns.axisgrid.FacetGrid, sns.axisgrid.PairGrid)):
            self._movegrid()
        elif isinstance(self.sg, sns.axisgrid.JointGrid):
            self._movejointgrid()
        self._finalize()

    def _movegrid(self):
        """Move a PairGrid or FacetGrid, keeping its n x m axes layout."""
        self._resize()
        n = self.sg.axes.shape[0]
        m = self.sg.axes.shape[1]
        self.subgrid = gridspec.GridSpecFromSubplotSpec(n, m, subplot_spec=self.subplot)
        for i in range(n):
            for j in range(m):
                self._moveaxes(self.sg.axes[i, j], self.subgrid[i, j])

    def _movejointgrid(self):
        """Move a JointGrid (joint axes plus the two marginal axes)."""
        # Ratio of joint-axes height to marginal height, measured before the
        # resize, gives the number of sub-grid rows/columns the joint plot
        # spans.
        h = self.sg.ax_joint.get_position().height
        h2 = self.sg.ax_marg_x.get_position().height
        r = int(np.round(h / h2))
        self._resize()
        self.subgrid = gridspec.GridSpecFromSubplotSpec(r + 1, r + 1, subplot_spec=self.subplot)

        # Joint plot: everything but the first row and last column.
        self._moveaxes(self.sg.ax_joint, self.subgrid[1:, :-1])
        # Marginals: first row (x) and last column (y).
        self._moveaxes(self.sg.ax_marg_x, self.subgrid[0, :-1])
        self._moveaxes(self.sg.ax_marg_y, self.subgrid[1:, -1])

    def _moveaxes(self, ax, gs):
        """Detach ``ax`` from its figure and attach it to ``self.fig`` at ``gs``."""
        # https://stackoverflow.com/a/46906599/4124317
        ax.remove()
        ax.figure = self.fig
        # add_axes() is what registers the axes with the figure; Figure.axes
        # hands out a freshly built list, so appending to it has no effect.
        self.fig.add_axes(ax)
        ax.set_position(gs.get_position(self.fig))
        ax.set_subplotspec(gs)

    def _finalize(self):
        """Close seaborn's now-empty figure and keep sizes in sync on resize."""
        plt.close(self.sg.fig)
        self.fig.canvas.mpl_connect("resize_event", self._resize)
        self.fig.canvas.draw()

    def _resize(self, evt=None):
        """Give seaborn's figure the same size (in inches) as the host figure."""
        self.sg.fig.set_size_inches(self.fig.get_size_inches())
  481 +#+end_src
  482 +
  483 +#+RESULTS:
  484 +
  485 +#+begin_src python :results file :session :var matplot_lib_filename=(org-babel-temp-file "figure" ".png") :exports both
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import seaborn as sns; sns.set()
from scipy import stats

# One regression jointplot (parcel area vs. yield) per crop in maj6_rnd,
# annotated with the squared Spearman correlation.
plotlist = []
for current_cult in maj6_rnd.index:
    dfcurr = df31par[df31par['LIBCULTURE'] == current_cult]
    # x/y are passed by keyword: recent seaborn no longer accepts them positionally.
    g = sns.jointplot(x='SURF_ADM', y='RENDNORME', data=dfcurr, kind='reg', dropna=True)
    g.set_axis_labels('SURF_' + current_cult, 'REND')
    # JointGrid.annotate() no longer exists in recent seaborn: compute the
    # statistic on the complete rows and write it on the joint axes directly.
    pairs = dfcurr[['SURF_ADM', 'RENDNORME']].dropna()
    rsquare = stats.spearmanr(pairs['SURF_ADM'], pairs['RENDNORME'])[0] ** 2
    g.ax_joint.annotate("$Spearman R^2$: {:.2f}".format(rsquare),
                        xy=(0.05, 0.95), xycoords='axes fraction',
                        ha='left', va='top', fontsize=12)
    plotlist.append(g)

# Gather the individual jointplots on one 3 x 2 figure.
fig = plt.figure(figsize=(10, 10))

gs = gridspec.GridSpec(3, 2)

for i, p in enumerate(plotlist):
    SeabornFig2Grid(p, fig, gs[i])

gs.tight_layout(fig)
# Save the assembled figure explicitly rather than whichever figure is current.
fig.savefig(matplot_lib_filename)
matplot_lib_filename
  509 +#+end_src
  510 +
  511 +#+RESULTS:
  512 +[[file:/tmp/babel-X17w0V/figureYa6zlX.png]]
  513 +
  514 +La corrélation n'est valable que pour le blé tendre et le tournesol. Il faudra évidemment refaire tout ça avec tous les départements.
  515 +
  516 +** Corrélation avec autres variables (qui n'ont pas besoin des images)
  517 +*** Irrigation
  518 +#+begin_src python :results file :session :var matplot_lib_filename=(org-babel-temp-file "figure" ".png") :exports both
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import seaborn as sns; sns.set()
from scipy import stats

# One regression jointplot (MMEAU vs. yield) per crop in maj6_rnd, annotated
# with the squared Spearman correlation.
plotlist = []
for current_cult in maj6_rnd.index:
    dfcurr = df31par[df31par['LIBCULTURE'] == current_cult]
    # x/y are passed by keyword: recent seaborn no longer accepts them positionally.
    g = sns.jointplot(x='MMEAU', y='RENDNORME', data=dfcurr, kind='reg', dropna=True)
    g.set_axis_labels('IRR_' + current_cult, 'REND')
    # JointGrid.annotate() no longer exists in recent seaborn: compute the
    # statistic on the complete rows and write it on the joint axes directly.
    pairs = dfcurr[['MMEAU', 'RENDNORME']].dropna()
    rsquare = stats.spearmanr(pairs['MMEAU'], pairs['RENDNORME'])[0] ** 2
    g.ax_joint.annotate("$Spearman R^2$: {:.2f}".format(rsquare),
                        xy=(0.05, 0.95), xycoords='axes fraction',
                        ha='left', va='top', fontsize=12)
    plotlist.append(g)

# Gather the individual jointplots on one 3 x 2 figure.
fig = plt.figure(figsize=(10, 10))

gs = gridspec.GridSpec(3, 2)

for i, p in enumerate(plotlist):
    SeabornFig2Grid(p, fig, gs[i])

gs.tight_layout(fig)
# Save the assembled figure explicitly rather than whichever figure is current.
fig.savefig(matplot_lib_filename)
matplot_lib_filename
  542 +#+end_src
  543 +
  544 +#+RESULTS:
  545 +[[file:/tmp/babel-X17w0V/figureVmssHE.png]]
  546 +
  547 +*** Bio
  548 +#+begin_src python :results file :session :var matplot_lib_filename=(org-babel-temp-file "figure" ".png") :exports both
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import seaborn as sns; sns.set()
from scipy import stats

# One regression jointplot (BIO vs. yield) per crop in maj6_rnd, annotated
# with the squared Spearman correlation.
plotlist = []
for current_cult in maj6_rnd.index:
    dfcurr = df31par[df31par['LIBCULTURE'] == current_cult]
    # x/y are passed by keyword: recent seaborn no longer accepts them positionally.
    g = sns.jointplot(x='BIO', y='RENDNORME', data=dfcurr, kind='reg', dropna=True)
    g.set_axis_labels('BIO_' + current_cult, 'REND')
    # JointGrid.annotate() no longer exists in recent seaborn: compute the
    # statistic on the complete rows and write it on the joint axes directly.
    pairs = dfcurr[['BIO', 'RENDNORME']].dropna()
    rsquare = stats.spearmanr(pairs['BIO'], pairs['RENDNORME'])[0] ** 2
    g.ax_joint.annotate("$Spearman R^2$: {:.2f}".format(rsquare),
                        xy=(0.05, 0.95), xycoords='axes fraction',
                        ha='left', va='top', fontsize=12)
    plotlist.append(g)

# Gather the individual jointplots on one 3 x 2 figure.
fig = plt.figure(figsize=(10, 10))

gs = gridspec.GridSpec(3, 2)

for i, p in enumerate(plotlist):
    SeabornFig2Grid(p, fig, gs[i])

gs.tight_layout(fig)
# Save the assembled figure explicitly rather than whichever figure is current.
fig.savefig(matplot_lib_filename)
matplot_lib_filename
  572 +#+end_src
  573 +
  574 +#+RESULTS:
  575 +[[file:/tmp/babel-X17w0V/figureE5mY6F.png]]
355 576  
  577 +*** Différence entre surface admin et vraie surface
  578 +*** Latitude
356 579 * Quels sont les départements contenant des informations de rendement?
357 580 * Quelles sont les cultures pour lesquelles les informations de rendement sont les plus nombreuses?
  581 +* Peut-on prédire le rendement à partir des données TERLAB seules?
  582 +- Localisation, culture, surface de la parcelle
358 583 * Quelles sont les tuiles S2 nécessaires pour couvrir chaque département?
... ...