Compare View

switch
from
...
to
 
Commits (3)
notebook/exploration.org 0 → 100644
... ... @@ -0,0 +1,583 @@
  1 +#+TITLE: Exploration des données TERLAB
  2 +#+AUTHOR: Jordi Inglada
  3 +#+LANGUAGE: fr
  4 +# #+PROPERTY: header-args :eval never-export
  5 +
  6 +
  7 +* Accès à un fichier TERLAB
  8 +** Trouver le fichier d'un département
  9 +Le nom des fichiers zippés est ~RPG_TERLAB_DEP65-66_2017.7z~ ou ~RPG_TERLAB_DEP10_2017.7z~
  10 +#+begin_src python :results output :session :exports code
  11 +
terlabdir = '/home/inglada/stok/DATA/OSO/MAA_SSP/'
import glob
def find_terlab_file(dpt, tld=None):
    """Return the path of the TERLAB ``.7z`` archive covering a department.

    Archives are expected to be named ``RPG_TERLAB_DEP<dd>[-<dd>...]_<year>.7z``
    (one archive may cover several departments) or
    ``RPG_TERLAB_Corse_<year>.7z`` for departments ``2A`` and ``2B``.

    Args:
        dpt: department code, e.g. ``'31'``, ``31`` or ``'2A'``. A one-digit
            code is also tried zero-padded (``1`` matches ``DEP01``).
        tld: directory holding the archives; defaults to ``terlabdir``.

    Returns:
        The path of the first matching archive, or ``None`` when no archive
        in the directory covers the department.
    """
    if tld is None:
        tld = terlabdir
    code = str(dpt)
    wanted = {code, code.zfill(2)}
    for f in glob.glob('{}/*.7z'.format(tld)):
        fields = f.split('/')[-1].split('_')
        if len(fields) < 3:
            # Not a RPG_TERLAB_<departments>_<year> archive: ignore it
            # instead of failing on the missing field.
            continue
        tok = fields[2]
        if tok == 'Corse':
            if code in ('2A', '2B'):
                return f
            continue
        # Drop the 'DEP' prefix; what remains is one code or several codes
        # joined with '-'.
        if wanted.intersection(tok[3:].split('-')):
            return f
    return None
  31 +
  32 +#+end_src
  33 +
  34 +#+RESULTS:
  35 +: Python 3.7.0 (default, Oct 9 2018, 10:31:47)
  36 +: [GCC 7.3.0] :: Anaconda, Inc. on linux
  37 +: Type "help", "copyright", "credits" or "license" for more information.
  38 +: python.el: native completion setup loaded
  39 +
  40 +
  41 +
  42 +
  43 +#+begin_src python :results output :session :exports both
  44 +assert '/home/inglada/stok/DATA/OSO/MAA_SSP/RPG_TERLAB_DEP90-91-93-94-95_2017.7z'==find_terlab_file('94')
  45 +assert '/home/inglada/stok/DATA/OSO/MAA_SSP/RPG_TERLAB_DEP90-91-93-94-95_2017.7z'==find_terlab_file('95')
  46 +assert '/home/inglada/stok/DATA/OSO/MAA_SSP/RPG_TERLAB_DEP31_2017.7z'==find_terlab_file('31')
  47 +assert '/home/inglada/stok/DATA/OSO/MAA_SSP/RPG_TERLAB_Corse_2017.7z'==find_terlab_file('2A')
  48 +#+end_src
  49 +
  50 +#+RESULTS:
  51 +
  52 +
  53 +
  54 +** Mettre le fichier dans un data frame geopandas
  55 +#+begin_src python :results output :session :exports both
  56 +import subprocess
  57 +import os
  58 +import geopandas as gp
def shape2df(departement):
    """Extract the TERLAB archive of a department and load its parcels.

    Args:
        departement: department code (e.g. ``31``); it is converted with
            ``str`` both to locate the archive and to build the shapefile name.

    Returns:
        The data frame returned by ``geopandas.read_file`` for the
        department's shapefile.

    Raises:
        FileNotFoundError: no archive covers the department.
        RuntimeError: 7z ended with a fatal exit status.
    """
    zipfile = find_terlab_file(str(departement))
    if zipfile is None:
        raise FileNotFoundError(
            'no TERLAB archive found for department {}'.format(departement))
    # 7z extracts into the current working directory unless told otherwise,
    # while the shapefile is read from terlabdir below: extract there.
    status = subprocess.call(
        ["7z", "x", zipfile, "-aoa", "-o{}".format(terlabdir)],
        stdout=subprocess.DEVNULL)
    # Exit status 1 is only a warning for 7z; anything above is a failure.
    if status > 1:
        raise RuntimeError(
            '7z failed on {} (exit status {})'.format(zipfile, status))
    shapefile = '{}/SURFACES-2017-PARCELLES-GRAPHIQUES-CONSTATEES_0{}_20180210.shp'.format(terlabdir, str(departement))
    return gp.read_file(shapefile)
  65 +#+end_src
  66 +
  67 +#+RESULTS:
  68 +
  69 +* Quelles sont les variables intéressantes dans les données TERLAB
  70 +
  71 +** On vérifie que le fichier est bien lu
  72 +#+begin_src python :results output :session :exports both
  73 +df31 = shape2df(31)
  74 +print(df31.head())
  75 +#+end_src
  76 +
  77 +#+RESULTS:
  78 +: CODE_CULTU SURF_ADM PRECISION SEMENCE DEST_ICHN ... LIBCULTURE SURUTISOL RENDNORME MMEAU geometry
  79 +: 0 ORH 1.42 None 0 None ... 03_ORGE_HIVER NaN NaN NaN POLYGON ((506016.382600002 6234988.195600003, ...
  80 +: 1 PPH 0.00 None 0 None ... 00_XXXXXXXXXX NaN NaN NaN POLYGON ((506032.0680000037 6235002.251000002,...
  81 +: 2 PPH 0.00 None 0 None ... 00_XXXXXXXXXX NaN NaN NaN POLYGON ((506108.3540000021 6234766.824000001,...
  82 +: 3 ORH 1.87 None 0 None ... 03_ORGE_HIVER NaN NaN NaN POLYGON ((506107.6996000037 6234765.226100001,...
  83 +: 4 PPH 0.00 None 0 None ... 00_XXXXXXXXXX NaN NaN NaN POLYGON ((506874.6119000018 6234495.103300001,...
  84 +:
  85 +: [5 rows x 18 columns]
  86 +
  87 +** On liste les colonnes, puis on affiche une ligne
  88 +#+begin_src python :results output :session :exports both
  89 +print(df31.columns)
  90 +#+end_src
  91 +
  92 +#+RESULTS:
  93 +: Index(['CODE_CULTU', 'SURF_ADM', 'PRECISION', 'SEMENCE', 'DEST_ICHN',
  94 +: 'CULTURE_D1', 'CULTURE_D2', 'BIO', 'ENGAGEMENT', 'MARAICHAGE',
  95 +: 'AGROFOREST', 'TLENQ', 'CODUTISOL', 'LIBCULTURE', 'SURUTISOL',
  96 +: 'RENDNORME', 'MMEAU', 'geometry'],
  97 +: dtype='object')
  98 +
  99 +
  100 +Une ligne
  101 +#+begin_src python :results output :session :exports both
  102 +print(df31.iloc[0])
  103 +#+end_src
  104 +
  105 +#+RESULTS:
  106 +#+begin_example
  107 +CODE_CULTU ORH
  108 +SURF_ADM 1.42
  109 +PRECISION None
  110 +SEMENCE 0
  111 +DEST_ICHN None
  112 +CULTURE_D1 None
  113 +CULTURE_D2 None
  114 +BIO 0
  115 +ENGAGEMENT None
  116 +MARAICHAGE 0
  117 +AGROFOREST None
  118 +TLENQ 1
  119 +CODUTISOL 03
  120 +LIBCULTURE 03_ORGE_HIVER
  121 +SURUTISOL NaN
  122 +RENDNORME NaN
  123 +MMEAU NaN
  124 +geometry POLYGON ((506016.382600002 6234988.195600003, ...
  125 +Name: 0, dtype: object
  126 +#+end_example
  127 +
  128 +** Quels sont les codes des cultures?
  129 +
  130 +#+begin_src python :results output :session :exports both
  131 +codes_cultures = set(df31.CODE_CULTU)
  132 +print(len(codes_cultures))
  133 +print(codes_cultures)
  134 +#+end_src
  135 +
  136 +#+RESULTS:
  137 +: 167
  138 +: {'PCL', 'LO7', 'PH7', 'AGR', 'SAI', 'VE7', 'MCT', 'DTY', 'PHI', 'MOT', 'PPO', 'J6S', 'BTN', 'LIH', 'FLP', 'MPA', 'FNU', 'NOX', 'LUZ', 'CHT', 'OIG', 'CCT', 'MCR', 'POR', 'ME7', 'VRC', 'PAS', 'PVP', 'HBL', 'LOT', 'MLT', 'BTA', 'ML6', 'CEL', 'PH6', 'ANE', 'AUB', 'SGH', 'LIP', 'ORT', 'J5M', 'TRN', 'FAG', 'HAR', 'PFR', 'SRS', 'CHU', 'FEV', 'TR6', 'CAR', 'VES', 'BDH', 'ART', 'TOM', 'PPP', 'MH7', 'LEC', 'FRA', 'SA6', 'TTP', 'NOS', 'RGA', 'TR5', 'PFP', 'LH6', 'PCH', 'ORH', 'LH7', 'RDI', 'PAN', 'BFS', 'FET', 'EPE', 'MPC', 'ME6', 'BDP', 'SA7', 'SOJ', 'CPL', 'PAG', 'J6P', 'GFP', 'RVI', 'LBF', 'CPH', 'MH6', 'LU6', 'LU5', 'BOR', 'CZP', 'MC5', 'LUD', 'TCR', 'PPR', 'SA5', 'TRU', 'PP7', 'PPA', 'FNO', 'CGO', 'AVP', 'TR7', 'LAV', 'VRG', 'BTP', 'TRE', 'CML', 'PTC', 'MIE', 'ML5', 'SNE', 'MH5', 'PTR', 'CZH', 'LU7', 'FF7', 'CCN', 'LDH', 'CRD', 'MC6', 'CHS', 'OAG', 'OLI', 'VRT', 'BOP', 'SGE', 'GES', 'PPH', 'SPH', 'BVF', 'CTG', 'AIL', 'FVL', 'CID', 'NVT', 'MLG', 'BRO', 'PRL', 'FLA', 'SPL', 'SBO', 'ORP', 'BTH', 'MC7', 'TTH', 'CAG', 'AVH', 'BFP', 'ML7', 'POT', 'TOP', 'PSL', 'FSG', 'CIT', 'MID', 'LP7', 'PFH', 'MIS', 'CES', 'CMB', 'PEP', 'TAB', 'VE6', 'MLO', 'MOH', 'SOG', 'ROQ'}
  139 +
  140 +Mais il est plus lisible de regarder ~LIBCULTURE~
  141 +#+begin_src python :results output :session :exports both
  142 +libs_cultures = set(df31.LIBCULTURE)
  143 +print(len(libs_cultures))
  144 +print(libs_cultures)
  145 +#+end_src
  146 +
  147 +#+RESULTS:
  148 +: 20
  149 +: {'11_POIS_PROTEAGINEUX', '62_MELANGE_PROTEAGINEUX', '12_FEVE_FEVEROLE', '00_XXXXXXXXXX', '31_POMME_DE_TERRE_CONSO', '14_SOJA', '03_ORGE_HIVER', '01_BLE_TENDRE', '13_TOURNESOL', '21_BETTERAVE_INDUSTRIELLE', '04_ORGE_PRINTEMPS', '07_TRITICALE', '09_COLZA', '05_AVOINE', '15_SORGHO', '06_SEIGLE', '61_MELANGE_CEREALES', '02_BLE_DUR', '17_MAIS_GRAIN', '20_MAIS_FOURRAGE'}
  150 +
  151 +Mais il y a beaucoup plus de ~CODE_CULTU~ que de ~LIBCULTURE~. Peut-être que le second n'existe que pour les parcelles avec rendement ?
  152 +
  153 +Lien entre les 2 champs?
  154 +#+begin_src python :results output :session :exports both
  155 +codes_libs = df31[['CODE_CULTU', 'LIBCULTURE']].drop_duplicates()
  156 +print(len(codes_libs))
  157 +#+end_src
  158 +
  159 +#+RESULTS:
  160 +: 167
  161 +
  162 +
  163 +** Répartition des cultures
  164 +#+begin_src python :results output :session :exports both
  165 +df31.LIBCULTURE.value_counts()
  166 +#+end_src
  167 +
  168 +#+RESULTS:
  169 +#+begin_example
  170 +00_XXXXXXXXXX 83046
  171 +01_BLE_TENDRE 10423
  172 +13_TOURNESOL 10073
  173 +02_BLE_DUR 8268
  174 +17_MAIS_GRAIN 5470
  175 +03_ORGE_HIVER 3012
  176 +09_COLZA 2090
  177 +14_SOJA 1973
  178 +20_MAIS_FOURRAGE 1447
  179 +15_SORGHO 1404
  180 +07_TRITICALE 1201
  181 +11_POIS_PROTEAGINEUX 1021
  182 +12_FEVE_FEVEROLE 470
  183 +62_MELANGE_PROTEAGINEUX 324
  184 +05_AVOINE 293
  185 +61_MELANGE_CEREALES 290
  186 +04_ORGE_PRINTEMPS 251
  187 +31_POMME_DE_TERRE_CONSO 157
  188 +06_SEIGLE 19
  189 +21_BETTERAVE_INDUSTRIELLE 14
  190 +Name: LIBCULTURE, dtype: int64
  191 +#+end_example
  192 +
  193 +
  194 +** Histogramme des valeurs de rendement
  195 +#+begin_src python :results output :session :exports both
  196 +df31.RENDNORME.describe()
  197 +#+end_src
  198 +
  199 +#+RESULTS:
  200 +: count 4713.000000
  201 +: mean 46.262873
  202 +: std 20.104306
  203 +: min 1.000000
  204 +: 25% 27.840000
  205 +: 50% 48.000000
  206 +: 75% 63.000000
  207 +: max 120.000000
  208 +: Name: RENDNORME, dtype: float64
  209 +
  210 +#+begin_src python :results file :session :var matplot_lib_filename=(org-babel-temp-file "figure" ".png") :exports both
import matplotlib.pyplot as plt
# DataFrame.hist() opens its own figure when no ``ax`` is given, so the size
# must be handed to it directly; a preceding plt.figure(figsize=...) would
# only leave an empty, unused figure behind.
df31.hist(column='RENDNORME', figsize=(10, 5))
plt.savefig(matplot_lib_filename)
matplot_lib_filename
  216 +#+end_src
  217 +
  218 +#+RESULTS:
  219 +[[file:/tmp/babel-X17w0V/figureR5oWbq.png]]
  220 +
  221 +Valeurs manquantes de rendement?
  222 +#+begin_src python :results output :session :exports both
  223 +num_parcelles = len(df31.RENDNORME)
  224 +num_rendements = df31.RENDNORME.count()
  225 +print('{} parcelles dont {} avec information de rendement'.format(num_parcelles, num_rendements))
  226 +#+end_src
  227 +
  228 +#+RESULTS:
  229 +: 131246 parcelles dont 4713 avec information de rendement
  230 +
  231 +
  232 +** Disponibilité des données de rendement par culture
  233 +#+begin_src python :results output :session :exports both
  234 +df31[df31.RENDNORME.notnull()]['LIBCULTURE'].value_counts()
  235 +#+end_src
  236 +
  237 +#+RESULTS:
  238 +#+begin_example
  239 +13_TOURNESOL 1178
  240 +02_BLE_DUR 1125
  241 +01_BLE_TENDRE 935
  242 +17_MAIS_GRAIN 294
  243 +09_COLZA 258
  244 +03_ORGE_HIVER 250
  245 +14_SOJA 241
  246 +11_POIS_PROTEAGINEUX 136
  247 +15_SORGHO 118
  248 +07_TRITICALE 70
  249 +12_FEVE_FEVEROLE 57
  250 +20_MAIS_FOURRAGE 17
  251 +05_AVOINE 15
  252 +04_ORGE_PRINTEMPS 14
  253 +06_SEIGLE 4
  254 +31_POMME_DE_TERRE_CONSO 1
  255 +Name: LIBCULTURE, dtype: int64
  256 +#+end_example
  257 +
  258 +#+begin_src python :results file :session :var matplot_lib_filename=(org-babel-temp-file "figure" ".png") :exports both
  259 +import matplotlib.pyplot as plt
  260 +plt.figure(figsize=(10,5))
  261 +df31[df31.RENDNORME.notnull()]['CODE_CULTU'].value_counts().plot.bar()
  262 +plt.savefig(matplot_lib_filename)
  263 +matplot_lib_filename
  264 +#+end_src
  265 +
  266 +#+RESULTS:
  267 +[[file:/tmp/babel-X17w0V/figureJIfLg1.png]]
  268 +
  269 +** Distribution statistique des rendements par culture
  270 +
  271 +#+begin_src python :results file :session :var matplot_lib_filename=(org-babel-temp-file "figure" ".png") :exports both
  272 +rnd_counts = df31[df31.RENDNORME.notnull()]['CODE_CULTU'].value_counts()
  273 +maj6_rnd = rnd_counts.iloc[:6]
  274 +fig, axes = plt.subplots(3, 2, sharex=True, sharey=True)
  275 +print(axes)
  276 +for current_cult, axis in zip(maj6_rnd.index, axes.flatten()):
  277 + axis.set_xlabel(current_cult)
  278 + df31[df31['CODE_CULTU']==current_cult].hist(column='RENDNORME', ax=axis)
  279 + axis.set_title('')
  280 +plt.suptitle('RENDNORME')
  281 +plt.savefig(matplot_lib_filename)
  282 +matplot_lib_filename
  283 +#+end_src
  284 +
  285 +#+RESULTS:
  286 +[[file:/tmp/babel-X17w0V/figureukzYe2.png]]
  287 +
  288 +
  289 +
  290 +* Distribution spatiale des rendements
  291 +** Ne garder que les parcelles avec une seule culture
  292 +Le champ SURF_ADM est la surface de la parcelle. Le champ SURUTISOL est la surface totale de toutes les parcelles de la même culture dans la même exploitation. S'il n'y a qu'une parcelle de cette culture dans cette exploitation, ces 2 surfaces sont très proches (ou égales). Le rendement moyen pour toutes les parcelles de l'exploitation de même culture est donné par RENDNORME.
  293 +
  294 +#+begin_src python :results output :session :exports both
  295 +df31par = df31.query('RENDNORME.notnull() & SURUTISOL.notnull() & (abs(SURUTISOL - SURF_ADM)/(SURF_ADM) < 0.05)')
  296 +print(df31par[['SURF_ADM', 'SURUTISOL']].head())
  297 +print('------------')
  298 +print(df31par['LIBCULTURE'].value_counts())
  299 +#+end_src
  300 +
  301 +#+RESULTS:
  302 +#+begin_example
  303 +SURF_ADM SURUTISOL
  304 +78 27.69 27.00
  305 +644 3.42 3.50
  306 +937 15.87 16.00
  307 +5115 1.05 1.06
  308 +5303 19.41 20.13
  309 +------------
  310 +01_BLE_TENDRE 23
  311 +03_ORGE_HIVER 20
  312 +14_SOJA 20
  313 +11_POIS_PROTEAGINEUX 19
  314 +17_MAIS_GRAIN 18
  315 +13_TOURNESOL 18
  316 +09_COLZA 12
  317 +15_SORGHO 11
  318 +07_TRITICALE 7
  319 +12_FEVE_FEVEROLE 7
  320 +02_BLE_DUR 6
  321 +04_ORGE_PRINTEMPS 4
  322 +05_AVOINE 3
  323 +06_SEIGLE 2
  324 +20_MAIS_FOURRAGE 1
  325 +31_POMME_DE_TERRE_CONSO 1
  326 +Name: LIBCULTURE, dtype: int64
  327 +#+end_example
  328 +** Histogramme des rendements par culture sur les parcelles pures
  329 +#+begin_src python :results file :session :var matplot_lib_filename=(org-babel-temp-file "figure" ".png") :exports both
  330 +rnd_counts = df31par['CODE_CULTU'].value_counts()
  331 +maj6_rnd = rnd_counts.iloc[:6]
  332 +fig, axes = plt.subplots(3, 2, sharex=True, sharey=True)
  333 +print(axes)
  334 +for current_cult, axis in zip(maj6_rnd.index, axes.flatten()):
  335 + axis.set_xlabel(current_cult)
  336 + df31par[df31par['CODE_CULTU']==current_cult].hist(column='RENDNORME', ax=axis)
  337 + axis.set_title('')
  338 +plt.suptitle('RENDNORME')
  339 +plt.savefig(matplot_lib_filename)
  340 +matplot_lib_filename
  341 +#+end_src
  342 +
  343 +#+RESULTS:
  344 +[[file:/tmp/babel-X17w0V/figurevurUbh.png]]
  345 +
  346 +** Ajouter une colonne avec le centroïde de la parcelle
  347 +#+begin_src python :results output :session :exports both
  348 +df31par['centroid'] = df31par.geometry.centroid
  349 +print(df31par.centroid.head())
  350 +#+end_src
  351 +
  352 +#+RESULTS:
  353 +: 78 POINT (576943.0567814494 6248166.951789126)
  354 +: 644 POINT (573025.7548245641 6264003.533300648)
  355 +: 937 POINT (572496.3795754968 6266017.564098726)
  356 +: 5115 POINT (560246.9060276946 6236853.034810145)
  357 +: 5303 POINT (583985.3814635368 6245950.934973895)
  358 +: dtype: object
  359 +: 576943.0567814494
  360 +
  361 +#+begin_src python :results file :session :var matplot_lib_filename=(org-babel-temp-file "figure" ".png") :exports both
  362 +import matplotlib.pyplot as plt
  363 +plt.figure(figsize=(200,100))
  364 +df31par.plot(column='RENDNORME', cmap='viridis')
  365 +#plt.show()
  366 +plt.savefig(matplot_lib_filename)
  367 +matplot_lib_filename
  368 +#+end_src
  369 +
  370 +#+RESULTS:
  371 +[[file:/tmp/babel-X17w0V/figure4XvQFY.png]]
  372 +
  373 +
  374 +
  375 +Il n'y a pas de corrélation visible entre rendement et position dans le département.
  376 +** Corrélation entre rendement et taille de la parcelle
  377 +
  378 +#+begin_src python :results output :session :exports both
# Sort the labels before numbering them: the iteration order of a set of
# strings changes from one Python session to the next, which would give each
# crop a different colour index on every run.
colordict = {lc: i for i, lc in enumerate(sorted(set(df31par['LIBCULTURE'])))}
print(colordict)
# One colour index per parcel, in the row order of df31par.
colors = [colordict[c] for c in df31par['LIBCULTURE']]
  382 +#+end_src
  383 +
  384 +#+RESULTS:
  385 +: {'14_SOJA': 0, '02_BLE_DUR': 1, '03_ORGE_HIVER': 2, '01_BLE_TENDRE': 3, '11_POIS_PROTEAGINEUX': 4, '13_TOURNESOL': 5, '04_ORGE_PRINTEMPS': 6, '07_TRITICALE': 7, '15_SORGHO': 8, '09_COLZA': 9, '12_FEVE_FEVEROLE': 10, '17_MAIS_GRAIN': 11, '20_MAIS_FOURRAGE': 12, '31_POMME_DE_TERRE_CONSO': 13, '05_AVOINE': 14, '06_SEIGLE': 15}
  386 +
  387 +
  388 +
  389 +#+begin_src python :results file :session :var matplot_lib_filename=(org-babel-temp-file "figure" ".png") :exports both
  390 +import matplotlib.pyplot as plt
  391 +import matplotlib.cm as cm
  392 +plt.figure(figsize=(10,5))
  393 +plt.scatter(df31par['SURF_ADM'], df31par['RENDNORME'], c=colors, cmap=cm.get_cmap('tab20',len(colordict)))
  394 +cbar = plt.colorbar()
  395 +cbar.ax.get_yaxis().set_ticks([])
  396 +for lab, j in colordict.items():
  397 + cbar.ax.text(.5, j*0.95, lab, ha='left')
  398 +plt.xlabel('SURF_ADM')
  399 +plt.ylabel('REND')
  400 +plt.savefig(matplot_lib_filename)
  401 +matplot_lib_filename
  402 +#+end_src
  403 +
  404 +#+RESULTS:
  405 +[[file:/tmp/babel-X17w0V/figureUjRq6C.png]]
  406 +
  407 +Il semble y avoir une corrélation positive entre rendement et taille de la parcelle, conditionnée par la classe
  408 +
  409 +#+begin_src python :session :results output :exports both
  410 +rnd_counts = df31par['LIBCULTURE'].value_counts()
  411 +maj6_rnd = rnd_counts.iloc[:6]
  412 +#+end_src
  413 +
  414 +#+RESULTS:
  415 +: 01_BLE_TENDRE 23
  416 +: 03_ORGE_HIVER 20
  417 +: 14_SOJA 20
  418 +: 11_POIS_PROTEAGINEUX 19
  419 +: 17_MAIS_GRAIN 18
  420 +: 13_TOURNESOL 18
  421 +: Name: LIBCULTURE, dtype: int64
  422 +
  423 +#+begin_src python :session :results output :exports both
  424 +import matplotlib.pyplot as plt
  425 +import matplotlib.gridspec as gridspec
  426 +import seaborn as sns
  427 +import numpy as np
  428 +
class SeabornFig2Grid():
    """Move the axes of an existing seaborn grid into a cell of another figure.

    The constructor does all the work: it re-parents the axes of
    ``seaborngrid`` onto ``fig`` at the location described by
    ``subplot_spec``, then closes the figure seaborn had created.
    """

    def __init__(self, seaborngrid, fig, subplot_spec):
        """Re-parent ``seaborngrid`` into ``fig`` at ``subplot_spec``.

        Args:
            seaborngrid: a seaborn FacetGrid, PairGrid or JointGrid.
            fig: the figure that receives the axes.
            subplot_spec: the cell of ``fig`` (e.g. ``GridSpec(...)[i]``) in
                which the grid is laid out.
        """
        self.fig = fig
        self.sg = seaborngrid
        self.subplot = subplot_spec
        # Any other grid type is not moved; only _finalize() runs for it.
        if isinstance(self.sg, sns.axisgrid.FacetGrid) or \
            isinstance(self.sg, sns.axisgrid.PairGrid):
            self._movegrid()
        elif isinstance(self.sg, sns.axisgrid.JointGrid):
            self._movejointgrid()
        self._finalize()

    def _movegrid(self):
        """Move every axes of a PairGrid or FacetGrid into a matching sub-grid."""
        self._resize()
        n = self.sg.axes.shape[0]
        m = self.sg.axes.shape[1]
        # Sub-grid with the same n x m layout as the seaborn grid, nested
        # inside the target cell.
        self.subgrid = gridspec.GridSpecFromSubplotSpec(n,m, subplot_spec=self.subplot)
        for i in range(n):
            for j in range(m):
                self._moveaxes(self.sg.axes[i,j], self.subgrid[i,j])

    def _movejointgrid(self):
        """Move the joint and the two marginal axes of a JointGrid."""
        # Heights are read before _resize() changes the seaborn figure size.
        h= self.sg.ax_joint.get_position().height
        h2= self.sg.ax_marg_x.get_position().height
        # Rounded ratio of the joint axes height to the top marginal height.
        r = int(np.round(h/h2))
        self._resize()
        self.subgrid = gridspec.GridSpecFromSubplotSpec(r+1,r+1, subplot_spec=self.subplot)

        # Joint plot: every row but the first, every column but the last.
        self._moveaxes(self.sg.ax_joint, self.subgrid[1:, :-1])
        # x marginal: first row, above the joint plot.
        self._moveaxes(self.sg.ax_marg_x, self.subgrid[0, :-1])
        # y marginal: last column, beside the joint plot.
        self._moveaxes(self.sg.ax_marg_y, self.subgrid[1:, -1])

    def _moveaxes(self, ax, gs):
        """Attach ``ax`` to ``self.fig`` and place it at sub-grid location ``gs``."""
        #https://stackoverflow.com/a/46906599/4124317
        ax.remove()
        ax.figure=self.fig
        # NOTE(review): Figure.axes may return a fresh list in the installed
        # matplotlib, in which case this append has no effect -- confirm.
        self.fig.axes.append(ax)
        self.fig.add_axes(ax)
        # NOTE(review): private attribute; looks redundant with the
        # set_subplotspec() call below -- confirm before removing.
        ax._subplotspec = gs
        ax.set_position(gs.get_position(self.fig))
        ax.set_subplotspec(gs)

    def _finalize(self):
        """Close the seaborn figure, follow resizes of ``self.fig`` and redraw."""
        plt.close(self.sg.fig)
        self.fig.canvas.mpl_connect("resize_event", self._resize)
        self.fig.canvas.draw()

    def _resize(self, evt=None):
        """Give the seaborn figure the size of ``self.fig``; ``evt`` is unused."""
        self.sg.fig.set_size_inches(self.fig.get_size_inches())
  481 +#+end_src
  482 +
  483 +#+RESULTS:
  484 +
  485 +#+begin_src python :results file :session :var matplot_lib_filename=(org-babel-temp-file "figure" ".png") :exports both
  486 +import matplotlib.pyplot as plt
  487 +import matplotlib.gridspec as gridspec
  488 +import seaborn as sns; sns.set()
  489 +from scipy import stats
  490 +
def rsquare(a, b):
    """Return the squared Spearman rank correlation of *a* and *b*."""
    return stats.spearmanr(a, b)[0] ** 2


# One joint plot (parcel surface vs. yield) per crop listed in maj6_rnd.
plotlist = []
for current_cult in maj6_rnd.index:
    dfcurr = df31par.query('LIBCULTURE == "{}"'.format(current_cult))
    # x/y are given by keyword, the form accepted by every seaborn release.
    g = sns.jointplot(x='SURF_ADM', y='RENDNORME', data=dfcurr, kind='reg')
    g.set_axis_labels('SURF_' + current_cult, 'REND')
    # NOTE(review): JointGrid.annotate is reportedly deprecated and then
    # removed in later seaborn releases -- confirm the installed version.
    g.annotate(rsquare, template="{stat}: {val:.2f}", stat="$Spearman R^2$", loc="upper left", fontsize=12)
    plotlist.append(g)

# Gather the individual joint plots into a single 3 x 2 figure.
fig = plt.figure(figsize=(10,10))

gs = gridspec.GridSpec(3, 2)

for i, p in enumerate(plotlist):
    SeabornFig2Grid(p, fig, gs[i])

gs.tight_layout(fig)
plt.savefig(matplot_lib_filename)
matplot_lib_filename
  509 +#+end_src
  510 +
  511 +#+RESULTS:
  512 +[[file:/tmp/babel-X17w0V/figureYa6zlX.png]]
  513 +
  514 +La corrélation n'est valable que pour le blé tendre et le tournesol. Il faudra évidemment refaire tout ça avec tous les départements.
  515 +
  516 +** Corrélation avec autres variables (qui n'ont pas besoin des images)
  517 +*** Irrigation
  518 +#+begin_src python :results file :session :var matplot_lib_filename=(org-babel-temp-file "figure" ".png") :exports both
  519 +import matplotlib.pyplot as plt
  520 +import matplotlib.gridspec as gridspec
  521 +import seaborn as sns; sns.set()
  522 +from scipy import stats
  523 +
def rsquare(a, b):
    """Return the squared Spearman rank correlation of *a* and *b*."""
    return stats.spearmanr(a, b)[0] ** 2


# One joint plot (MMEAU column vs. yield) per crop listed in maj6_rnd.
plotlist = []
for current_cult in maj6_rnd.index:
    dfcurr = df31par.query('LIBCULTURE == "{}"'.format(current_cult))
    # x/y are given by keyword, the form accepted by every seaborn release.
    g = sns.jointplot(x='MMEAU', y='RENDNORME', data=dfcurr, kind='reg')
    g.set_axis_labels('IRR_' + current_cult, 'REND')
    # NOTE(review): JointGrid.annotate is reportedly deprecated and then
    # removed in later seaborn releases -- confirm the installed version.
    g.annotate(rsquare, template="{stat}: {val:.2f}", stat="$Spearman R^2$", loc="upper left", fontsize=12)
    plotlist.append(g)

# Gather the individual joint plots into a single 3 x 2 figure.
fig = plt.figure(figsize=(10,10))

gs = gridspec.GridSpec(3, 2)

for i, p in enumerate(plotlist):
    SeabornFig2Grid(p, fig, gs[i])

gs.tight_layout(fig)
plt.savefig(matplot_lib_filename)
matplot_lib_filename
  542 +#+end_src
  543 +
  544 +#+RESULTS:
  545 +[[file:/tmp/babel-X17w0V/figureVmssHE.png]]
  546 +
  547 +*** Bio
  548 +#+begin_src python :results file :session :var matplot_lib_filename=(org-babel-temp-file "figure" ".png") :exports both
  549 +import matplotlib.pyplot as plt
  550 +import matplotlib.gridspec as gridspec
  551 +import seaborn as sns; sns.set()
  552 +from scipy import stats
  553 +
def rsquare(a, b):
    """Return the squared Spearman rank correlation of *a* and *b*."""
    return stats.spearmanr(a, b)[0] ** 2


# One joint plot (BIO column vs. yield) per crop listed in maj6_rnd.
plotlist = []
for current_cult in maj6_rnd.index:
    dfcurr = df31par.query('LIBCULTURE == "{}"'.format(current_cult))
    # x/y are given by keyword, the form accepted by every seaborn release.
    g = sns.jointplot(x='BIO', y='RENDNORME', data=dfcurr, kind='reg')
    g.set_axis_labels('BIO_' + current_cult, 'REND')
    # NOTE(review): JointGrid.annotate is reportedly deprecated and then
    # removed in later seaborn releases -- confirm the installed version.
    g.annotate(rsquare, template="{stat}: {val:.2f}", stat="$Spearman R^2$", loc="upper left", fontsize=12)
    plotlist.append(g)

# Gather the individual joint plots into a single 3 x 2 figure.
fig = plt.figure(figsize=(10,10))

gs = gridspec.GridSpec(3, 2)

for i, p in enumerate(plotlist):
    SeabornFig2Grid(p, fig, gs[i])

gs.tight_layout(fig)
plt.savefig(matplot_lib_filename)
matplot_lib_filename
  572 +#+end_src
  573 +
  574 +#+RESULTS:
  575 +[[file:/tmp/babel-X17w0V/figureE5mY6F.png]]
  576 +
  577 +*** Différence entre surface admin et vraie surface
  578 +*** Latitude
  579 +* Quels sont les départements contenant des informations de rendement?
  580 +* Quelles sont les cultures pour lesquelles les informations de rendement sont les plus nombreuses?
  581 +* Peut-on prédire le rendement à partir des données TERLAB seules?
  582 +- Localisation, culture, surface de la parcelle
  583 +* Quelles sont les tuiles S2 nécessaires pour couvrir chaque département?
... ...