From f470b3caeb1c963a64e93c823a67e0e2403f92d5 Mon Sep 17 00:00:00 2001 From: Jordi Inglada Date: Wed, 5 Dec 2018 18:58:12 +0100 Subject: [PATCH] Corrélations entre rendements et vars contenues dans TERLAB --- notebook/exploration.org | 387 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------------------------------------------------------------- 1 file changed, 306 insertions(+), 81 deletions(-) diff --git a/notebook/exploration.org b/notebook/exploration.org index b872fd6..2aea508 100644 --- a/notebook/exploration.org +++ b/notebook/exploration.org @@ -38,6 +38,8 @@ def find_terlab_file(dpt): : python.el: native completion setup loaded + + #+begin_src python :results output :session :exports both assert '/home/inglada/stok/DATA/OSO/MAA_SSP/RPG_TERLAB_DEP90-91-93-94-95_2017.7z'==find_terlab_file('94') assert '/home/inglada/stok/DATA/OSO/MAA_SSP/RPG_TERLAB_DEP90-91-93-94-95_2017.7z'==find_terlab_file('95') @@ -64,8 +66,6 @@ def shape2df(departement): #+RESULTS: - - * Quelles sont les variables intéressantes dans les données TERLAB ** On vérifie que le fichier est bien lu @@ -75,12 +75,12 @@ print(df31.head()) #+end_src #+RESULTS: -: CODE_CULTU SURF_ADM PRECISION SEMENCE ... SURUTISOL RENDNORME MMEAU geometry -: 0 ORH 1.42 None 0 ... NaN NaN NaN POLYGON ((506016.382600002 6234988.195600003, ... -: 1 PPH 0.00 None 0 ... NaN NaN NaN POLYGON ((506032.0680000037 6235002.251000002,... -: 2 PPH 0.00 None 0 ... NaN NaN NaN POLYGON ((506108.3540000021 6234766.824000001,... -: 3 ORH 1.87 None 0 ... NaN NaN NaN POLYGON ((506107.6996000037 6234765.226100001,... -: 4 PPH 0.00 None 0 ... NaN NaN NaN POLYGON ((506874.6119000018 6234495.103300001,... +: CODE_CULTU SURF_ADM PRECISION SEMENCE DEST_ICHN ... 
LIBCULTURE SURUTISOL RENDNORME MMEAU geometry +: 0 ORH 1.42 None 0 None ... 03_ORGE_HIVER NaN NaN NaN POLYGON ((506016.382600002 6234988.195600003, ... +: 1 PPH 0.00 None 0 None ... 00_XXXXXXXXXX NaN NaN NaN POLYGON ((506032.0680000037 6235002.251000002,... +: 2 PPH 0.00 None 0 None ... 00_XXXXXXXXXX NaN NaN NaN POLYGON ((506108.3540000021 6234766.824000001,... +: 3 ORH 1.87 None 0 None ... 03_ORGE_HIVER NaN NaN NaN POLYGON ((506107.6996000037 6234765.226100001,... +: 4 PPH 0.00 None 0 None ... 00_XXXXXXXXXX NaN NaN NaN POLYGON ((506874.6119000018 6234495.103300001,... : : [5 rows x 18 columns] @@ -96,6 +96,7 @@ print(df31.columns) : 'RENDNORME', 'MMEAU', 'geometry'], : dtype='object') + Une ligne #+begin_src python :results output :session :exports both print(df31.iloc[0]) @@ -134,7 +135,7 @@ print(codes_cultures) #+RESULTS: : 167 -: {'LOT', 'MPC', 'AVH', 'GES', 'PCH', 'TTP', 'DTY', 'LH6', 'PAG', 'FNU', 'PPO', 'SGH', 'MC5', 'TRN', 'MLG', 'CZH', 'PPP', 'SGE', 'LAV', 'ORP', 'BRO', 'OLI', 'ME7', 'POR', 'CML', 'SOJ', 'FEV', 'FLA', 'CHS', 'LH7', 'SNE', 'LU5', 'MIS', 'SPL', 'NOX', 'FVL', 'BVF', 'LUD', 'CID', 'MIE', 'SAI', 'RGA', 'PTR', 'PEP', 'BDH', 'LU6', 'ORH', 'CPL', 'ML6', 'CTG', 'SPH', 'LEC', 'TOP', 'PFP', 'MC7', 'FRA', 'SRS', 'CRD', 'VRT', 'VRG', 'BTP', 'ORT', 'BOR', 'BTA', 'RVI', 'TR6', 'PPR', 'MOT', 'LUZ', 'CAG', 'OIG', 'CHU', 'CIT', 'TAB', 'MC6', 'RDI', 'MLO', 'CCT', 'LU7', 'FF7', 'MID', 'HBL', 'PH6', 'MLT', 'TR5', 'SBO', 'ME6', 'EPE', 'PHI', 'CEL', 'SOG', 'SA5', 'ANE', 'BTH', 'PSL', 'VE7', 'LDH', 'ML5', 'SA6', 'PCL', 'CAR', 'POT', 'PRL', 'PAS', 'MCT', 'NVT', 'OAG', 'CPH', 'MOH', 'BFS', 'J6S', 'TCR', 'CCN', 'AUB', 'J5M', 'AIL', 'FAG', 'TTH', 'MCR', 'TRE', 'LBF', 'TOM', 'PAN', 'CZP', 'LIH', 'J6P', 'AGR', 'MH7', 'GFP', 'TRU', 'LO7', 'CGO', 'PVP', 'AVP', 'FNO', 'SA7', 'PFR', 'PPH', 'ML7', 'PFH', 'FET', 'BDP', 'BOP', 'NOS', 'MH5', 'PTC', 'MPA', 'TR7', 'PH7', 'CES', 'FLP', 'CHT', 'LP7', 'HAR', 'VE6', 'CMB', 'BTN', 'MH6', 'FSG', 'BFP', 'PPA', 'VES', 'ROQ', 'PP7', 
'LIP', 'VRC', 'ART'} +: {'PCL', 'LO7', 'PH7', 'AGR', 'SAI', 'VE7', 'MCT', 'DTY', 'PHI', 'MOT', 'PPO', 'J6S', 'BTN', 'LIH', 'FLP', 'MPA', 'FNU', 'NOX', 'LUZ', 'CHT', 'OIG', 'CCT', 'MCR', 'POR', 'ME7', 'VRC', 'PAS', 'PVP', 'HBL', 'LOT', 'MLT', 'BTA', 'ML6', 'CEL', 'PH6', 'ANE', 'AUB', 'SGH', 'LIP', 'ORT', 'J5M', 'TRN', 'FAG', 'HAR', 'PFR', 'SRS', 'CHU', 'FEV', 'TR6', 'CAR', 'VES', 'BDH', 'ART', 'TOM', 'PPP', 'MH7', 'LEC', 'FRA', 'SA6', 'TTP', 'NOS', 'RGA', 'TR5', 'PFP', 'LH6', 'PCH', 'ORH', 'LH7', 'RDI', 'PAN', 'BFS', 'FET', 'EPE', 'MPC', 'ME6', 'BDP', 'SA7', 'SOJ', 'CPL', 'PAG', 'J6P', 'GFP', 'RVI', 'LBF', 'CPH', 'MH6', 'LU6', 'LU5', 'BOR', 'CZP', 'MC5', 'LUD', 'TCR', 'PPR', 'SA5', 'TRU', 'PP7', 'PPA', 'FNO', 'CGO', 'AVP', 'TR7', 'LAV', 'VRG', 'BTP', 'TRE', 'CML', 'PTC', 'MIE', 'ML5', 'SNE', 'MH5', 'PTR', 'CZH', 'LU7', 'FF7', 'CCN', 'LDH', 'CRD', 'MC6', 'CHS', 'OAG', 'OLI', 'VRT', 'BOP', 'SGE', 'GES', 'PPH', 'SPH', 'BVF', 'CTG', 'AIL', 'FVL', 'CID', 'NVT', 'MLG', 'BRO', 'PRL', 'FLA', 'SPL', 'SBO', 'ORP', 'BTH', 'MC7', 'TTH', 'CAG', 'AVH', 'BFP', 'ML7', 'POT', 'TOP', 'PSL', 'FSG', 'CIT', 'MID', 'LP7', 'PFH', 'MIS', 'CES', 'CMB', 'PEP', 'TAB', 'VE6', 'MLO', 'MOH', 'SOG', 'ROQ'} Mais il est plus lisible de regarder ~LIBCULTURE~ #+begin_src python :results output :session :exports both @@ -145,7 +146,7 @@ print(libs_cultures) #+RESULTS: : 20 -: {'07_TRITICALE', '00_XXXXXXXXXX', '21_BETTERAVE_INDUSTRIELLE', '20_MAIS_FOURRAGE', '31_POMME_DE_TERRE_CONSO', '17_MAIS_GRAIN', '11_POIS_PROTEAGINEUX', '15_SORGHO', '02_BLE_DUR', '13_TOURNESOL', '05_AVOINE', '09_COLZA', '06_SEIGLE', '01_BLE_TENDRE', '61_MELANGE_CEREALES', '62_MELANGE_PROTEAGINEUX', '12_FEVE_FEVEROLE', '03_ORGE_HIVER', '14_SOJA', '04_ORGE_PRINTEMPS'} +: {'11_POIS_PROTEAGINEUX', '62_MELANGE_PROTEAGINEUX', '12_FEVE_FEVEROLE', '00_XXXXXXXXXX', '31_POMME_DE_TERRE_CONSO', '14_SOJA', '03_ORGE_HIVER', '01_BLE_TENDRE', '13_TOURNESOL', '21_BETTERAVE_INDUSTRIELLE', '04_ORGE_PRINTEMPS', '07_TRITICALE', '09_COLZA', 
'05_AVOINE', '15_SORGHO', '06_SEIGLE', '61_MELANGE_CEREALES', '02_BLE_DUR', '17_MAIS_GRAIN', '20_MAIS_FOURRAGE'} Mais il y a beaucoup plus de ~CODE_CULTU~ que de ~LIBCULTURE~. Peut-être que le 2è n'existe que pour les parcelles avec rendement? @@ -158,6 +159,7 @@ print(len(codes_libs)) #+RESULTS: : 167 + ** Répartition des cultures #+begin_src python :results output :session :exports both df31.LIBCULTURE.value_counts() @@ -214,7 +216,7 @@ matplot_lib_filename #+end_src #+RESULTS: -[[file:/tmp/babel-QEj3SP/figureDH9QA6.png]] +[[file:/tmp/babel-X17w0V/figureR5oWbq.png]] Valeurs manquantes de rendement? #+begin_src python :results output :session :exports both @@ -262,77 +264,9 @@ matplot_lib_filename #+end_src #+RESULTS: -[[file:/tmp/babel-QEj3SP/figureaXXL2p.png]] +[[file:/tmp/babel-X17w0V/figureJIfLg1.png]] ** Distribution statisique des rendements par culture -#+RESULTS: -#+begin_example -TRN - CODE_CULTU SURF_ADM PRECISION SEMENCE ... SURUTISOL RENDNORME MMEAU geometry -24 TRN 0.60 None 0 ... NaN NaN NaN POLYGON ((555017.3030000031 6288487.261, 55503... -30 TRN 3.65 None 0 ... NaN NaN NaN POLYGON ((581263.6560000032 6252087.490000002,... -31 TRN 6.69 None 0 ... NaN NaN NaN POLYGON ((580446.200000003 6251350.913000003, ... -33 TRN 14.25 None 0 ... NaN NaN NaN POLYGON ((580111.6431000009 6250744.730800003,... -45 TRN 1.05 None 0 ... NaN NaN NaN POLYGON ((581790.2880000025 6251575.461000003,... -48 TRN 12.44 None 0 ... NaN NaN NaN POLYGON ((581401.3687999994 6250285.316800002,... -53 TRN 2.16 None 0 ... NaN NaN NaN POLYGON ((554394.9580000043 6288677.411000002,... -54 TRN 8.48 None 0 ... NaN NaN NaN POLYGON ((554866.626000002 6288673.861000001, ... -58 TRN 3.52 None 0 ... NaN NaN NaN POLYGON ((553234.800999999 6287448.078000002, ... -72 TRN 0.44 None 0 ... 25.00 24.0 60.00 POLYGON ((575801.138000004 6245039.913000003, ... -81 TRN 9.79 None 0 ... 25.00 24.0 60.00 POLYGON ((575622.563000001 6245093.604600001, ... -85 TRN 0.60 None 0 ... 
25.00 24.0 60.00 POLYGON ((576226.2412 6245008.710800003, 57621... -86 TRN 3.60 None 0 ... 25.00 24.0 60.00 POLYGON ((576052.189199999 6245216.165200002, ... -88 TRN 6.77 None 0 ... 25.00 24.0 60.00 POLYGON ((577236.5139000043 6244098.652800001,... -90 TRN 2.04 None 0 ... 25.00 24.0 60.00 POLYGON ((577029.3339999989 6244296.425000001,... -92 TRN 2.49 None 0 ... 25.00 24.0 60.00 POLYGON ((578223.6330000013 6246276.920000002,... -104 TRN 1.00 None 0 ... NaN NaN NaN POLYGON ((555160.8339999989 6288614.073000003,... -111 TRN 5.59 None 0 ... NaN NaN NaN POLYGON ((609866.8835000023 6263464.590600003,... -119 TRN 7.70 None 0 ... NaN NaN NaN POLYGON ((586628.8779999986 6287265.739100002,... -228 TRN 29.52 None 0 ... NaN NaN NaN POLYGON ((586507.767400004 6249033.661900003, ... -240 TRN 4.68 None 0 ... NaN NaN NaN POLYGON ((511739.449000001 6241834.759, 511737... -322 TRN 4.32 None 0 ... NaN NaN NaN POLYGON ((610430.0773999989 6261651.982700001,... -323 TRN 5.69 None 0 ... NaN NaN NaN POLYGON ((614708.3079999983 6277454.932, 61471... -326 TRN 1.67 None 0 ... NaN NaN NaN POLYGON ((615043.6757000014 6277122.2916, 6150... -333 TRN 1.68 None 0 ... NaN NaN NaN POLYGON ((608512.7588 6262650.3046, 608452.698... -334 TRN 5.63 None 0 ... NaN NaN NaN POLYGON ((608843.5855000019 6262790.020500001,... -338 TRN 9.24 None 0 ... NaN NaN NaN POLYGON ((610236.4738000035 6262741.7326, 6102... -341 TRN 0.44 None 1 ... NaN NaN NaN POLYGON ((610038.8352999985 6262889.500300001,... -344 TRN 0.41 None 1 ... NaN NaN NaN POLYGON ((610132.8236000016 6262748.6778, 6101... -346 TRN 2.38 None 0 ... NaN NaN NaN POLYGON ((609944.3612000048 6262859.592700001,... -... ... ... ... ... ... ... ... ... -130858 TRN 0.00 None 0 ... NaN NaN NaN POLYGON ((591821.9479999989 6246798.5711, 5917... -130862 TRN 2.47 None 0 ... NaN NaN NaN POLYGON ((561445.3229999989 6247174.507000003,... -130885 TRN 6.04 None 0 ... NaN NaN NaN POLYGON ((518236.2965000048 6252405.700800002,... -130900 TRN 4.37 None 0 ... 
NaN NaN NaN POLYGON ((568873.9149999991 6253983.294, 56885... -130922 TRN 5.74 None 0 ... NaN NaN NaN POLYGON ((604401.186999999 6269327.276300002, ... -130923 TRN 2.04 None 0 ... NaN NaN NaN POLYGON ((604401.1928000003 6269327.282900002,... -130927 TRN 0.15 None 0 ... NaN NaN NaN POLYGON ((546528.9905000031 6313053.770800002,... -130952 TRN 7.88 None 0 ... NaN NaN NaN POLYGON ((562847.7470000014 6247395.255000003,... -130957 TRN 1.85 None 0 ... NaN NaN NaN POLYGON ((562305.936999999 6243942.735000003, ... -130958 TRN 1.80 None 0 ... NaN NaN NaN POLYGON ((562131.1979999989 6244150.639700003,... -130959 TRN 6.73 None 0 ... NaN NaN NaN POLYGON ((562298.2080999985 6244368.4877, 5624... -130980 TRN 0.63 None 0 ... NaN NaN NaN POLYGON ((546709.0170000046 6271094.791000001,... -130981 TRN 0.41 None 0 ... NaN NaN NaN POLYGON ((546708.8593000025 6271093.847900003,... -130987 TRN 2.59 None 0 ... 85.00 24.0 204.00 POLYGON ((597935.5368999988 6259834.054000001,... -130994 TRN 1.17 None 0 ... NaN NaN NaN POLYGON ((612614.8017000034 6263528.291700002,... -130995 TRN 3.37 None 0 ... NaN NaN NaN POLYGON ((612437.2829999998 6263578.228300001,... -131012 TRN 3.18 None 0 ... NaN NaN NaN POLYGON ((546528.6845000014 6313043.021100003,... -131023 TRN 0.70 None 0 ... NaN NaN NaN POLYGON ((541206.675999999 6300451.175000001, ... -131095 TRN 0.59 None 0 ... NaN NaN NaN POLYGON ((541208.5989000052 6300274.455000002,... -131096 TRN 1.33 None 0 ... NaN NaN NaN POLYGON ((541206.8114000037 6300275.685600001,... -131120 TRN 0.00 None 0 ... NaN NaN NaN POLYGON ((580034.3561000004 6250682.503200002,... -131134 TRN 0.00 None 0 ... NaN NaN NaN POLYGON ((597674.930399999 6258343.746300001, ... -131136 TRN 0.00 None 0 ... NaN NaN NaN POLYGON ((591802.4574000016 6246689.483200002,... -131146 TRN 4.60 None 0 ... NaN NaN NaN POLYGON ((581203.7241000012 6256713.529000003,... -131152 TRN 0.00 None 0 ... 80.16 24.0 192.38 POLYGON ((598665.1539999992 6259764.382800002,... -131156 TRN 2.49 None 0 ... 
NaN NaN NaN POLYGON ((602208.8986000046 6260887.712900002,... -131212 TRN 1.09 None 0 ... NaN NaN NaN POLYGON ((595624.1480000019 6250820.873, 59563... -131240 TRN 1.91 None 0 ... NaN NaN NaN POLYGON ((501596.7771000043 6236236.866, 50158... -131241 TRN 1.99 None 0 ... NaN NaN NaN POLYGON ((501600.5104999989 6236239.7509, 5015... -131243 TRN 0.38 None 0 ... NaN NaN NaN POLYGON ((592146.2823000029 6256409.391900003,... - -[10073 rows x 18 columns] -#+end_example #+begin_src python :results file :session :var matplot_lib_filename=(org-babel-temp-file "figure" ".png") :exports both rnd_counts = df31[df31.RENDNORME.notnull()]['CODE_CULTU'].value_counts() @@ -349,10 +283,301 @@ matplot_lib_filename #+end_src #+RESULTS: -[[file:/tmp/babel-QEj3SP/figureIq8abo.png]] +[[file:/tmp/babel-X17w0V/figureukzYe2.png]] + +* Distribution spatiale des rendements +** Ne garder que les parcelles avec une seule culture +Le champ SURF_ADM est la surface de la parcelle. Le champ SURUTISOL est la surface totale de toutes les parcelles de la même culture dans la même exploitation. S'il n'y a qu'une parcelle de cette culture dans cette exploitation, ces 2 surfaces sont très proches (ou égales). Le rendement moyen pour toutes les parcelles de l'exploitation de même culture est donné par RENDNORME. 
+
+#+begin_src python :results output :session :exports both
+# Keep parcels that have a yield (RENDNORME) and whose own area (SURF_ADM)
+# is within 5 % of the farm-level area for that crop (SURUTISOL), i.e. the
+# crop is grown on a single parcel of the farm.
+# .copy() makes the selection an independent frame, so that columns can be
+# added to it later without pandas' SettingWithCopyWarning.
+df31par = df31.query('RENDNORME.notnull() & SURUTISOL.notnull() & (abs(SURUTISOL - SURF_ADM)/(SURF_ADM) < 0.05)').copy()
+print(df31par[['SURF_ADM', 'SURUTISOL']].head())
+print('------------')
+print(df31par['LIBCULTURE'].value_counts())
+#+end_src
+
+#+RESULTS:
+#+begin_example
+SURF_ADM SURUTISOL
+78 27.69 27.00
+644 3.42 3.50
+937 15.87 16.00
+5115 1.05 1.06
+5303 19.41 20.13
+------------
+01_BLE_TENDRE 23
+03_ORGE_HIVER 20
+14_SOJA 20
+11_POIS_PROTEAGINEUX 19
+17_MAIS_GRAIN 18
+13_TOURNESOL 18
+09_COLZA 12
+15_SORGHO 11
+07_TRITICALE 7
+12_FEVE_FEVEROLE 7
+02_BLE_DUR 6
+04_ORGE_PRINTEMPS 4
+05_AVOINE 3
+06_SEIGLE 2
+20_MAIS_FOURRAGE 1
+31_POMME_DE_TERRE_CONSO 1
+Name: LIBCULTURE, dtype: int64
+#+end_example
+** Histogramme des rendements par culture sur les parcelles pures
+#+begin_src python :results file :session :var matplot_lib_filename=(org-babel-temp-file "figure" ".png") :exports both
+# One yield histogram per crop code, for the six most frequent codes among
+# the single-parcel selection; the panels share both axes.
+rnd_counts = df31par['CODE_CULTU'].value_counts()
+maj6_rnd = rnd_counts.iloc[:6]
+fig, axes = plt.subplots(3, 2, sharex=True, sharey=True)
+for current_cult, axis in zip(maj6_rnd.index, axes.flatten()):
+    axis.set_xlabel(current_cult)
+    df31par[df31par['CODE_CULTU']==current_cult].hist(column='RENDNORME', ax=axis)
+    # The crop code is already shown as the x label; drop the per-panel title.
+    axis.set_title('')
+plt.suptitle('RENDNORME')
+plt.savefig(matplot_lib_filename)
+matplot_lib_filename
+#+end_src
+
+#+RESULTS:
+[[file:/tmp/babel-X17w0V/figurevurUbh.png]]
+
+** Ajouter une colonne avec le centroïde de la parcelle
+#+begin_src python :results output :session :exports both
+df31par['centroid'] = df31par.geometry.centroid
+# Read the column by key: attribute access (df31par.centroid) resolves to
+# the GeoPandas ``centroid`` property, not to the column stored above.
+print(df31par['centroid'].head())
+#+end_src
+
+#+RESULTS:
+: 78 POINT (576943.0567814494 6248166.951789126)
+: 644 POINT (573025.7548245641 6264003.533300648)
+: 937 POINT (572496.3795754968 6266017.564098726)
+: 5115 POINT (560246.9060276946 6236853.034810145)
+: 5303 POINT (583985.3814635368 6245950.934973895)
+: dtype: object
+: 576943.0567814494
+
+#+begin_src python :results file :session :var matplot_lib_filename=(org-babel-temp-file "figure" ".png") :exports both
+import matplotlib.pyplot as plt
+# GeoDataFrame.plot() opens its own figure unless it is given an axes, so
+# the axes is created here and passed in; otherwise the requested size is
+# applied to an empty figure that is never saved.
+fig_map, ax_map = plt.subplots(figsize=(20, 10))
+df31par.plot(column='RENDNORME', cmap='viridis', ax=ax_map)
+plt.savefig(matplot_lib_filename)
+matplot_lib_filename
+#+end_src
+
+#+RESULTS:
+[[file:/tmp/babel-X17w0V/figure4XvQFY.png]]
+
+
+
+Il n'y a pas de corrélation visible entre rendement et position dans le département.
+** Corrélation entre rendement et taille de la parcelle
+
+#+begin_src python :results output :session :exports both
+# Map every crop label to an integer colour index.
+# NOTE: the indices follow set iteration order, which for strings can
+# differ from one interpreter session to the next.
+colordict = {lc:i for i, lc in enumerate(set(df31par['LIBCULTURE']))}
+print(colordict)
+colors = [colordict[c] for c in df31par['LIBCULTURE']]
+#+end_src
+
+#+RESULTS:
+: {'14_SOJA': 0, '02_BLE_DUR': 1, '03_ORGE_HIVER': 2, '01_BLE_TENDRE': 3, '11_POIS_PROTEAGINEUX': 4, '13_TOURNESOL': 5, '04_ORGE_PRINTEMPS': 6, '07_TRITICALE': 7, '15_SORGHO': 8, '09_COLZA': 9, '12_FEVE_FEVEROLE': 10, '17_MAIS_GRAIN': 11, '20_MAIS_FOURRAGE': 12, '31_POMME_DE_TERRE_CONSO': 13, '05_AVOINE': 14, '06_SEIGLE': 15}
+
+
+
+#+begin_src python :results file :session :var matplot_lib_filename=(org-babel-temp-file "figure" ".png") :exports both
+import matplotlib.pyplot as plt
+import matplotlib.cm as cm
+plt.figure(figsize=(10,5))
+# Yield against parcel area, one colour per crop label.
+plt.scatter(df31par['SURF_ADM'], df31par['RENDNORME'], c=colors, cmap=cm.get_cmap('tab20',len(colordict)))
+cbar = plt.colorbar()
+# Replace the numeric colour-bar ticks with the crop labels written as text.
+cbar.ax.get_yaxis().set_ticks([])
+for lab, j in colordict.items():
+    cbar.ax.text(.5, j*0.95, lab, ha='left')
+plt.xlabel('SURF_ADM')
+plt.ylabel('REND')
+plt.savefig(matplot_lib_filename)
+matplot_lib_filename
+#+end_src
+
+#+RESULTS:
+[[file:/tmp/babel-X17w0V/figureUjRq6C.png]]
+
+Il semble y avoir une corrélation positive entre rendement et taille de la parcelle, conditionnée par la classe
+
+#+begin_src python :session :results output :exports both
+# Six most frequent crop labels among the single-parcel selection.
+rnd_counts = df31par['LIBCULTURE'].value_counts()
+maj6_rnd = rnd_counts.iloc[:6]
+#+end_src
+
+#+RESULTS:
+: 01_BLE_TENDRE 23
+: 03_ORGE_HIVER 20
+: 14_SOJA 20
+: 11_POIS_PROTEAGINEUX 19
+: 17_MAIS_GRAIN 18
+: 13_TOURNESOL 18
+: Name: LIBCULTURE, dtype: int64
+
+#+begin_src python :session :results output :exports both
+import matplotlib.pyplot as plt
+import matplotlib.gridspec as gridspec
+import seaborn as sns
+import numpy as np
+
+class SeabornFig2Grid():
+    """Move the axes of a seaborn grid into one cell of another figure.
+
+    A seaborn FacetGrid, PairGrid or JointGrid owns its own figure. This
+    helper re-attaches the grid's axes to ``fig`` inside ``subplot_spec``
+    and closes the grid's original figure, so that several grids can be
+    laid out in a single figure.
+    """
+
+    def __init__(self, seaborngrid, fig, subplot_spec):
+        """Move ``seaborngrid`` into ``subplot_spec`` of ``fig``.
+
+        seaborngrid: a seaborn FacetGrid, PairGrid or JointGrid.
+        fig: the matplotlib figure that receives the axes.
+        subplot_spec: the cell of ``fig`` (e.g. ``GridSpec(3, 2)[i]``) to fill.
+        """
+        self.fig = fig
+        self.sg = seaborngrid
+        self.subplot = subplot_spec
+        if isinstance(self.sg, (sns.axisgrid.FacetGrid, sns.axisgrid.PairGrid)):
+            self._movegrid()
+        elif isinstance(self.sg, sns.axisgrid.JointGrid):
+            self._movejointgrid()
+        # Any other object has no axes moved; only _finalize() runs on it.
+        self._finalize()
+
+    def _movegrid(self):
+        """Move a PairGrid or FacetGrid, keeping its rows x columns layout."""
+        self._resize()
+        n = self.sg.axes.shape[0]
+        m = self.sg.axes.shape[1]
+        self.subgrid = gridspec.GridSpecFromSubplotSpec(n,m, subplot_spec=self.subplot)
+        for i in range(n):
+            for j in range(m):
+                self._moveaxes(self.sg.axes[i,j], self.subgrid[i,j])
+
+    def _movejointgrid(self):
+        """Move a JointGrid (joint axes plus its two marginal axes)."""
+        # The joint/marginal height ratio is read before resizing and sets
+        # how many sub-grid cells the joint axes spans.
+        h= self.sg.ax_joint.get_position().height
+        h2= self.sg.ax_marg_x.get_position().height
+        r = int(np.round(h/h2))
+        self._resize()
+        self.subgrid = gridspec.GridSpecFromSubplotSpec(r+1,r+1, subplot_spec=self.subplot)
+
+        # Joint axes: everything but the first row and last column;
+        # marginals: the first row (x) and the last column (y).
+        self._moveaxes(self.sg.ax_joint, self.subgrid[1:, :-1])
+        self._moveaxes(self.sg.ax_marg_x, self.subgrid[0, :-1])
+        self._moveaxes(self.sg.ax_marg_y, self.subgrid[1:, -1])
+
+    def _moveaxes(self, ax, gs):
+        """Detach ``ax`` from its figure and attach it to ``self.fig`` at ``gs``."""
+        #https://stackoverflow.com/a/46906599/4124317
+        ax.remove()
+        ax.figure=self.fig
+        # add_axes() is what registers the axes with the target figure;
+        # Figure.axes returns a fresh list, so appending to it registers nothing.
+        self.fig.add_axes(ax)
+        ax._subplotspec = gs
+        ax.set_position(gs.get_position(self.fig))
+        ax.set_subplotspec(gs)
+
+    def _finalize(self):
+        """Close the grid's own figure and keep it sized with the target."""
+        plt.close(self.sg.fig)
+        self.fig.canvas.mpl_connect("resize_event", self._resize)
+        self.fig.canvas.draw()
+
+    def _resize(self, evt=None):
+        """Give the grid's figure the same size in inches as the target figure."""
+        self.sg.fig.set_size_inches(self.fig.get_size_inches())
+#+end_src
+
+#+RESULTS:
+
+#+begin_src python
:results file :session :var matplot_lib_filename=(org-babel-temp-file "figure" ".png") :exports both
+import matplotlib.pyplot as plt
+import matplotlib.gridspec as gridspec
+import seaborn as sns
+from scipy import stats
+
+sns.set()
+
+
+def rsquare(a, b):
+    """Return the squared Spearman rank correlation between a and b."""
+    return stats.spearmanr(a, b)[0] ** 2
+
+
+# One joint plot (regression of yield on parcel area) per crop in maj6_rnd.
+plotlist = []
+for current_cult in maj6_rnd.index:
+    dfcurr = df31par.query('LIBCULTURE == "{}"'.format(current_cult))
+    g = sns.jointplot(x='SURF_ADM', y='RENDNORME', data=dfcurr, kind='reg')
+    g.set_axis_labels('SURF_'+current_cult, 'REND')
+    g.annotate(rsquare, template="{stat}: {val:.2f}", stat="$Spearman R^2$", loc="upper left", fontsize=12)
+    plotlist.append(g)
+
+fig = plt.figure(figsize=(10,10))
+
+gs = gridspec.GridSpec(3, 2)
+
+# Each joint plot owns its own figure; move them into the cells of one figure.
+for i, p in enumerate(plotlist):
+    SeabornFig2Grid(p, fig, gs[i])
+
+gs.tight_layout(fig)
+plt.savefig(matplot_lib_filename)
+matplot_lib_filename
+#+end_src
+
+#+RESULTS:
+[[file:/tmp/babel-X17w0V/figureYa6zlX.png]]
+
+La corrélation n'est valable que pour le blé tendre et le tournesol. Il faudra évidemment refaire tout ça avec tous les départements.
+
+** Corrélation avec autres variables (qui n'ont pas besoin des images)
+*** Irrigation
+#+begin_src python :results file :session :var matplot_lib_filename=(org-babel-temp-file "figure" ".png") :exports both
+import matplotlib.pyplot as plt
+import matplotlib.gridspec as gridspec
+import seaborn as sns
+from scipy import stats
+
+sns.set()
+
+
+def rsquare(a, b):
+    """Return the squared Spearman rank correlation between a and b."""
+    return stats.spearmanr(a, b)[0] ** 2
+
+
+# One joint plot (regression of yield on MMEAU) per crop in maj6_rnd.
+plotlist = []
+for current_cult in maj6_rnd.index:
+    dfcurr = df31par.query('LIBCULTURE == "{}"'.format(current_cult))
+    g = sns.jointplot(x='MMEAU', y='RENDNORME', data=dfcurr, kind='reg')
+    g.set_axis_labels('IRR_'+current_cult, 'REND')
+    g.annotate(rsquare, template="{stat}: {val:.2f}", stat="$Spearman R^2$", loc="upper left", fontsize=12)
+    plotlist.append(g)
+
+fig = plt.figure(figsize=(10,10))
+
+gs = gridspec.GridSpec(3, 2)
+
+# Each joint plot owns its own figure; move them into the cells of one figure.
+for i, p in enumerate(plotlist):
+    SeabornFig2Grid(p, fig, gs[i])
+
+gs.tight_layout(fig)
+plt.savefig(matplot_lib_filename)
+matplot_lib_filename
+#+end_src
+
+#+RESULTS:
+[[file:/tmp/babel-X17w0V/figureVmssHE.png]]
+
+*** Bio
+#+begin_src python :results file :session :var matplot_lib_filename=(org-babel-temp-file "figure" ".png") :exports both
+import matplotlib.pyplot as plt
+import matplotlib.gridspec as gridspec
+import seaborn as sns
+from scipy import stats
+
+sns.set()
+
+
+def rsquare(a, b):
+    """Return the squared Spearman rank correlation between a and b."""
+    return stats.spearmanr(a, b)[0] ** 2
+
+
+# One joint plot (regression of yield on BIO) per crop in maj6_rnd.
+plotlist = []
+for current_cult in maj6_rnd.index:
+    dfcurr = df31par.query('LIBCULTURE == "{}"'.format(current_cult))
+    g = sns.jointplot(x='BIO', y='RENDNORME', data=dfcurr, kind='reg')
+    g.set_axis_labels('BIO_'+current_cult, 'REND')
+    g.annotate(rsquare, template="{stat}: {val:.2f}", stat="$Spearman R^2$", loc="upper left", fontsize=12)
+    plotlist.append(g)
+
+fig = plt.figure(figsize=(10,10))
+
+gs = gridspec.GridSpec(3, 2)
+
+# Each joint plot owns its own figure; move them into the cells of one figure.
+for i, p in enumerate(plotlist):
+    SeabornFig2Grid(p, fig, gs[i])
+
+gs.tight_layout(fig)
+plt.savefig(matplot_lib_filename)
+matplot_lib_filename
+#+end_src
+
+#+RESULTS:
+[[file:/tmp/babel-X17w0V/figureE5mY6F.png]] +*** Différence entre surface admin et vraie surface +*** Latitude * Quels sont les départements contenant des informations de rendement? * Quelles sont les cultures pour lesquelles les informations de rendement sont les plus nombreuses? +* Peut-on prédire le rendement à partir des données TERLAB seules? +- Localisation, culture, surface de la parcelle * Quelles sont les tuiles S2 nécessaires pour couvrir chaque département? -- libgit2 0.21.0