Commit e0091d8e1e283872a1a751091dbd4c7c320d05dc

Authored by Jordi Inglada
1 parent 93031e24
Exists in master

exploration des données de rendement par classe

Showing 1 changed file with 217 additions and 5 deletions   Show diff stats
notebook/exploration.org
... ... @@ -67,7 +67,8 @@ def shape2df(departement):
67 67  
68 68  
69 69 * Quelles sont les variables intéressantes dans les données TERLAB
70   -On vérifie que le fichier est bien lu
  70 +
  71 +** On vérifie que le fichier est bien lu
71 72 #+begin_src python :results output :session :exports both
72 73 df31 = shape2df(31)
73 74 print(df31.head())
... ... @@ -83,7 +84,7 @@ print(df31.head())
83 84 :
84 85 : [5 rows x 18 columns]
85 86  
86   -On liste les colonnes
  87 +** On liste les colonnes et les lignes
87 88 #+begin_src python :results output :session :exports both
88 89 print(df31.columns)
89 90 #+end_src
... ... @@ -123,24 +124,235 @@ geometry POLYGON ((506016.382600002 6234988.195600003, ...
123 124 Name: 0, dtype: object
124 125 #+end_example
125 126  
126   -Quels sont les codes des cultures?
  127 +** Quels sont les codes des cultures?
127 128  
128 129 #+begin_src python :results output :session :exports both
129   -print(set(df31.CODE_CULTU))
  130 +codes_cultures = set(df31.CODE_CULTU)
  131 +print(len(codes_cultures))
  132 +print(codes_cultures)
130 133 #+end_src
131 134  
132 135 #+RESULTS:
  136 +: 167
133 137 : {'LOT', 'MPC', 'AVH', 'GES', 'PCH', 'TTP', 'DTY', 'LH6', 'PAG', 'FNU', 'PPO', 'SGH', 'MC5', 'TRN', 'MLG', 'CZH', 'PPP', 'SGE', 'LAV', 'ORP', 'BRO', 'OLI', 'ME7', 'POR', 'CML', 'SOJ', 'FEV', 'FLA', 'CHS', 'LH7', 'SNE', 'LU5', 'MIS', 'SPL', 'NOX', 'FVL', 'BVF', 'LUD', 'CID', 'MIE', 'SAI', 'RGA', 'PTR', 'PEP', 'BDH', 'LU6', 'ORH', 'CPL', 'ML6', 'CTG', 'SPH', 'LEC', 'TOP', 'PFP', 'MC7', 'FRA', 'SRS', 'CRD', 'VRT', 'VRG', 'BTP', 'ORT', 'BOR', 'BTA', 'RVI', 'TR6', 'PPR', 'MOT', 'LUZ', 'CAG', 'OIG', 'CHU', 'CIT', 'TAB', 'MC6', 'RDI', 'MLO', 'CCT', 'LU7', 'FF7', 'MID', 'HBL', 'PH6', 'MLT', 'TR5', 'SBO', 'ME6', 'EPE', 'PHI', 'CEL', 'SOG', 'SA5', 'ANE', 'BTH', 'PSL', 'VE7', 'LDH', 'ML5', 'SA6', 'PCL', 'CAR', 'POT', 'PRL', 'PAS', 'MCT', 'NVT', 'OAG', 'CPH', 'MOH', 'BFS', 'J6S', 'TCR', 'CCN', 'AUB', 'J5M', 'AIL', 'FAG', 'TTH', 'MCR', 'TRE', 'LBF', 'TOM', 'PAN', 'CZP', 'LIH', 'J6P', 'AGR', 'MH7', 'GFP', 'TRU', 'LO7', 'CGO', 'PVP', 'AVP', 'FNO', 'SA7', 'PFR', 'PPH', 'ML7', 'PFH', 'FET', 'BDP', 'BOP', 'NOS', 'MH5', 'PTC', 'MPA', 'TR7', 'PH7', 'CES', 'FLP', 'CHT', 'LP7', 'HAR', 'VE6', 'CMB', 'BTN', 'MH6', 'FSG', 'BFP', 'PPA', 'VES', 'ROQ', 'PP7', 'LIP', 'VRC', 'ART'}
134 138  
135 139 Mais il est plus lisible de regarder ~LIBCULTURE~
136 140 #+begin_src python :results output :session :exports both
137   -print(set(df31.LIBCULTURE))
  141 +libs_cultures = set(df31.LIBCULTURE)
  142 +print(len(libs_cultures))
  143 +print(libs_cultures)
138 144 #+end_src
139 145  
140 146 #+RESULTS:
  147 +: 20
141 148 : {'07_TRITICALE', '00_XXXXXXXXXX', '21_BETTERAVE_INDUSTRIELLE', '20_MAIS_FOURRAGE', '31_POMME_DE_TERRE_CONSO', '17_MAIS_GRAIN', '11_POIS_PROTEAGINEUX', '15_SORGHO', '02_BLE_DUR', '13_TOURNESOL', '05_AVOINE', '09_COLZA', '06_SEIGLE', '01_BLE_TENDRE', '61_MELANGE_CEREALES', '62_MELANGE_PROTEAGINEUX', '12_FEVE_FEVEROLE', '03_ORGE_HIVER', '14_SOJA', '04_ORGE_PRINTEMPS'}
142 149  
143 150 Mais il y a beaucoup plus de ~CODE_CULTU~ que de ~LIBCULTURE~. Peut-être que le 2è n'existe que pour les parcelles avec rendement?
  151 +
  152 +Lien entre les 2 champs?
  153 +#+begin_src python :results output :session :exports both
  154 +codes_libs = df31[['CODE_CULTU', 'LIBCULTURE']].drop_duplicates()
  155 +print(len(codes_libs))
  156 +#+end_src
  157 +
  158 +#+RESULTS:
  159 +: 167
  160 +
  161 +** Répartition des cultures
  162 +#+begin_src python :results output :session :exports both
  163 +df31.LIBCULTURE.value_counts()
  164 +#+end_src
  165 +
  166 +#+RESULTS:
  167 +#+begin_example
  168 +00_XXXXXXXXXX 83046
  169 +01_BLE_TENDRE 10423
  170 +13_TOURNESOL 10073
  171 +02_BLE_DUR 8268
  172 +17_MAIS_GRAIN 5470
  173 +03_ORGE_HIVER 3012
  174 +09_COLZA 2090
  175 +14_SOJA 1973
  176 +20_MAIS_FOURRAGE 1447
  177 +15_SORGHO 1404
  178 +07_TRITICALE 1201
  179 +11_POIS_PROTEAGINEUX 1021
  180 +12_FEVE_FEVEROLE 470
  181 +62_MELANGE_PROTEAGINEUX 324
  182 +05_AVOINE 293
  183 +61_MELANGE_CEREALES 290
  184 +04_ORGE_PRINTEMPS 251
  185 +31_POMME_DE_TERRE_CONSO 157
  186 +06_SEIGLE 19
  187 +21_BETTERAVE_INDUSTRIELLE 14
  188 +Name: LIBCULTURE, dtype: int64
  189 +#+end_example
  190 +
  191 +
  192 +** Histogramme des valeurs de rendement:
  193 +#+begin_src python :results output :session :exports both
  194 +df31.RENDNORME.describe()
  195 +#+end_src
  196 +
  197 +#+RESULTS:
  198 +: count 4713.000000
  199 +: mean 46.262873
  200 +: std 20.104306
  201 +: min 1.000000
  202 +: 25% 27.840000
  203 +: 50% 48.000000
  204 +: 75% 63.000000
  205 +: max 120.000000
  206 +: Name: RENDNORME, dtype: float64
  207 +
  208 +#+begin_src python :results file :session :var matplot_lib_filename=(org-babel-temp-file "figure" ".png") :exports both
  209 +import matplotlib.pyplot as plt
  210 +plt.figure(figsize=(10,5))
  211 +df31.hist(column='RENDNORME')
  212 +plt.savefig(matplot_lib_filename)
  213 +matplot_lib_filename
  214 +#+end_src
  215 +
  216 +#+RESULTS:
  217 +[[file:/tmp/babel-QEj3SP/figureDH9QA6.png]]
  218 +
  219 +Valeurs manquantes de rendement?
  220 +#+begin_src python :results output :session :exports both
  221 +num_parcelles = len(df31.RENDNORME)
  222 +num_rendements = df31.RENDNORME.count()
  223 +print('{} parcelles dont {} avec information de rendement'.format(num_parcelles, num_rendements))
  224 +#+end_src
  225 +
  226 +#+RESULTS:
  227 +: 131246 parcelles dont 4713 avec information de rendement
  228 +
  229 +
  230 +** Disponibilité des données de rendement par culture
  231 +#+begin_src python :results output :session :exports both
  232 +df31[df31.RENDNORME.notnull()]['LIBCULTURE'].value_counts()
  233 +#+end_src
  234 +
  235 +#+RESULTS:
  236 +#+begin_example
  237 +13_TOURNESOL 1178
  238 +02_BLE_DUR 1125
  239 +01_BLE_TENDRE 935
  240 +17_MAIS_GRAIN 294
  241 +09_COLZA 258
  242 +03_ORGE_HIVER 250
  243 +14_SOJA 241
  244 +11_POIS_PROTEAGINEUX 136
  245 +15_SORGHO 118
  246 +07_TRITICALE 70
  247 +12_FEVE_FEVEROLE 57
  248 +20_MAIS_FOURRAGE 17
  249 +05_AVOINE 15
  250 +04_ORGE_PRINTEMPS 14
  251 +06_SEIGLE 4
  252 +31_POMME_DE_TERRE_CONSO 1
  253 +Name: LIBCULTURE, dtype: int64
  254 +#+end_example
  255 +
  256 +#+begin_src python :results file :session :var matplot_lib_filename=(org-babel-temp-file "figure" ".png") :exports both
  257 +import matplotlib.pyplot as plt
  258 +plt.figure(figsize=(10,5))
  259 +df31[df31.RENDNORME.notnull()]['CODE_CULTU'].value_counts().plot.bar()
  260 +plt.savefig(matplot_lib_filename)
  261 +matplot_lib_filename
  262 +#+end_src
  263 +
  264 +#+RESULTS:
  265 +[[file:/tmp/babel-QEj3SP/figureaXXL2p.png]]
  266 +
  267 +** Distribution statistique des rendements par culture
  268 +#+RESULTS:
  269 +#+begin_example
  270 +TRN
  271 + CODE_CULTU SURF_ADM PRECISION SEMENCE ... SURUTISOL RENDNORME MMEAU geometry
  272 +24 TRN 0.60 None 0 ... NaN NaN NaN POLYGON ((555017.3030000031 6288487.261, 55503...
  273 +30 TRN 3.65 None 0 ... NaN NaN NaN POLYGON ((581263.6560000032 6252087.490000002,...
  274 +31 TRN 6.69 None 0 ... NaN NaN NaN POLYGON ((580446.200000003 6251350.913000003, ...
  275 +33 TRN 14.25 None 0 ... NaN NaN NaN POLYGON ((580111.6431000009 6250744.730800003,...
  276 +45 TRN 1.05 None 0 ... NaN NaN NaN POLYGON ((581790.2880000025 6251575.461000003,...
  277 +48 TRN 12.44 None 0 ... NaN NaN NaN POLYGON ((581401.3687999994 6250285.316800002,...
  278 +53 TRN 2.16 None 0 ... NaN NaN NaN POLYGON ((554394.9580000043 6288677.411000002,...
  279 +54 TRN 8.48 None 0 ... NaN NaN NaN POLYGON ((554866.626000002 6288673.861000001, ...
  280 +58 TRN 3.52 None 0 ... NaN NaN NaN POLYGON ((553234.800999999 6287448.078000002, ...
  281 +72 TRN 0.44 None 0 ... 25.00 24.0 60.00 POLYGON ((575801.138000004 6245039.913000003, ...
  282 +81 TRN 9.79 None 0 ... 25.00 24.0 60.00 POLYGON ((575622.563000001 6245093.604600001, ...
  283 +85 TRN 0.60 None 0 ... 25.00 24.0 60.00 POLYGON ((576226.2412 6245008.710800003, 57621...
  284 +86 TRN 3.60 None 0 ... 25.00 24.0 60.00 POLYGON ((576052.189199999 6245216.165200002, ...
  285 +88 TRN 6.77 None 0 ... 25.00 24.0 60.00 POLYGON ((577236.5139000043 6244098.652800001,...
  286 +90 TRN 2.04 None 0 ... 25.00 24.0 60.00 POLYGON ((577029.3339999989 6244296.425000001,...
  287 +92 TRN 2.49 None 0 ... 25.00 24.0 60.00 POLYGON ((578223.6330000013 6246276.920000002,...
  288 +104 TRN 1.00 None 0 ... NaN NaN NaN POLYGON ((555160.8339999989 6288614.073000003,...
  289 +111 TRN 5.59 None 0 ... NaN NaN NaN POLYGON ((609866.8835000023 6263464.590600003,...
  290 +119 TRN 7.70 None 0 ... NaN NaN NaN POLYGON ((586628.8779999986 6287265.739100002,...
  291 +228 TRN 29.52 None 0 ... NaN NaN NaN POLYGON ((586507.767400004 6249033.661900003, ...
  292 +240 TRN 4.68 None 0 ... NaN NaN NaN POLYGON ((511739.449000001 6241834.759, 511737...
  293 +322 TRN 4.32 None 0 ... NaN NaN NaN POLYGON ((610430.0773999989 6261651.982700001,...
  294 +323 TRN 5.69 None 0 ... NaN NaN NaN POLYGON ((614708.3079999983 6277454.932, 61471...
  295 +326 TRN 1.67 None 0 ... NaN NaN NaN POLYGON ((615043.6757000014 6277122.2916, 6150...
  296 +333 TRN 1.68 None 0 ... NaN NaN NaN POLYGON ((608512.7588 6262650.3046, 608452.698...
  297 +334 TRN 5.63 None 0 ... NaN NaN NaN POLYGON ((608843.5855000019 6262790.020500001,...
  298 +338 TRN 9.24 None 0 ... NaN NaN NaN POLYGON ((610236.4738000035 6262741.7326, 6102...
  299 +341 TRN 0.44 None 1 ... NaN NaN NaN POLYGON ((610038.8352999985 6262889.500300001,...
  300 +344 TRN 0.41 None 1 ... NaN NaN NaN POLYGON ((610132.8236000016 6262748.6778, 6101...
  301 +346 TRN 2.38 None 0 ... NaN NaN NaN POLYGON ((609944.3612000048 6262859.592700001,...
  302 +... ... ... ... ... ... ... ... ...
  303 +130858 TRN 0.00 None 0 ... NaN NaN NaN POLYGON ((591821.9479999989 6246798.5711, 5917...
  304 +130862 TRN 2.47 None 0 ... NaN NaN NaN POLYGON ((561445.3229999989 6247174.507000003,...
  305 +130885 TRN 6.04 None 0 ... NaN NaN NaN POLYGON ((518236.2965000048 6252405.700800002,...
  306 +130900 TRN 4.37 None 0 ... NaN NaN NaN POLYGON ((568873.9149999991 6253983.294, 56885...
  307 +130922 TRN 5.74 None 0 ... NaN NaN NaN POLYGON ((604401.186999999 6269327.276300002, ...
  308 +130923 TRN 2.04 None 0 ... NaN NaN NaN POLYGON ((604401.1928000003 6269327.282900002,...
  309 +130927 TRN 0.15 None 0 ... NaN NaN NaN POLYGON ((546528.9905000031 6313053.770800002,...
  310 +130952 TRN 7.88 None 0 ... NaN NaN NaN POLYGON ((562847.7470000014 6247395.255000003,...
  311 +130957 TRN 1.85 None 0 ... NaN NaN NaN POLYGON ((562305.936999999 6243942.735000003, ...
  312 +130958 TRN 1.80 None 0 ... NaN NaN NaN POLYGON ((562131.1979999989 6244150.639700003,...
  313 +130959 TRN 6.73 None 0 ... NaN NaN NaN POLYGON ((562298.2080999985 6244368.4877, 5624...
  314 +130980 TRN 0.63 None 0 ... NaN NaN NaN POLYGON ((546709.0170000046 6271094.791000001,...
  315 +130981 TRN 0.41 None 0 ... NaN NaN NaN POLYGON ((546708.8593000025 6271093.847900003,...
  316 +130987 TRN 2.59 None 0 ... 85.00 24.0 204.00 POLYGON ((597935.5368999988 6259834.054000001,...
  317 +130994 TRN 1.17 None 0 ... NaN NaN NaN POLYGON ((612614.8017000034 6263528.291700002,...
  318 +130995 TRN 3.37 None 0 ... NaN NaN NaN POLYGON ((612437.2829999998 6263578.228300001,...
  319 +131012 TRN 3.18 None 0 ... NaN NaN NaN POLYGON ((546528.6845000014 6313043.021100003,...
  320 +131023 TRN 0.70 None 0 ... NaN NaN NaN POLYGON ((541206.675999999 6300451.175000001, ...
  321 +131095 TRN 0.59 None 0 ... NaN NaN NaN POLYGON ((541208.5989000052 6300274.455000002,...
  322 +131096 TRN 1.33 None 0 ... NaN NaN NaN POLYGON ((541206.8114000037 6300275.685600001,...
  323 +131120 TRN 0.00 None 0 ... NaN NaN NaN POLYGON ((580034.3561000004 6250682.503200002,...
  324 +131134 TRN 0.00 None 0 ... NaN NaN NaN POLYGON ((597674.930399999 6258343.746300001, ...
  325 +131136 TRN 0.00 None 0 ... NaN NaN NaN POLYGON ((591802.4574000016 6246689.483200002,...
  326 +131146 TRN 4.60 None 0 ... NaN NaN NaN POLYGON ((581203.7241000012 6256713.529000003,...
  327 +131152 TRN 0.00 None 0 ... 80.16 24.0 192.38 POLYGON ((598665.1539999992 6259764.382800002,...
  328 +131156 TRN 2.49 None 0 ... NaN NaN NaN POLYGON ((602208.8986000046 6260887.712900002,...
  329 +131212 TRN 1.09 None 0 ... NaN NaN NaN POLYGON ((595624.1480000019 6250820.873, 59563...
  330 +131240 TRN 1.91 None 0 ... NaN NaN NaN POLYGON ((501596.7771000043 6236236.866, 50158...
  331 +131241 TRN 1.99 None 0 ... NaN NaN NaN POLYGON ((501600.5104999989 6236239.7509, 5015...
  332 +131243 TRN 0.38 None 0 ... NaN NaN NaN POLYGON ((592146.2823000029 6256409.391900003,...
  333 +
  334 +[10073 rows x 18 columns]
  335 +#+end_example
  336 +
  337 +#+begin_src python :results file :session :var matplot_lib_filename=(org-babel-temp-file "figure" ".png") :exports both
  338 +rnd_counts = df31[df31.RENDNORME.notnull()]['CODE_CULTU'].value_counts()
  339 +maj6_rnd = rnd_counts.iloc[:6]
  340 +fig, axes = plt.subplots(3, 2, sharex=True, sharey=True)
  341 +print(axes)
  342 +for current_cult, axis in zip(maj6_rnd.index, axes.flatten()):
  343 + axis.set_xlabel(current_cult)
  344 + df31[df31['CODE_CULTU']==current_cult].hist(column='RENDNORME', ax=axis)
  345 + axis.set_title('')
  346 +plt.suptitle('RENDNORME')
  347 +plt.savefig(matplot_lib_filename)
  348 +matplot_lib_filename
  349 +#+end_src
  350 +
  351 +#+RESULTS:
  352 +[[file:/tmp/babel-QEj3SP/figureIq8abo.png]]
  353 +
  354 +
  355 +
144 356 * Quels sont les départements contenant des informations de rendement?
145 357 * Quelles sont les cultures pour lesquelles les informations de rendement sont les plus nombreuses?
146 358 * Quelles sont les tuiles S2 nécessaires pour couvrir chaque département?
... ...