Compare View

switch
from
...
to
 
Commits (5)
jobs/prepare_samples.cfg 100644 → 100755
jobs/prepare_samples.sh 100644 → 100755
jobs/zonal_stats.pbs 100644 → 100755
1 1 #!/bin/sh
2   -#PBS -N zonal_stats
3   -#PBS -l select=1:ncpus=20:mem=110gb
4   -#PBS -l walltime=20:00:00
  2 +#PBS -N refl_zonal_stats
  3 +#PBS -l select=1:ncpus=20:mem=100gb
  4 +#PBS -l walltime=100:00:00
5 5  
6 6  
7 7 . /work/OT/theia/oso/jordi/OTB/otb_superbuild/otb_superbuild-otb_develop-Release-install/config_otb_tf_py35.sh
... ... @@ -18,12 +18,24 @@ MAASSP_DIR=/work/OT/theia/oso/jordi/maassp_data
18 18 #7z x $TERLAB_ZIP -aoa -o$TERLAB_DIR
19 19 #otbcli_ZonalStatistics -in ${MAASSP_DIR}/tiles/T31TCJ/Sentinel2_ST_MASK.tif -inbv -10000 -inzone.vector.in $TERLAB_SHP -inzone.vector.reproject 1 -out.vector.filename ${MAASSP_DIR}/zonal_stats_mask.shp -ram 30000
20 20  
21   -#otbcli_ZonalStatistics -in ${MAASSP_DIR}/tiles/T31TCJ/Sentinel2_ST_REFL.tif -inbv -10000 -inzone.vector.in $TERLAB_SHP -inzone.vector.reproject 1 -out.vector.filename ${MAASSP_DIR}/zonal_stats_refl.shp -ram 30000
  21 +# The input image has 720 channels and OGR fails when creating stdev_509, so we can try to split the image in half the bands
  22 +
  23 +CH_LIST1=$(for i in $(seq 1 359);do echo Channel$i; done)
  24 +CH_LIST2=$(for i in $(seq 360 720);do echo Channel$i; done)
  25 +
  26 +otbcli_ExtractROI -in ${MAASSP_DIR}/tiles/T31TCJ/Sentinel2_ST_REFL.tif -out ${MAASSP_DIR}/tiles/T31TCJ/Sentinel2_ST_REFL_1.tif int16 -cl ${CH_LIST1} -ram 80000
  27 +
  28 +otbcli_ZonalStatistics -in ${MAASSP_DIR}/tiles/T31TCJ/Sentinel2_ST_REFL_1.tif -inbv -10000 -inzone.vector.in $TERLAB_SHP -inzone.vector.reproject 1 -out.vector.filename ${MAASSP_DIR}/zonal_stats_refl_1.shp -ram 80000
  29 +
  30 +otbcli_ExtractROI -in ${MAASSP_DIR}/tiles/T31TCJ/Sentinel2_ST_REFL.tif -out ${MAASSP_DIR}/tiles/T31TCJ/Sentinel2_ST_REFL_2.tif int16 -cl ${CH_LIST2} -ram 80000
  31 +
  32 +otbcli_ZonalStatistics -in ${MAASSP_DIR}/tiles/T31TCJ/Sentinel2_ST_REFL_2.tif -inbv -10000 -inzone.vector.in $TERLAB_SHP -inzone.vector.reproject 1 -out.vector.filename ${MAASSP_DIR}/zonal_stats_refl_2.shp -ram 80000
  33 +
22 34  
23 35 #### DEM
24   -ALTIMG=${DEM_DIR}/S2__TEST_AUX_REFDE2_${TILE}_0001_ALT_R1.TIF
25   -ASPIMG=${DEM_DIR}/S2__TEST_AUX_REFDE2_${TILE}_0001_ASP_R1.TIF
26   -SLPIMG=${DEM_DIR}/S2__TEST_AUX_REFDE2_${TILE}_0001_SLP_R1.TIF
27   -DEMIMG=${MAASSP_DIR}/tiles/T31TCJ/DEM_${TILE}.tif
28   -otbcli_ConcatenateImages -il ${ALTIMG} ${ASPIMG} ${SLPIMG} -out ${DEMIMG} -ram 30000
29   -otbcli_ZonalStatistics -in ${DEMIMG} -inbv -10000 -inzone.vector.in $TERLAB_SHP -inzone.vector.reproject 1 -out.vector.filename ${MAASSP_DIR}/zonal_stats_dem.shp -ram 30000
  36 +# ALTIMG=${DEM_DIR}/S2__TEST_AUX_REFDE2_${TILE}_0001_ALT_R1.TIF
  37 +# ASPIMG=${DEM_DIR}/S2__TEST_AUX_REFDE2_${TILE}_0001_ASP_R1.TIF
  38 +# SLPIMG=${DEM_DIR}/S2__TEST_AUX_REFDE2_${TILE}_0001_SLP_R1.TIF
  39 +# DEMIMG=${MAASSP_DIR}/tiles/T31TCJ/DEM_${TILE}.tif
  40 +# otbcli_ConcatenateImages -il ${ALTIMG} ${ASPIMG} ${SLPIMG} -out ${DEMIMG} -ram 30000
  41 +# otbcli_ZonalStatistics -in ${DEMIMG} -inbv -10000 -inzone.vector.in $TERLAB_SHP -inzone.vector.reproject 1 -out.vector.filename ${MAASSP_DIR}/zonal_stats_dem.shp -ram 30000
... ...
notebook/exploration.org
... ... @@ -33,6 +33,11 @@ def find_terlab_file(dpt):
33 33 #+end_src
34 34  
35 35 #+RESULTS:
  36 +: Python 3.7.1 (default, Oct 23 2018, 19:19:42)
  37 +: [GCC 7.3.0] :: Anaconda, Inc. on linux
  38 +: Type "help", "copyright", "credits" or "license" for more information.
  39 +: python.el: native completion setup loaded
  40 +
36 41  
37 42 #+begin_src python :results output :session :exports both
38 43 print(terlabdir)
... ... @@ -42,6 +47,7 @@ print(terlabdir)
42 47 : /work/OT/theia/oso/shapes/TERLAB/
43 48  
44 49  
  50 +
45 51 #+begin_src python :results output :session :exports both
46 52 assert terlabdir+'RPG_TERLAB_DEP90-91-93-94-95_2017.7z'==find_terlab_file('94')
47 53 assert terlabdir+'RPG_TERLAB_DEP90-91-93-94-95_2017.7z'==find_terlab_file('95')
... ... @@ -69,7 +75,6 @@ def shape2df(departement):
69 75 #+RESULTS:
70 76  
71 77 * Quelles sont les variables intéressantes dans les données TERLAB
72   -
73 78 ** On vérifie que le fichier est bien lu
74 79 #+begin_src python :results output :session :exports both
75 80 df31 = shape2df(31)
... ... @@ -77,12 +82,12 @@ print(df31.head())
77 82 #+end_src
78 83  
79 84 #+RESULTS:
80   -: CODE_CULTU SURF_ADM PRECISION ... RENDNORME MMEAU geometry
81   -: 0 ORH 1.42 None ... NaN NaN POLYGON ((506016.382600002 6234988.195600003, ...
82   -: 1 PPH 0.00 None ... NaN NaN POLYGON ((506032.0680000037 6235002.251000002,...
83   -: 2 PPH 0.00 None ... NaN NaN POLYGON ((506108.3540000021 6234766.824000001,...
84   -: 3 ORH 1.87 None ... NaN NaN POLYGON ((506107.6996000037 6234765.226100001,...
85   -: 4 PPH 0.00 None ... NaN NaN POLYGON ((506874.6119000018 6234495.103300001,...
  85 +: CODE_CULTU SURF_ADM PRECISION SEMENCE DEST_ICHN CULTURE_D1 CULTURE_D2 ... TLENQ CODUTISOL LIBCULTURE SURUTISOL RENDNORME MMEAU geometry
  86 +: 0 ORH 1.42 None 0 None None None ... 1 03 03_ORGE_HIVER NaN NaN NaN POLYGON ((506016.382600002 6234988.195600003, ...
  87 +: 1 PPH 0.00 None 0 None None None ... 0 00 00_XXXXXXXXXX NaN NaN NaN POLYGON ((506032.0680000037 6235002.251000002,...
  88 +: 2 PPH 0.00 None 0 None None None ... 0 00 00_XXXXXXXXXX NaN NaN NaN POLYGON ((506108.3540000021 6234766.824000001,...
  89 +: 3 ORH 1.87 None 0 None None None ... 1 03 03_ORGE_HIVER NaN NaN NaN POLYGON ((506107.6996000037 6234765.226100001,...
  90 +: 4 PPH 0.00 None 0 None None None ... 0 00 00_XXXXXXXXXX NaN NaN NaN POLYGON ((506874.6119000018 6234495.103300001,...
86 91 :
87 92 : [5 rows x 18 columns]
88 93  
... ... @@ -137,7 +142,7 @@ print(codes_cultures)
137 142  
138 143 #+RESULTS:
139 144 : 167
140   -: {'PCL', 'LO7', 'PH7', 'AGR', 'SAI', 'VE7', 'MCT', 'DTY', 'PHI', 'MOT', 'PPO', 'J6S', 'BTN', 'LIH', 'FLP', 'MPA', 'FNU', 'NOX', 'LUZ', 'CHT', 'OIG', 'CCT', 'MCR', 'POR', 'ME7', 'VRC', 'PAS', 'PVP', 'HBL', 'LOT', 'MLT', 'BTA', 'ML6', 'CEL', 'PH6', 'ANE', 'AUB', 'SGH', 'LIP', 'ORT', 'J5M', 'TRN', 'FAG', 'HAR', 'PFR', 'SRS', 'CHU', 'FEV', 'TR6', 'CAR', 'VES', 'BDH', 'ART', 'TOM', 'PPP', 'MH7', 'LEC', 'FRA', 'SA6', 'TTP', 'NOS', 'RGA', 'TR5', 'PFP', 'LH6', 'PCH', 'ORH', 'LH7', 'RDI', 'PAN', 'BFS', 'FET', 'EPE', 'MPC', 'ME6', 'BDP', 'SA7', 'SOJ', 'CPL', 'PAG', 'J6P', 'GFP', 'RVI', 'LBF', 'CPH', 'MH6', 'LU6', 'LU5', 'BOR', 'CZP', 'MC5', 'LUD', 'TCR', 'PPR', 'SA5', 'TRU', 'PP7', 'PPA', 'FNO', 'CGO', 'AVP', 'TR7', 'LAV', 'VRG', 'BTP', 'TRE', 'CML', 'PTC', 'MIE', 'ML5', 'SNE', 'MH5', 'PTR', 'CZH', 'LU7', 'FF7', 'CCN', 'LDH', 'CRD', 'MC6', 'CHS', 'OAG', 'OLI', 'VRT', 'BOP', 'SGE', 'GES', 'PPH', 'SPH', 'BVF', 'CTG', 'AIL', 'FVL', 'CID', 'NVT', 'MLG', 'BRO', 'PRL', 'FLA', 'SPL', 'SBO', 'ORP', 'BTH', 'MC7', 'TTH', 'CAG', 'AVH', 'BFP', 'ML7', 'POT', 'TOP', 'PSL', 'FSG', 'CIT', 'MID', 'LP7', 'PFH', 'MIS', 'CES', 'CMB', 'PEP', 'TAB', 'VE6', 'MLO', 'MOH', 'SOG', 'ROQ'}
  145 +: {'LBF', 'PAS', 'LUD', 'MIS', 'CGO', 'RVI', 'SGE', 'MOH', 'VRT', 'PAN', 'LOT', 'RDI', 'MC5', 'FVL', 'LH7', 'J5M', 'NOS', 'SRS', 'PEP', 'PH7', 'GFP', 'CZP', 'LU7', 'HBL', 'EPE', 'CHT', 'RGA', 'ORT', 'MH5', 'GES', 'PCH', 'PHI', 'OLI', 'BVF', 'NOX', 'CAG', 'FLA', 'TAB', 'SBO', 'BOR', 'FNO', 'AVH', 'BTN', 'CRD', 'MPA', 'J6P', 'SPL', 'MC6', 'ROQ', 'ML7', 'PFR', 'SGH', 'FAG', 'SOJ', 'TR6', 'TTP', 'CHS', 'VE7', 'BDP', 'PCL', 'MOT', 'SAI', 'BTP', 'CML', 'FEV', 'TTH', 'ANE', 'VRG', 'CPL', 'TCR', 'PAG', 'LU5', 'CES', 'PPO', 'ART', 'PTR', 'PPP', 'TOP', 'LP7', 'ORP', 'PTC', 'AUB', 'TR5', 'BRO', 'HAR', 'BTH', 'BFS', 'ML5', 'CZH', 'FRA', 'LAV', 'MH7', 'CTG', 'CCT', 'FET', 'PFH', 'POR', 'CCN', 'PRL', 'MLG', 'OIG', 'CMB', 'ORH', 'LIP', 'SA5', 'CAR', 'PP7', 'VRC', 'AGR', 'SPH', 'SOG', 'BTA', 'ME6', 'PFP', 'FSG', 'PPR', 'PSL', 'J6S', 'NVT', 'AVP', 'MID', 'LO7', 'BDH', 'TRE', 'LIH', 'LU6', 'VE6', 'PVP', 'DTY', 'TR7', 'LDH', 'PPH', 'FF7', 'CHU', 'ME7', 'LEC', 'MCT', 'FNU', 'FLP', 'PH6', 'POT', 'ML6', 'TOM', 'CPH', 'MCR', 'MLO', 'MH6', 'OAG', 'MIE', 'PPA', 'LH6', 'MPC', 'SA6', 'MC7', 'VES', 'CID', 'MLT', 'SNE', 'TRN', 'LUZ', 'TRU', 'CIT', 'BFP', 'SA7', 'AIL', 'CEL', 'BOP'}
141 146  
142 147 Mais il est plus lisible de regarder ~LIBCULTURE~
143 148 #+begin_src python :results output :session :exports both
... ... @@ -148,7 +153,7 @@ print(libs_cultures)
148 153  
149 154 #+RESULTS:
150 155 : 20
151   -: {'11_POIS_PROTEAGINEUX', '62_MELANGE_PROTEAGINEUX', '12_FEVE_FEVEROLE', '00_XXXXXXXXXX', '31_POMME_DE_TERRE_CONSO', '14_SOJA', '03_ORGE_HIVER', '01_BLE_TENDRE', '13_TOURNESOL', '21_BETTERAVE_INDUSTRIELLE', '04_ORGE_PRINTEMPS', '07_TRITICALE', '09_COLZA', '05_AVOINE', '15_SORGHO', '06_SEIGLE', '61_MELANGE_CEREALES', '02_BLE_DUR', '17_MAIS_GRAIN', '20_MAIS_FOURRAGE'}
  156 +: {'21_BETTERAVE_INDUSTRIELLE', '00_XXXXXXXXXX', '11_POIS_PROTEAGINEUX', '01_BLE_TENDRE', '62_MELANGE_PROTEAGINEUX', '06_SEIGLE', '12_FEVE_FEVEROLE', '14_SOJA', '04_ORGE_PRINTEMPS', '15_SORGHO', '20_MAIS_FOURRAGE', '05_AVOINE', '03_ORGE_HIVER', '17_MAIS_GRAIN', '09_COLZA', '07_TRITICALE', '02_BLE_DUR', '13_TOURNESOL', '61_MELANGE_CEREALES', '31_POMME_DE_TERRE_CONSO'}
152 157  
153 158 Mais il y a beaucoup plus de ~CODE_CULTU~ que de ~LIBCULTURE~. Peut-être que le 2è n'existe que pour les parcelles avec rendement?
154 159  
... ... @@ -218,7 +223,7 @@ matplot_lib_filename
218 223 #+end_src
219 224  
220 225 #+RESULTS:
221   -[[file:/tmp/babel-X17w0V/figureR5oWbq.png]]
  226 +[[file:/tmp/babel-dHk8X9/figureym1xLQ.png]]
222 227  
223 228 Valeurs manquantes de rendement?
224 229 #+begin_src python :results output :session :exports both
... ... @@ -266,7 +271,7 @@ matplot_lib_filename
266 271 #+end_src
267 272  
268 273 #+RESULTS:
269   -[[file:/tmp/babel-X17w0V/figureJIfLg1.png]]
  274 +[[file:/tmp/babel-dHk8X9/figureLFYhfU.png]]
270 275  
271 276 ** Distribution statisique des rendements par culture
272 277  
... ... @@ -285,7 +290,7 @@ matplot_lib_filename
285 290 #+end_src
286 291  
287 292 #+RESULTS:
288   -[[file:/tmp/babel-X17w0V/figureukzYe2.png]]
  293 +[[file:/tmp/babel-dHk8X9/figure7FPnTM.png]]
289 294  
290 295  
291 296  
... ... @@ -313,8 +318,8 @@ SURF_ADM SURUTISOL
313 318 03_ORGE_HIVER 20
314 319 14_SOJA 20
315 320 11_POIS_PROTEAGINEUX 19
316   -17_MAIS_GRAIN 18
317 321 13_TOURNESOL 18
  322 +17_MAIS_GRAIN 18
318 323 09_COLZA 12
319 324 15_SORGHO 11
320 325 07_TRITICALE 7
... ... @@ -323,8 +328,8 @@ SURF_ADM SURUTISOL
323 328 04_ORGE_PRINTEMPS 4
324 329 05_AVOINE 3
325 330 06_SEIGLE 2
326   -20_MAIS_FOURRAGE 1
327 331 31_POMME_DE_TERRE_CONSO 1
  332 +20_MAIS_FOURRAGE 1
328 333 Name: LIBCULTURE, dtype: int64
329 334 #+end_example
330 335 ** Histogramme des rendements par culture sur les parcelles pures
... ... @@ -343,7 +348,7 @@ matplot_lib_filename
343 348 #+end_src
344 349  
345 350 #+RESULTS:
346   -[[file:/tmp/babel-X17w0V/figurevurUbh.png]]
  351 +[[file:/tmp/babel-dHk8X9/figure4znAK6.png]]
347 352  
348 353 ** Ajouter une colonne avec le centroïde de la parcelle
349 354 #+begin_src python :results output :session :exports both
... ... @@ -352,13 +357,19 @@ print(df31par.centroid.head())
352 357 #+end_src
353 358  
354 359 #+RESULTS:
355   -: 78 POINT (576943.0567814494 6248166.951789126)
356   -: 644 POINT (573025.7548245641 6264003.533300648)
357   -: 937 POINT (572496.3795754968 6266017.564098726)
358   -: 5115 POINT (560246.9060276946 6236853.034810145)
359   -: 5303 POINT (583985.3814635368 6245950.934973895)
360   -: dtype: object
361   -: 576943.0567814494
  360 +#+begin_example
  361 +__main__:1: SettingWithCopyWarning:
  362 +A value is trying to be set on a copy of a slice from a DataFrame.
  363 +Try using .loc[row_indexer,col_indexer] = value instead
  364 +
  365 +See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  366 +78 POINT (576943.0567814494 6248166.951789126)
  367 +644 POINT (573025.7548245641 6264003.533300648)
  368 +937 POINT (572496.3795754968 6266017.564098726)
  369 +5115 POINT (560246.9060276946 6236853.034810145)
  370 +5303 POINT (583985.3814635368 6245950.934973895)
  371 +dtype: object
  372 +#+end_example
362 373  
363 374 #+begin_src python :results file :session :var matplot_lib_filename=(org-babel-temp-file "figure" ".png") :exports both
364 375 import matplotlib.pyplot as plt
... ... @@ -370,7 +381,7 @@ matplot_lib_filename
370 381 #+end_src
371 382  
372 383 #+RESULTS:
373   -[[file:/tmp/babel-X17w0V/figure4XvQFY.png]]
  384 +[[file:/tmp/babel-dHk8X9/figurek3Dtu3.png]]
374 385  
375 386  
376 387  
... ... @@ -384,7 +395,7 @@ colors = [colordict[c] for c in df31par['LIBCULTURE']]
384 395 #+end_src
385 396  
386 397 #+RESULTS:
387   -: {'14_SOJA': 0, '02_BLE_DUR': 1, '03_ORGE_HIVER': 2, '01_BLE_TENDRE': 3, '11_POIS_PROTEAGINEUX': 4, '13_TOURNESOL': 5, '04_ORGE_PRINTEMPS': 6, '07_TRITICALE': 7, '15_SORGHO': 8, '09_COLZA': 9, '12_FEVE_FEVEROLE': 10, '17_MAIS_GRAIN': 11, '20_MAIS_FOURRAGE': 12, '31_POMME_DE_TERRE_CONSO': 13, '05_AVOINE': 14, '06_SEIGLE': 15}
  398 +: {'06_SEIGLE': 0, '12_FEVE_FEVEROLE': 1, '14_SOJA': 2, '04_ORGE_PRINTEMPS': 3, '15_SORGHO': 4, '20_MAIS_FOURRAGE': 5, '01_BLE_TENDRE': 6, '11_POIS_PROTEAGINEUX': 7, '09_COLZA': 8, '07_TRITICALE': 9, '02_BLE_DUR': 10, '05_AVOINE': 11, '31_POMME_DE_TERRE_CONSO': 12, '03_ORGE_HIVER': 13, '17_MAIS_GRAIN': 14, '13_TOURNESOL': 15}
388 399  
389 400  
390 401  
... ... @@ -404,7 +415,7 @@ matplot_lib_filename
404 415 #+end_src
405 416  
406 417 #+RESULTS:
407   -[[file:/tmp/babel-X17w0V/figureUjRq6C.png]]
  418 +[[file:/tmp/babel-dHk8X9/figureJrlI9h.png]]
408 419  
409 420 Il semble y avoir une corrélation positive entre rendement et taille de la parcelle, conditionnée par la classe
410 421  
... ... @@ -414,13 +425,6 @@ maj6_rnd = rnd_counts.iloc[:6]
414 425 #+end_src
415 426  
416 427 #+RESULTS:
417   -: 01_BLE_TENDRE 23
418   -: 03_ORGE_HIVER 20
419   -: 14_SOJA 20
420   -: 11_POIS_PROTEAGINEUX 19
421   -: 17_MAIS_GRAIN 18
422   -: 13_TOURNESOL 18
423   -: Name: LIBCULTURE, dtype: int64
424 428  
425 429 #+begin_src python :session :results output :exports both
426 430 import matplotlib.pyplot as plt
... ... @@ -511,7 +515,7 @@ matplot_lib_filename
511 515 #+end_src
512 516  
513 517 #+RESULTS:
514   -[[file:/tmp/babel-X17w0V/figureYa6zlX.png]]
  518 +[[file:/tmp/babel-dHk8X9/figureVmQj9r.png]]
515 519  
516 520 La corrélation n'est valable que pour le blé tendre et le tournesol. Il faudra évidemment refaire tout ça avec tous les départements.
517 521  
... ... @@ -544,7 +548,7 @@ matplot_lib_filename
544 548 #+end_src
545 549  
546 550 #+RESULTS:
547   -[[file:/tmp/babel-X17w0V/figureVmssHE.png]]
  551 +[[file:/tmp/babel-dHk8X9/figure8GibKh.png]]
548 552  
549 553 *** Bio
550 554 #+begin_src python :results file :session :var matplot_lib_filename=(org-babel-temp-file "figure" ".png") :exports both
... ... @@ -574,7 +578,7 @@ matplot_lib_filename
574 578 #+end_src
575 579  
576 580 #+RESULTS:
577   -[[file:/tmp/babel-X17w0V/figureE5mY6F.png]]
  581 +[[file:/tmp/babel-dHk8X9/figure2huezh.png]]
578 582  
579 583 *** Différence entre surface admin et vraie surface
580 584 *** Latitude
... ... @@ -595,8 +599,8 @@ OLS Regression Results
595 599 Dep. Variable: RENDNORME R-squared: 0.812
596 600 Model: OLS Adj. R-squared: 0.790
597 601 Method: Least Squares F-statistic: 36.70
598   -Date: Thu, 06 Dec 2018 Prob (F-statistic): 2.68e-46
599   -Time: 16:17:06 Log-Likelihood: -623.34
  602 +Date: Thu, 31 Jan 2019 Prob (F-statistic): 2.68e-46
  603 +Time: 13:44:19 Log-Likelihood: -623.34
600 604 No. Observations: 172 AIC: 1285.
601 605 Df Residuals: 153 BIC: 1344.
602 606 Df Model: 18
... ... @@ -646,7 +650,7 @@ matplot_lib_filename
646 650 #+end_src
647 651  
648 652 #+RESULTS:
649   -[[file:/tmp/babel-X17w0V/figureWkaF0Q.png]]
  653 +[[file:/tmp/babel-dHk8X9/figuremqAIp9.png]]
650 654  
651 655 **** Logit
652 656 Il faut que la variable endogène (cible) soit dans [0,1]
... ... @@ -673,8 +677,8 @@ Optimization terminated successfully.
673 677 Dep. Variable: RENDN150 No. Observations: 172
674 678 Model: Logit Df Residuals: 153
675 679 Method: MLE Df Model: 18
676   -Date: Thu, 06 Dec 2018 Pseudo R-squ.: -1.315
677   -Time: 16:29:13 Log-Likelihood: -84.975
  680 +Date: Thu, 31 Jan 2019 Pseudo R-squ.: -1.315
  681 +Time: 13:44:20 Log-Likelihood: -84.975
678 682 converged: True LL-Null: -36.706
679 683 LLR p-value: 1.000
680 684 =========================================================================================================
... ... @@ -714,7 +718,7 @@ matplot_lib_filename
714 718 #+end_src
715 719  
716 720 #+RESULTS:
717   -[[file:/tmp/babel-X17w0V/figureQYHSMd.png]]
  721 +[[file:/tmp/babel-dHk8X9/figureQcPDrQ.png]]
718 722  
719 723  
720 724  
... ...
notebook/zonal_stats.org 0 → 100644
... ... @@ -0,0 +1,575 @@
  1 +#+TITLE: Construction d'un data frame avec les stats zonales
  2 +#+AUTHOR: Jordi Inglada
  3 +#+LANGUAGE: fr
  4 +
  5 +* Init
  6 +
  7 +Petit bout de python vide pour initialiser la session. Ca permet d'avoir la version de python utilisée
  8 +#+begin_src python :results output :session :exports code
  9 +import geopandas as gp
  10 +import pandas as pd
  11 +#+end_src
  12 +
  13 +#+RESULTS:
  14 +
  15 +
  16 +* Simplification du fichier TERLAB
  17 +- Eliminer les géométries qui ne contiennent pas d'information de rendement.
  18 +
  19 +* Génération des fichiers de stats zonales
  20 +Le code est ici [[file:/work/OT/theia/oso/jordi/src/maassp/jobs/zonal_stats.pbs]]
  21 +
  22 +Comme l'image pour les stats zonales contient 720 bandes, le fichier vecteur contient > 720 * 4 attributs, ce qui dépasse le nb de colonnes dans un fichier sqlite. Le shapefile n'a pas ce problème, mais est limité à 4GB, ce qui est trop petit.
  23 +
  24 +J'ai ensuite séparé la série en 2 demi-séries pour pouvoir générer 2 shapefiles, mais on a ensuite le pb de faire le lien entre les polygones des 2 shapefiles générés, parce qu'on perd l'identifiant.
  25 +
  26 +On pourrait commencer par ajouter un identifiant unique à chaque polygone du fichier TERLAB de façon à pouvoir faire le lien entre les différents fichiers de stats locales (reflectances, DEM, masques) générés à partir du même fichier TERLAB.
  27 +
  28 +#+begin_src bash :results output :exports code
  29 +export ${iota2dir}=
  30 +export terlabdir='/work/OT/theia/oso/shapes/TERLAB/'
  31 +export terlab31=${terlabdir}'SURFACES-2017-PARCELLES-GRAPHIQUES-CONSTATEES_031_20180210'
  32 +cp ${terlab31}.shp ${terlabdir}'terlab31.shp'
  33 +cp ${terlab31}.dbf ${terlabdir}'terlab31.dbf'
  34 +cp ${terlab31}.shx ${terlabdir}'terlab31.shx'
  35 +cp ${terlab31}.prj ${terlabdir}'terlab31.prj'
  36 +python ${iota2dir}/scripts/VectorTools/AddFieldID.py ${terlabdir}'terlab31.shp'
  37 +#+end_src
  38 +
  39 +
  40 +* Import des fichiers dans des DF
  41 +
  42 +Le fichier TERLAB qui contient le dept 31 est:
  43 +
  44 +#+begin_src python :results output :session :exports code
  45 +terlabdir = '/work/OT/theia/oso/shapes/TERLAB/'
  46 +terlab31 = terlabdir+'terlab31.shp'
  47 +df_terlab = gp.read_file(terlab31)
  48 +print(df_terlab.head())
  49 +#+end_src
  50 +
  51 +#+RESULTS:
  52 +: CODE_CULTU SURF_ADM PRECISION SEMENCE DEST_ICHN CULTURE_D1 CULTURE_D2 ... TLENQ CODUTISOL LIBCULTURE SURUTISOL RENDNORME MMEAU geometry
  53 +: 0 ORH 1.42 None 0 None None None ... 1 03 03_ORGE_HIVER NaN NaN NaN POLYGON ((506016.382600002 6234988.195600003, ...
  54 +: 1 PPH 0.00 None 0 None None None ... 0 00 00_XXXXXXXXXX NaN NaN NaN POLYGON ((506032.0680000037 6235002.251000002,...
  55 +: 2 PPH 0.00 None 0 None None None ... 0 00 00_XXXXXXXXXX NaN NaN NaN POLYGON ((506108.3540000021 6234766.824000001,...
  56 +: 3 ORH 1.87 None 0 None None None ... 1 03 03_ORGE_HIVER NaN NaN NaN POLYGON ((506107.6996000037 6234765.226100001,...
  57 +: 4 PPH 0.00 None 0 None None None ... 0 00 00_XXXXXXXXXX NaN NaN NaN POLYGON ((506874.6119000018 6234495.103300001,...
  58 +:
  59 +: [5 rows x 18 columns]
  60 +
  61 +
  62 +Les fichiers shape avec les stats sont
  63 +
  64 +#+begin_src python :results none :session :exports code
  65 +zs_dir='/work/OT/theia/oso/jordi/maassp_data'
  66 +zs_rf1=zs_dir+'/zonal_stats_refl_1.shp'
  67 +zs_rf2=zs_dir+'/zonal_stats_refl_2.shp'
  68 +zs_dem=zs_dir+'/zonal_stats_dem.shp'
  69 +zs_mask=zs_dir+'/zonal_stats_mask.shp'
  70 +#+end_src
  71 +
  72 +Lire les fichiers dans un DF pandas
  73 +
  74 +#+begin_src python :results output :session :exports code
  75 +df_rf1 = gp.read_file(zs_rf1)
  76 +df_rf2 = gp.read_file(zs_rf2)
  77 +df_dem = gp.read_file(zs_dem).to_crs
  78 +df_mask = gp.read_file(zs_mask)
  79 +print(len(df_rf1))
  80 +print(len(df_rf2))
  81 +print(len(df_dem))
  82 +print(len(df_mask))
  83 +#+end_src
  84 +
  85 +#+RESULTS:
  86 +: 85275
  87 +: 85275
  88 +: 85126
  89 +: 85275
  90 +
  91 +Il paraît que le fichier de DEM a perdu quelques polygones?
  92 +
  93 +Vérification des projections :
  94 +
  95 +#+begin_src python :results output :session :exports code
  96 +print(df_dem.crs)
  97 +print(df_mask.crs)
  98 +print(df_rf1.crs)
  99 +print(df_rf1.crs)
  100 +#+end_src
  101 +
  102 +#+RESULTS:
  103 +: {'init': 'epsg:32631'}
  104 +: {'init': 'epsg:2154'}
  105 +: {'init': 'epsg:2154'}
  106 +: {'init': 'epsg:2154'}
  107 +
  108 +Reprojection du DEM
  109 +
  110 +#+begin_src python :results output :session :exports code
  111 +df_dem = df_dem.to_crs(df_mask.crs)
  112 +print(df_dem.crs)
  113 +#+end_src
  114 +
  115 +#+RESULTS:
  116 +: {'init': 'epsg:2154'}
  117 +
  118 +
  119 +* Fusion dans un seul DF
  120 +
  121 +#+begin_src python :results output :session :exports code
  122 +for c in df_terlab.columns:
  123 + print(c)
  124 +#+end_src
  125 +
  126 +#+RESULTS:
  127 +#+begin_example
  128 +CODE_CULTU
  129 +SURF_ADM
  130 +PRECISION
  131 +SEMENCE
  132 +DEST_ICHN
  133 +CULTURE_D1
  134 +CULTURE_D2
  135 +BIO
  136 +ENGAGEMENT
  137 +MARAICHAGE
  138 +AGROFOREST
  139 +TLENQ
  140 +CODUTISOL
  141 +LIBCULTURE
  142 +SURUTISOL
  143 +RENDNORME
  144 +MMEAU
  145 +geometry
  146 +#+end_example
  147 +
  148 +#+begin_src python :results output :session :exports code
  149 +for c in df_mask.columns:
  150 + print(c)
  151 +#+end_src
  152 +
  153 +#+RESULTS:
  154 +#+begin_example
  155 +CODE_CULTU
  156 +SURF_ADM
  157 +SEMENCE
  158 +BIO
  159 +MARAICHAGE
  160 +TLENQ
  161 +CODUTISOL
  162 +LIBCULTURE
  163 +count
  164 +mean_0
  165 +stdev_0
  166 +min_0
  167 +max_0
  168 +mean_1
  169 +stdev_1
  170 +min_1
  171 +max_1
  172 +mean_2
  173 +stdev_2
  174 +min_2
  175 +max_2
  176 +mean_3
  177 +stdev_3
  178 +min_3
  179 +max_3
  180 +mean_4
  181 +stdev_4
  182 +min_4
  183 +max_4
  184 +mean_5
  185 +stdev_5
  186 +min_5
  187 +max_5
  188 +mean_6
  189 +stdev_6
  190 +min_6
  191 +max_6
  192 +mean_7
  193 +stdev_7
  194 +min_7
  195 +max_7
  196 +mean_8
  197 +stdev_8
  198 +min_8
  199 +max_8
  200 +mean_9
  201 +stdev_9
  202 +min_9
  203 +max_9
  204 +mean_10
  205 +stdev_10
  206 +min_10
  207 +max_10
  208 +mean_11
  209 +stdev_11
  210 +min_11
  211 +max_11
  212 +mean_12
  213 +stdev_12
  214 +min_12
  215 +max_12
  216 +mean_13
  217 +stdev_13
  218 +min_13
  219 +max_13
  220 +mean_14
  221 +stdev_14
  222 +min_14
  223 +max_14
  224 +mean_15
  225 +stdev_15
  226 +min_15
  227 +max_15
  228 +mean_16
  229 +stdev_16
  230 +min_16
  231 +max_16
  232 +mean_17
  233 +stdev_17
  234 +min_17
  235 +max_17
  236 +mean_18
  237 +stdev_18
  238 +min_18
  239 +max_18
  240 +mean_19
  241 +stdev_19
  242 +min_19
  243 +max_19
  244 +mean_20
  245 +stdev_20
  246 +min_20
  247 +max_20
  248 +mean_21
  249 +stdev_21
  250 +min_21
  251 +max_21
  252 +mean_22
  253 +stdev_22
  254 +min_22
  255 +max_22
  256 +mean_23
  257 +stdev_23
  258 +min_23
  259 +max_23
  260 +mean_24
  261 +stdev_24
  262 +min_24
  263 +max_24
  264 +mean_25
  265 +stdev_25
  266 +min_25
  267 +max_25
  268 +mean_26
  269 +stdev_26
  270 +min_26
  271 +max_26
  272 +mean_27
  273 +stdev_27
  274 +min_27
  275 +max_27
  276 +mean_28
  277 +stdev_28
  278 +min_28
  279 +max_28
  280 +mean_29
  281 +stdev_29
  282 +min_29
  283 +max_29
  284 +mean_30
  285 +stdev_30
  286 +min_30
  287 +max_30
  288 +mean_31
  289 +stdev_31
  290 +min_31
  291 +max_31
  292 +mean_32
  293 +stdev_32
  294 +min_32
  295 +max_32
  296 +mean_33
  297 +stdev_33
  298 +min_33
  299 +max_33
  300 +mean_34
  301 +stdev_34
  302 +min_34
  303 +max_34
  304 +mean_35
  305 +stdev_35
  306 +min_35
  307 +max_35
  308 +mean_36
  309 +stdev_36
  310 +min_36
  311 +max_36
  312 +mean_37
  313 +stdev_37
  314 +min_37
  315 +max_37
  316 +mean_38
  317 +stdev_38
  318 +min_38
  319 +max_38
  320 +mean_39
  321 +stdev_39
  322 +min_39
  323 +max_39
  324 +mean_40
  325 +stdev_40
  326 +min_40
  327 +max_40
  328 +mean_41
  329 +stdev_41
  330 +min_41
  331 +max_41
  332 +mean_42
  333 +stdev_42
  334 +min_42
  335 +max_42
  336 +mean_43
  337 +stdev_43
  338 +min_43
  339 +max_43
  340 +mean_44
  341 +stdev_44
  342 +min_44
  343 +max_44
  344 +mean_45
  345 +stdev_45
  346 +min_45
  347 +max_45
  348 +mean_46
  349 +stdev_46
  350 +min_46
  351 +max_46
  352 +mean_47
  353 +stdev_47
  354 +min_47
  355 +max_47
  356 +mean_48
  357 +stdev_48
  358 +min_48
  359 +max_48
  360 +mean_49
  361 +stdev_49
  362 +min_49
  363 +max_49
  364 +mean_50
  365 +stdev_50
  366 +min_50
  367 +max_50
  368 +mean_51
  369 +stdev_51
  370 +min_51
  371 +max_51
  372 +mean_52
  373 +stdev_52
  374 +min_52
  375 +max_52
  376 +mean_53
  377 +stdev_53
  378 +min_53
  379 +max_53
  380 +mean_54
  381 +stdev_54
  382 +min_54
  383 +max_54
  384 +mean_55
  385 +stdev_55
  386 +min_55
  387 +max_55
  388 +mean_56
  389 +stdev_56
  390 +min_56
  391 +max_56
  392 +mean_57
  393 +stdev_57
  394 +min_57
  395 +max_57
  396 +mean_58
  397 +stdev_58
  398 +min_58
  399 +max_58
  400 +mean_59
  401 +stdev_59
  402 +min_59
  403 +max_59
  404 +mean_60
  405 +stdev_60
  406 +min_60
  407 +max_60
  408 +mean_61
  409 +stdev_61
  410 +min_61
  411 +max_61
  412 +mean_62
  413 +stdev_62
  414 +min_62
  415 +max_62
  416 +mean_63
  417 +stdev_63
  418 +min_63
  419 +max_63
  420 +mean_64
  421 +stdev_64
  422 +min_64
  423 +max_64
  424 +mean_65
  425 +stdev_65
  426 +min_65
  427 +max_65
  428 +mean_66
  429 +stdev_66
  430 +min_66
  431 +max_66
  432 +mean_67
  433 +stdev_67
  434 +min_67
  435 +max_67
  436 +mean_68
  437 +stdev_68
  438 +min_68
  439 +max_68
  440 +mean_69
  441 +stdev_69
  442 +min_69
  443 +max_69
  444 +mean_70
  445 +stdev_70
  446 +min_70
  447 +max_70
  448 +mean_71
  449 +stdev_71
  450 +min_71
  451 +max_71
  452 +geometry
  453 +#+end_example
  454 +
  455 +#+begin_src python :results output :session :exports code
  456 +print(df_dem.geometry.head())
  457 +print(df_mask.geometry.head())
  458 +print(df_rf1.geometry.head())
  459 +print(df_rf2.geometry.head())
  460 +#+end_src
  461 +
  462 +#+RESULTS:
  463 +#+begin_example
  464 +0 POLYGON ((529596.8529984651 6253247.056947426,...
  465 +1 POLYGON ((529634.1229984688 6253078.247947427,...
  466 +2 POLYGON ((529099.9159984606 6252601.944947429,...
  467 +3 POLYGON ((529413.4439984614 6252660.904947431,...
  468 +4 POLYGON ((529568.9989984701 6252522.081947433,...
  469 +Name: geometry, dtype: object
  470 +0 POLYGON ((529596.8529999098 6253247.056996606,...
  471 +1 POLYGON ((529634.1229999131 6253078.247996605,...
  472 +2 POLYGON ((529099.9159999104 6252601.944996604,...
  473 +3 POLYGON ((529413.4439999078 6252660.904996607,...
  474 +4 POLYGON ((529568.998999915 6252522.081996607, ...
  475 +Name: geometry, dtype: object
  476 +0 POLYGON ((529596.8529999098 6253247.056996606,...
  477 +1 POLYGON ((529634.1229999131 6253078.247996605,...
  478 +2 POLYGON ((529099.9159999104 6252601.944996604,...
  479 +3 POLYGON ((529413.4439999078 6252660.904996607,...
  480 +4 POLYGON ((529568.998999915 6252522.081996607, ...
  481 +Name: geometry, dtype: object
  482 +0 POLYGON ((529596.8529999098 6253247.056996606,...
  483 +1 POLYGON ((529634.1229999131 6253078.247996605,...
  484 +2 POLYGON ((529099.9159999104 6252601.944996604,...
  485 +3 POLYGON ((529413.4439999078 6252660.904996607,...
  486 +4 POLYGON ((529568.998999915 6252522.081996607, ...
  487 +Name: geometry, dtype: object
  488 +#+end_example
  489 +
  490 +
  491 +
  492 +On fait un *spatial join*.
  493 +#+begin_src python :results output :session :exports code
  494 +df_dem_mask = gp.sjoin(df_mask, df_mask, op='intersects')
  495 +print(df_dem_mask.head())
  496 +print(len(df_dem_mask))
  497 +#+end_src
  498 +
  499 +#+RESULTS:
  500 +: Traceback (most recent call last):
  501 +: File "<stdin>", line 1, in <module>
  502 +: File "/tmp/pbs.952734.admin01/babel-mr2dHV/python-0cfLDq", line 1, in <module>
  503 +: df_dem_mask = gp.sjoin(df_mask, df_mask.buffer(-10), op='intersects')
  504 +: File "/work/OT/theia/oso/jordi/conda_envs/datasci-37/lib/python3.7/site-packages/geopandas/tools/sjoin.py", line 51, in sjoin
  505 +: or any(right_df.columns.isin([index_left, index_right]))):
  506 +: File "/work/OT/theia/oso/jordi/conda_envs/datasci-37/lib/python3.7/site-packages/pandas/core/generic.py", line 5053, in __getattr__
  507 +: return object.__getattribute__(self, name)
  508 +: AttributeError: 'GeoSeries' object has no attribute 'columns'
  509 +
  510 +On a beaucoup trop d'éléments dans le nouveau DF, ce qui veut dire qu'il y a trop d'intersections. Si on le fait entre les 2 fichiers de réflectances, qui sont issus de la même donnée :
  511 +#+begin_src python :results output :session :exports code
  512 +df_dem_mask = gp.sjoin(df_rf2, df_rf1, op='intersects')
  513 +print(df_dem_mask.head())
  514 +print(len(df_dem_mask))
  515 +#+end_src
  516 +
  517 +#+RESULTS:
  518 +: CODE_CULTU_left SURF_ADM_left SEMENCE_left BIO_left MARAICHAGE_left TLENQ_left ... min_357_right max_357_right mean_358_right stdev_358_right min_358_right max_358_right
  519 +: 0 PTR 8.46 0 0 0 0 ... 1729.0 3806.0 2436.875147 411.536815 1125.0 3214.0
  520 +: 12043 J6S 1.04 0 0 0 0 ... 1729.0 3806.0 2436.875147 411.536815 1125.0 3214.0
  521 +: 12044 PTR 7.04 0 0 0 0 ... 1729.0 3806.0 2436.875147 411.536815 1125.0 3214.0
  522 +: 0 PTR 8.46 0 0 0 0 ... 1835.0 3904.0 2472.012640 289.545686 1283.0 2912.0
  523 +: 12036 PPH 0.65 0 0 0 0 ... 1835.0 3904.0 2472.012640 289.545686 1283.0 2912.0
  524 +:
  525 +: [5 rows x 2900 columns]
  526 +: 206065
  527 +
  528 +Essayons de merger sur plusieurs colonnes
  529 +#+begin_src python :results output :session :exports code
  530 +df_tmp = df_mask
  531 +df_dem_mask = df_tmp.merge(df_rf1, on=['CODE_CULTU', 'SURF_ADM'])
  532 +print(df_dem_mask.head())
  533 +print(len(df_dem_mask))
  534 +#+end_src
  535 +
  536 +#+RESULTS:
  537 +#+begin_example
  538 +Traceback (most recent call last):
  539 + File "<stdin>", line 1, in <module>
  540 + File "/tmp/pbs.952734.admin01/babel-mr2dHV/python-ATVm0Y", line 2, in <module>
  541 + df_dem_mask = df_tmp.merge(df_rf1, on=['CODE_CULTU', 'SURF_ADM'])
  542 + File "/work/OT/theia/oso/jordi/conda_envs/datasci-37/lib/python3.7/site-packages/geopandas/geodataframe.py", line 475, in merge
  543 + result = DataFrame.merge(self, *args, **kwargs)
  544 + File "/work/OT/theia/oso/jordi/conda_envs/datasci-37/lib/python3.7/site-packages/pandas/core/frame.py", line 6877, in merge
  545 + copy=copy, indicator=indicator, validate=validate)
  546 + File "/work/OT/theia/oso/jordi/conda_envs/datasci-37/lib/python3.7/site-packages/pandas/core/reshape/merge.py", line 48, in merge
  547 + return op.get_result()
  548 + File "/work/OT/theia/oso/jordi/conda_envs/datasci-37/lib/python3.7/site-packages/pandas/core/reshape/merge.py", line 560, in get_result
  549 + concat_axis=0, copy=self.copy)
  550 + File "/work/OT/theia/oso/jordi/conda_envs/datasci-37/lib/python3.7/site-packages/pandas/core/internals/managers.py", line 2061, in concatenate_block_managers
  551 + concatenate_join_units(join_units, concat_axis, copy=copy),
  552 + File "/work/OT/theia/oso/jordi/conda_envs/datasci-37/lib/python3.7/site-packages/pandas/core/internals/concat.py", line 240, in concatenate_join_units
  553 + for ju in join_units]
  554 + File "/work/OT/theia/oso/jordi/conda_envs/datasci-37/lib/python3.7/site-packages/pandas/core/internals/concat.py", line 240, in <listcomp>
  555 + for ju in join_units]
  556 + File "/work/OT/theia/oso/jordi/conda_envs/datasci-37/lib/python3.7/site-packages/pandas/core/internals/concat.py", line 223, in get_reindexed_values
  557 + fill_value=fill_value)
  558 + File "/work/OT/theia/oso/jordi/conda_envs/datasci-37/lib/python3.7/site-packages/pandas/core/algorithms.py", line 1645, in take_nd
  559 + out = np.empty(out_shape, dtype=dtype)
  560 +MemoryError
  561 +#+end_example
  562 +
  563 +
  564 +
  565 +* Templates
  566 +#+begin_src python :results output :session :exports code
  567 +#+end_src
  568 +
  569 +#+begin_src python :results file :session :var matplot_lib_filename=(org-babel-temp-file "figure" ".png") :exports both
  570 +import matplotlib.pyplot as plt
  571 +# plt.figure(figsize=(10,5))
  572 +# df31.hist(column='RENDNORME')
  573 +# plt.savefig(matplot_lib_filename)
  574 +matplot_lib_filename
  575 +#+end_src
... ...