exploration.org 22 KB

Exploration des données TERLAB

Accès à un fichier TERLAB

Trouver le fichier d’un département

Le nom des fichiers zippés est RPG_TERLAB_DEP65-66_2017.7z ou RPG_TERLAB_DEP10_2017.7z


terlabdir = '/home/inglada/stok/DATA/OSO/MAA_SSP/'
import glob
def find_terlab_file(dpt, directory=None):
    """Return the path of the TERLAB archive that holds a department.

    Archive names are expected to look like ``RPG_TERLAB_DEP31_2017.7z``
    (one department), ``RPG_TERLAB_DEP65-66_2017.7z`` (several departments
    separated by ``-``) or ``RPG_TERLAB_Corse_2017.7z`` (matched for the
    codes ``2A`` and ``2B``).

    Parameters
    ----------
    dpt : str or int
        Department code, e.g. ``31``, ``'94'`` or ``'2A'``.
    directory : str, optional
        Directory searched for ``*.7z`` archives. Defaults to the
        module-level ``terlabdir``.

    Returns
    -------
    str or None
        Path of the first matching archive (in ``glob`` order), or ``None``
        when no archive matches.
    """
    if directory is None:
        directory = terlabdir
    code = str(dpt)
    for path in glob.glob('{}/*.7z'.format(directory)):
        fields = path.split('/')[-1].split('_')
        if len(fields) < 3:
            # Not named RPG_TERLAB_<departments>_<year>.7z: skip the file
            # instead of failing with an IndexError on fields[2].
            continue
        token = fields[2]
        if token == 'Corse':
            if code in ('2A', '2B'):
                return path
            continue
        # Drop the three-character 'DEP' prefix; what remains is either a
        # single code ('31') or a '-'-separated list ('90-91-93-94-95').
        if code in token[3:].split('-'):
            return path
    return None

# Sanity checks for find_terlab_file: a multi-department archive, a
# single-department archive and the Corsica archive. The expected paths are
# absolute, so these only pass when the archives exist at those locations.
assert '/home/inglada/stok/DATA/OSO/MAA_SSP/RPG_TERLAB_DEP90-91-93-94-95_2017.7z'==find_terlab_file('94')
assert '/home/inglada/stok/DATA/OSO/MAA_SSP/RPG_TERLAB_DEP90-91-93-94-95_2017.7z'==find_terlab_file('95')
assert '/home/inglada/stok/DATA/OSO/MAA_SSP/RPG_TERLAB_DEP31_2017.7z'==find_terlab_file('31')
assert '/home/inglada/stok/DATA/OSO/MAA_SSP/RPG_TERLAB_Corse_2017.7z'==find_terlab_file('2A')

Mettre le fichier dans un data frame geopandas

import subprocess
import os
import geopandas as gp
def shape2df(departement):
    """Extract a department's TERLAB archive and load its parcel shapefile.

    The archive is located with ``find_terlab_file``, extracted with the
    ``7z`` command-line tool into ``terlabdir``, and the shapefile named
    after the department is read with geopandas.

    Parameters
    ----------
    departement : str or int
        Department code, e.g. ``31``.

    Returns
    -------
    The object returned by ``geopandas.read_file`` for the shapefile.

    Raises
    ------
    FileNotFoundError
        If no archive is found for the department.
    """
    archive = find_terlab_file(str(departement))
    if archive is None:
        raise FileNotFoundError(
            'No TERLAB archive found for department {} in {}'.format(
                departement, terlabdir))
    # -aoa overwrites existing files without prompting. -o sends the extracted
    # files to terlabdir, which is where the shapefile is read from below;
    # without it 7z extracts into the current working directory.
    # subprocess.DEVNULL silences 7z without leaving an open file handle.
    subprocess.call(
        ["7z", "x", archive, "-aoa", "-o{}".format(terlabdir)],
        stdout=subprocess.DEVNULL)
    shapefile = '{}/SURFACES-2017-PARCELLES-GRAPHIQUES-CONSTATEES_0{}_20180210.shp'.format(terlabdir, str(departement))
    return gp.read_file(shapefile)

Quelles sont les variables intéressantes dans les données TERLAB

On vérifie que le fichier est bien lu

df31 = shape2df(31)
print(df31.head())
CODE_CULTU  SURF_ADM PRECISION  SEMENCE                        ...                         SURUTISOL RENDNORME MMEAU                                           geometry
0        ORH      1.42      None        0                        ...                               NaN       NaN   NaN  POLYGON ((506016.382600002 6234988.195600003, ...
1        PPH      0.00      None        0                        ...                               NaN       NaN   NaN  POLYGON ((506032.0680000037 6235002.251000002,...
2        PPH      0.00      None        0                        ...                               NaN       NaN   NaN  POLYGON ((506108.3540000021 6234766.824000001,...
3        ORH      1.87      None        0                        ...                               NaN       NaN   NaN  POLYGON ((506107.6996000037 6234765.226100001,...
4        PPH      0.00      None        0                        ...                               NaN       NaN   NaN  POLYGON ((506874.6119000018 6234495.103300001,...

[5 rows x 18 columns]

On liste les colonnes

print(df31.columns)
Index(['CODE_CULTU', 'SURF_ADM', 'PRECISION', 'SEMENCE', 'DEST_ICHN',
       'CULTURE_D1', 'CULTURE_D2', 'BIO', 'ENGAGEMENT', 'MARAICHAGE',
       'AGROFOREST', 'TLENQ', 'CODUTISOL', 'LIBCULTURE', 'SURUTISOL',
       'RENDNORME', 'MMEAU', 'geometry'],
      dtype='object')

Une ligne

print(df31.iloc[0])
CODE_CULTU                                                  ORH
SURF_ADM                                                   1.42
PRECISION                                                  None
SEMENCE                                                       0
DEST_ICHN                                                  None
CULTURE_D1                                                 None
CULTURE_D2                                                 None
BIO                                                           0
ENGAGEMENT                                                 None
MARAICHAGE                                                    0
AGROFOREST                                                 None
TLENQ                                                         1
CODUTISOL                                                    03
LIBCULTURE                                        03_ORGE_HIVER
SURUTISOL                                                   NaN
RENDNORME                                                   NaN
MMEAU                                                       NaN
geometry      POLYGON ((506016.382600002 6234988.195600003, ...
Name: 0, dtype: object

Quels sont les codes des cultures?

codes_cultures = set(df31.CODE_CULTU)
print(len(codes_cultures))
print(codes_cultures)
167
{'LOT', 'MPC', 'AVH', 'GES', 'PCH', 'TTP', 'DTY', 'LH6', 'PAG', 'FNU', 'PPO', 'SGH', 'MC5', 'TRN', 'MLG', 'CZH', 'PPP', 'SGE', 'LAV', 'ORP', 'BRO', 'OLI', 'ME7', 'POR', 'CML', 'SOJ', 'FEV', 'FLA', 'CHS', 'LH7', 'SNE', 'LU5', 'MIS', 'SPL', 'NOX', 'FVL', 'BVF', 'LUD', 'CID', 'MIE', 'SAI', 'RGA', 'PTR', 'PEP', 'BDH', 'LU6', 'ORH', 'CPL', 'ML6', 'CTG', 'SPH', 'LEC', 'TOP', 'PFP', 'MC7', 'FRA', 'SRS', 'CRD', 'VRT', 'VRG', 'BTP', 'ORT', 'BOR', 'BTA', 'RVI', 'TR6', 'PPR', 'MOT', 'LUZ', 'CAG', 'OIG', 'CHU', 'CIT', 'TAB', 'MC6', 'RDI', 'MLO', 'CCT', 'LU7', 'FF7', 'MID', 'HBL', 'PH6', 'MLT', 'TR5', 'SBO', 'ME6', 'EPE', 'PHI', 'CEL', 'SOG', 'SA5', 'ANE', 'BTH', 'PSL', 'VE7', 'LDH', 'ML5', 'SA6', 'PCL', 'CAR', 'POT', 'PRL', 'PAS', 'MCT', 'NVT', 'OAG', 'CPH', 'MOH', 'BFS', 'J6S', 'TCR', 'CCN', 'AUB', 'J5M', 'AIL', 'FAG', 'TTH', 'MCR', 'TRE', 'LBF', 'TOM', 'PAN', 'CZP', 'LIH', 'J6P', 'AGR', 'MH7', 'GFP', 'TRU', 'LO7', 'CGO', 'PVP', 'AVP', 'FNO', 'SA7', 'PFR', 'PPH', 'ML7', 'PFH', 'FET', 'BDP', 'BOP', 'NOS', 'MH5', 'PTC', 'MPA', 'TR7', 'PH7', 'CES', 'FLP', 'CHT', 'LP7', 'HAR', 'VE6', 'CMB', 'BTN', 'MH6', 'FSG', 'BFP', 'PPA', 'VES', 'ROQ', 'PP7', 'LIP', 'VRC', 'ART'}

Mais il est plus lisible de regarder LIBCULTURE

libs_cultures = set(df31.LIBCULTURE)
print(len(libs_cultures))
print(libs_cultures)
20
{'07_TRITICALE', '00_XXXXXXXXXX', '21_BETTERAVE_INDUSTRIELLE', '20_MAIS_FOURRAGE', '31_POMME_DE_TERRE_CONSO', '17_MAIS_GRAIN', '11_POIS_PROTEAGINEUX', '15_SORGHO', '02_BLE_DUR', '13_TOURNESOL', '05_AVOINE', '09_COLZA', '06_SEIGLE', '01_BLE_TENDRE', '61_MELANGE_CEREALES', '62_MELANGE_PROTEAGINEUX', '12_FEVE_FEVEROLE', '03_ORGE_HIVER', '14_SOJA', '04_ORGE_PRINTEMPS'}

Mais il y a beaucoup plus de CODE_CULTU que de LIBCULTURE. Peut-être que le 2è n’existe que pour les parcelles avec rendement?

Lien entre les 2 champs?

codes_libs = df31[['CODE_CULTU', 'LIBCULTURE']].drop_duplicates()
print(len(codes_libs))
167

Répartition des cultures

df31.LIBCULTURE.value_counts()
00_XXXXXXXXXX                83046
01_BLE_TENDRE                10423
13_TOURNESOL                 10073
02_BLE_DUR                    8268
17_MAIS_GRAIN                 5470
03_ORGE_HIVER                 3012
09_COLZA                      2090
14_SOJA                       1973
20_MAIS_FOURRAGE              1447
15_SORGHO                     1404
07_TRITICALE                  1201
11_POIS_PROTEAGINEUX          1021
12_FEVE_FEVEROLE               470
62_MELANGE_PROTEAGINEUX        324
05_AVOINE                      293
61_MELANGE_CEREALES            290
04_ORGE_PRINTEMPS              251
31_POMME_DE_TERRE_CONSO        157
06_SEIGLE                       19
21_BETTERAVE_INDUSTRIELLE       14
Name: LIBCULTURE, dtype: int64

Histogramme des valeurs de rendement :

df31.RENDNORME.describe()
count    4713.000000
mean       46.262873
std        20.104306
min         1.000000
25%        27.840000
50%        48.000000
75%        63.000000
max       120.000000
Name: RENDNORME, dtype: float64
import matplotlib.pyplot as plt
# DataFrame.hist opens its own figure when no `ax` is given, so the size is
# passed to hist directly; a separate plt.figure(figsize=...) call would only
# leave an unused empty figure and not affect the saved plot.
df31.hist(column='RENDNORME', figsize=(10, 5))
# NOTE(review): matplot_lib_filename is not defined in the visible text;
# presumably it is supplied by the org-babel block header -- confirm.
plt.savefig(matplot_lib_filename)
matplot_lib_filename

/tmp/babel-QEj3SP/figureDH9QA6.png

Valeurs manquantes de rendement?

num_parcelles = len(df31.RENDNORME)
num_rendements = df31.RENDNORME.count()
print('{} parcelles dont {} avec information de rendement'.format(num_parcelles, num_rendements))
131246 parcelles dont 4713 avec information de rendement

Disponibilité des données de rendement par culture

df31[df31.RENDNORME.notnull()]['LIBCULTURE'].value_counts()
13_TOURNESOL               1178
02_BLE_DUR                 1125
01_BLE_TENDRE               935
17_MAIS_GRAIN               294
09_COLZA                    258
03_ORGE_HIVER               250
14_SOJA                     241
11_POIS_PROTEAGINEUX        136
15_SORGHO                   118
07_TRITICALE                 70
12_FEVE_FEVEROLE             57
20_MAIS_FOURRAGE             17
05_AVOINE                    15
04_ORGE_PRINTEMPS            14
06_SEIGLE                     4
31_POMME_DE_TERRE_CONSO       1
Name: LIBCULTURE, dtype: int64
import matplotlib.pyplot as plt
# Bar chart of the number of parcels with a yield value, per crop code.
plt.figure(figsize=(10,5))
has_yield = df31.RENDNORME.notnull()
df31.loc[has_yield, 'CODE_CULTU'].value_counts().plot.bar()
# Save the current figure, then leave the file name as the last expression.
plt.savefig(matplot_lib_filename)
matplot_lib_filename

/tmp/babel-QEj3SP/figureaXXL2p.png

Distribution statistique des rendements par culture

TRN
       CODE_CULTU  SURF_ADM PRECISION  SEMENCE                        ...                         SURUTISOL RENDNORME   MMEAU                                           geometry
24            TRN      0.60      None        0                        ...                               NaN       NaN     NaN  POLYGON ((555017.3030000031 6288487.261, 55503...
30            TRN      3.65      None        0                        ...                               NaN       NaN     NaN  POLYGON ((581263.6560000032 6252087.490000002,...
31            TRN      6.69      None        0                        ...                               NaN       NaN     NaN  POLYGON ((580446.200000003 6251350.913000003, ...
33            TRN     14.25      None        0                        ...                               NaN       NaN     NaN  POLYGON ((580111.6431000009 6250744.730800003,...
45            TRN      1.05      None        0                        ...                               NaN       NaN     NaN  POLYGON ((581790.2880000025 6251575.461000003,...
48            TRN     12.44      None        0                        ...                               NaN       NaN     NaN  POLYGON ((581401.3687999994 6250285.316800002,...
53            TRN      2.16      None        0                        ...                               NaN       NaN     NaN  POLYGON ((554394.9580000043 6288677.411000002,...
54            TRN      8.48      None        0                        ...                               NaN       NaN     NaN  POLYGON ((554866.626000002 6288673.861000001, ...
58            TRN      3.52      None        0                        ...                               NaN       NaN     NaN  POLYGON ((553234.800999999 6287448.078000002, ...
72            TRN      0.44      None        0                        ...                             25.00      24.0   60.00  POLYGON ((575801.138000004 6245039.913000003, ...
81            TRN      9.79      None        0                        ...                             25.00      24.0   60.00  POLYGON ((575622.563000001 6245093.604600001, ...
85            TRN      0.60      None        0                        ...                             25.00      24.0   60.00  POLYGON ((576226.2412 6245008.710800003, 57621...
86            TRN      3.60      None        0                        ...                             25.00      24.0   60.00  POLYGON ((576052.189199999 6245216.165200002, ...
88            TRN      6.77      None        0                        ...                             25.00      24.0   60.00  POLYGON ((577236.5139000043 6244098.652800001,...
90            TRN      2.04      None        0                        ...                             25.00      24.0   60.00  POLYGON ((577029.3339999989 6244296.425000001,...
92            TRN      2.49      None        0                        ...                             25.00      24.0   60.00  POLYGON ((578223.6330000013 6246276.920000002,...
104           TRN      1.00      None        0                        ...                               NaN       NaN     NaN  POLYGON ((555160.8339999989 6288614.073000003,...
111           TRN      5.59      None        0                        ...                               NaN       NaN     NaN  POLYGON ((609866.8835000023 6263464.590600003,...
119           TRN      7.70      None        0                        ...                               NaN       NaN     NaN  POLYGON ((586628.8779999986 6287265.739100002,...
228           TRN     29.52      None        0                        ...                               NaN       NaN     NaN  POLYGON ((586507.767400004 6249033.661900003, ...
240           TRN      4.68      None        0                        ...                               NaN       NaN     NaN  POLYGON ((511739.449000001 6241834.759, 511737...
322           TRN      4.32      None        0                        ...                               NaN       NaN     NaN  POLYGON ((610430.0773999989 6261651.982700001,...
323           TRN      5.69      None        0                        ...                               NaN       NaN     NaN  POLYGON ((614708.3079999983 6277454.932, 61471...
326           TRN      1.67      None        0                        ...                               NaN       NaN     NaN  POLYGON ((615043.6757000014 6277122.2916, 6150...
333           TRN      1.68      None        0                        ...                               NaN       NaN     NaN  POLYGON ((608512.7588 6262650.3046, 608452.698...
334           TRN      5.63      None        0                        ...                               NaN       NaN     NaN  POLYGON ((608843.5855000019 6262790.020500001,...
338           TRN      9.24      None        0                        ...                               NaN       NaN     NaN  POLYGON ((610236.4738000035 6262741.7326, 6102...
341           TRN      0.44      None        1                        ...                               NaN       NaN     NaN  POLYGON ((610038.8352999985 6262889.500300001,...
344           TRN      0.41      None        1                        ...                               NaN       NaN     NaN  POLYGON ((610132.8236000016 6262748.6778, 6101...
346           TRN      2.38      None        0                        ...                               NaN       NaN     NaN  POLYGON ((609944.3612000048 6262859.592700001,...
...       ...       ...      ...                        ...                               ...       ...     ...                                                ...
130858        TRN      0.00      None        0                        ...                               NaN       NaN     NaN  POLYGON ((591821.9479999989 6246798.5711, 5917...
130862        TRN      2.47      None        0                        ...                               NaN       NaN     NaN  POLYGON ((561445.3229999989 6247174.507000003,...
130885        TRN      6.04      None        0                        ...                               NaN       NaN     NaN  POLYGON ((518236.2965000048 6252405.700800002,...
130900        TRN      4.37      None        0                        ...                               NaN       NaN     NaN  POLYGON ((568873.9149999991 6253983.294, 56885...
130922        TRN      5.74      None        0                        ...                               NaN       NaN     NaN  POLYGON ((604401.186999999 6269327.276300002, ...
130923        TRN      2.04      None        0                        ...                               NaN       NaN     NaN  POLYGON ((604401.1928000003 6269327.282900002,...
130927        TRN      0.15      None        0                        ...                               NaN       NaN     NaN  POLYGON ((546528.9905000031 6313053.770800002,...
130952        TRN      7.88      None        0                        ...                               NaN       NaN     NaN  POLYGON ((562847.7470000014 6247395.255000003,...
130957        TRN      1.85      None        0                        ...                               NaN       NaN     NaN  POLYGON ((562305.936999999 6243942.735000003, ...
130958        TRN      1.80      None        0                        ...                               NaN       NaN     NaN  POLYGON ((562131.1979999989 6244150.639700003,...
130959        TRN      6.73      None        0                        ...                               NaN       NaN     NaN  POLYGON ((562298.2080999985 6244368.4877, 5624...
130980        TRN      0.63      None        0                        ...                               NaN       NaN     NaN  POLYGON ((546709.0170000046 6271094.791000001,...
130981        TRN      0.41      None        0                        ...                               NaN       NaN     NaN  POLYGON ((546708.8593000025 6271093.847900003,...
130987        TRN      2.59      None        0                        ...                             85.00      24.0  204.00  POLYGON ((597935.5368999988 6259834.054000001,...
130994        TRN      1.17      None        0                        ...                               NaN       NaN     NaN  POLYGON ((612614.8017000034 6263528.291700002,...
130995        TRN      3.37      None        0                        ...                               NaN       NaN     NaN  POLYGON ((612437.2829999998 6263578.228300001,...
131012        TRN      3.18      None        0                        ...                               NaN       NaN     NaN  POLYGON ((546528.6845000014 6313043.021100003,...
131023        TRN      0.70      None        0                        ...                               NaN       NaN     NaN  POLYGON ((541206.675999999 6300451.175000001, ...
131095        TRN      0.59      None        0                        ...                               NaN       NaN     NaN  POLYGON ((541208.5989000052 6300274.455000002,...
131096        TRN      1.33      None        0                        ...                               NaN       NaN     NaN  POLYGON ((541206.8114000037 6300275.685600001,...
131120        TRN      0.00      None        0                        ...                               NaN       NaN     NaN  POLYGON ((580034.3561000004 6250682.503200002,...
131134        TRN      0.00      None        0                        ...                               NaN       NaN     NaN  POLYGON ((597674.930399999 6258343.746300001, ...
131136        TRN      0.00      None        0                        ...                               NaN       NaN     NaN  POLYGON ((591802.4574000016 6246689.483200002,...
131146        TRN      4.60      None        0                        ...                               NaN       NaN     NaN  POLYGON ((581203.7241000012 6256713.529000003,...
131152        TRN      0.00      None        0                        ...                             80.16      24.0  192.38  POLYGON ((598665.1539999992 6259764.382800002,...
131156        TRN      2.49      None        0                        ...                               NaN       NaN     NaN  POLYGON ((602208.8986000046 6260887.712900002,...
131212        TRN      1.09      None        0                        ...                               NaN       NaN     NaN  POLYGON ((595624.1480000019 6250820.873, 59563...
131240        TRN      1.91      None        0                        ...                               NaN       NaN     NaN  POLYGON ((501596.7771000043 6236236.866, 50158...
131241        TRN      1.99      None        0                        ...                               NaN       NaN     NaN  POLYGON ((501600.5104999989 6236239.7509, 5015...
131243        TRN      0.38      None        0                        ...                               NaN       NaN     NaN  POLYGON ((592146.2823000029 6256409.391900003,...

[10073 rows x 18 columns]
# Number of parcels with a non-null RENDNORME, per crop code (value_counts
# orders the result by decreasing count).
rnd_counts = df31[df31.RENDNORME.notnull()]['CODE_CULTU'].value_counts()
# Keep the first six entries, i.e. the six most frequent crop codes.
maj6_rnd = rnd_counts.iloc[:6]
# 3x2 grid of axes sharing both x and y, one panel per selected crop code.
fig, axes = plt.subplots(3, 2, sharex=True, sharey=True)
print(axes)
for current_cult, axis in zip(maj6_rnd.index, axes.flatten()):
    # The crop code is used as the x label of its panel.
    axis.set_xlabel(current_cult)
    df31[df31['CODE_CULTU']==current_cult].hist(column='RENDNORME', ax=axis)
    # Presumably clears the per-panel title drawn by hist; the figure gets a
    # single title via suptitle below.
    axis.set_title('')
plt.suptitle('RENDNORME')
# NOTE(review): matplot_lib_filename is not defined in the visible text;
# presumably it is supplied by the org-babel block header -- confirm.
plt.savefig(matplot_lib_filename)
matplot_lib_filename

/tmp/babel-QEj3SP/figureIq8abo.png

Quels sont les départements contenant des informations de rendement?

Quelles sont les cultures pour lesquelles les informations de rendement sont les plus nombreuses?

Quelles sont les tuiles S2 nécessaires pour couvrir chaque département?