Commit 669bf4334e62f0c2eb6afa7e5331a19acbbc7e73

Authored by Louis Baetens
1 parent 0245d468
Exists in master

ENH the pixels_features

ALCD/parameters_files/global_parameters.json
... ... @@ -103,11 +103,11 @@
103 103 "training_proportion": "0.9"
104 104 },
105 105 "user_choices": {
106   - "clear_date": "20160427",
107   - "current_date": "20160417",
108   - "location": "Marrakech",
109   - "main_dir": "/mnt/data/home/baetensl/clouds_detection_git/Data_ALCD/Marrakech_29RPQ_20160417",
110   - "raw_img": "Marrakech_bands.tif",
111   - "tile": "29RPQ"
  106 + "clear_date": "20171005",
  107 + "current_date": "20171002",
  108 + "location": "Arles",
  109 + "main_dir": "/mnt/data/home/baetensl/clouds_detection_git/Data_ALCD/Arles_31TFJ_20171002",
  110 + "raw_img": "Arles_bands.tif",
  111 + "tile": "31TFJ"
112 112 }
113 113 }
114 114 \ No newline at end of file
... ...
PCC/parameters_files/comparison_parameters.json
... ... @@ -7,13 +7,13 @@
7 7 "resolution": "60"
8 8 },
9 9 "automatically_generated": {
10   - "alcd_main_dir": "/mnt/data/home/baetensl/clouds_detection_git/Data_ALCD/Alta_Floresta_Brazil_21LWK_20180813",
11   - "current_date": "20180813",
12   - "location": "Alta_Floresta_Brazil",
13   - "main_dir": "/mnt/data/home/baetensl/clouds_detection_git/Data_PCC/HOT016_final/Alta_Floresta_Brazil_21LWK_20180813",
14   - "raw_img": "Alta_Floresta_Brazil_bands.tif",
15   - "sub_data_dir": "HOT016_final",
16   - "tile": "21LWK"
  10 + "alcd_main_dir": "/mnt/data/home/baetensl/clouds_detection_git/Data_ALCD/Ispra_32TMR_20170815",
  11 + "current_date": "20170815",
  12 + "location": "Ispra",
  13 + "main_dir": "/mnt/data/home/baetensl/clouds_detection_git/Data_PCC/HOT016/Ispra_32TMR_20170815",
  14 + "raw_img": "Ispra_bands.tif",
  15 + "sub_data_dir": "HOT016",
  16 + "tile": "32TMR"
17 17 },
18 18 "maja_parameters": {
19 19 "maja_dir_name": "MAJA_3_1_S2AS2B_HOT016",
... ...
PCC/pixels_features_analysis.py
... ... @@ -8,46 +8,86 @@ import matplotlib.pyplot as plt
8 8 import json
9 9 import otbApplication
10 10 from matplotlib.lines import Line2D
  11 +import collections
  12 +
  13 +#~ def extract_class_band(raw_img, band_to_extract, diff_img, diff_class_nb, alcd_labeled_img='', alcd_class_nb = 0):
  14 + #~ '''
  15 + #~ From the difference image (e.g. diff_binary) between the processing
  16 + #~ chain and the ALCD, and the ALCD labeled, extract the data for the pixels
  17 + #~ in the class_nb for both images
  18 + #~ Mostly used to plot histograms for each class and band
  19 + #~ '''
  20 +
  21 + #~ BandMathX = otbApplication.Registry.CreateApplication("BandMathX")
  22 + #~ if alcd_labeled_img == '':
  23 + #~ BandMathX.SetParameterStringList("il", [str(raw_img), str(diff_img)])
  24 + #~ expression = "(im2b1 == {diff_class_nb} ? im1b{band} : 0)".format(
  25 + #~ diff_class_nb = str(diff_class_nb), band = str(band_to_extract))
  26 + #~ else:
  27 + #~ BandMathX.SetParameterStringList("il", [str(raw_img), str(diff_img), str(alcd_labeled_img)])
  28 + #~ expression = "((im2b1 == {diff_class_nb} && im3b1 == {alcd_class_nb}) ? im1b{band} : 0)".format(
  29 + #~ diff_class_nb = str(diff_class_nb),
  30 + #~ alcd_class_nb = str(alcd_class_nb),
  31 + #~ band = str(band_to_extract))
  32 +
  33 + #~ BandMathX.SetParameterString("exp", expression)
  34 +
  35 + #~ BandMathX.UpdateParameters()
  36 + #~ BandMathX.Execute()
  37 +
  38 + #~ # Pass directly to numpy array, doesnt write on disk
  39 + #~ extraction_output = BandMathX.GetImageAsNumpyArray('out')
  40 + #~ data = np.array(extraction_output)
11 41  
12   -def extract_class_band(raw_img, band_to_extract, diff_img, diff_class_nb, alcd_labeled_img='', alcd_class_nb = 0):
13   - '''
14   - From the difference image (e.g. diff_binary) between the processing
15   - chain and the ALCD, and the ALCD labeled, extract the data for the pixels
16   - in the class_nb for both images
17   - Mostly used to plot histograms for each class and band
18   - '''
  42 + #~ return data
  43 +
  44 +
  45 +
  46 +#~ def count_intersected_classes(diff_img, diff_class_nb, alcd_labeled_img, alcd_class_nb):
  47 + #~ '''
  48 + #~ Count the number of pixels being at the same time of the
  49 + #~ diff_class_nb and alcd_class_nb.
  50 + #~ e.g. if diff_class_nb = 1 and alcd_class_nb = 2, it will returns the
  51 + #~ number of true positive and low_clouds pixels.
  52 + #~ '''
19 53  
20   - BandMathX = otbApplication.Registry.CreateApplication("BandMathX")
21   - if alcd_labeled_img == '':
22   - BandMathX.SetParameterStringList("il", [str(raw_img), str(diff_img)])
23   - expression = "(im2b1 == {diff_class_nb} ? im1b{band} : 0)".format(
24   - diff_class_nb = str(diff_class_nb), band = str(band_to_extract))
25   - else:
26   - BandMathX.SetParameterStringList("il", [str(raw_img), str(diff_img), str(alcd_labeled_img)])
27   - expression = "((im2b1 == {diff_class_nb} && im3b1 == {alcd_class_nb}) ? im1b{band} : 0)".format(
28   - diff_class_nb = str(diff_class_nb),
29   - alcd_class_nb = str(alcd_class_nb),
30   - band = str(band_to_extract))
  54 + #~ Superimpose = otbApplication.Registry.CreateApplication("Superimpose")
  55 + #~ Superimpose.SetParameterString("inr", str(diff_img))
  56 + #~ Superimpose.SetParameterString("inm", str(alcd_labeled_img))
  57 + #~ Superimpose.SetParameterString("interpolator", "nn")
  58 + #~ Superimpose.UpdateParameters()
  59 + #~ Superimpose.Execute()
  60 +
  61 +
  62 + #~ BandMathX = otbApplication.Registry.CreateApplication("BandMathX")
  63 +
  64 + #~ BandMathX.AddParameterStringList("il", str(diff_img))
  65 + #~ BandMathX.AddImageToParameterInputImageList("il", Superimpose.GetParameterOutputImage("out"))
  66 + #~ #im1 is the diff image, im2 the ALCD ref
  67 + #~ expression = "((im1b1 == {diff_class_nb} && im2b1 == {alcd_class_nb}) ? im1b1 : 0)".format(
  68 + #~ diff_class_nb = str(diff_class_nb),
  69 + #~ alcd_class_nb = str(alcd_class_nb))
31 70  
32   - BandMathX.SetParameterString("exp", expression)
  71 + #~ BandMathX.SetParameterString("exp", expression)
33 72  
34   - BandMathX.UpdateParameters()
35   - BandMathX.Execute()
  73 + #~ BandMathX.UpdateParameters()
  74 + #~ BandMathX.Execute()
36 75  
37   - # Pass directly to numpy array, doesnt write on disk
38   - extraction_output = BandMathX.GetImageAsNumpyArray('out')
39   - data = np.array(extraction_output)
40   -
41   - return data
  76 + #~ # Pass directly to numpy array, doesnt write on disk
  77 + #~ extraction_output = BandMathX.GetImageAsNumpyArray('out')
  78 + #~ data = np.array(extraction_output)
42 79  
  80 + #~ return data
  81 +
  82 +
  83 +
  84 +
  85 +
43 86  
44 87  
45   -def count_misclassified_class(diff_img, diff_class_nb, alcd_labeled_img, alcd_class_nb):
  88 +def get_alcd_label_from_diff(diff_img, diff_class_nb, alcd_labeled_img):
46 89 '''
47   - Count the number of pixels being at the same time of the
48   - diff_class_nb and alcd_class_nb.
49   - e.g. if diff_class_nb = 1 and alcd_class_nb = 2, it will returns the
50   - number of true positive and low_clouds pixels.
  90 + A FAIRE
51 91 '''
52 92  
53 93 Superimpose = otbApplication.Registry.CreateApplication("Superimpose")
... ... @@ -62,9 +102,10 @@ def count_misclassified_class(diff_img, diff_class_nb, alcd_labeled_img, alcd_cl
62 102  
63 103 BandMathX.AddParameterStringList("il", str(diff_img))
64 104 BandMathX.AddImageToParameterInputImageList("il", Superimpose.GetParameterOutputImage("out"))
65   - expression = "((im1b1 == {diff_class_nb} && im2b1 == {alcd_class_nb}) ? im1b1 : 0)".format(
66   - diff_class_nb = str(diff_class_nb),
67   - alcd_class_nb = str(alcd_class_nb))
  105 + #im1 is the diff image, im2 the ALCD ref
  106 + #~ expression = "(im1b1 == {diff_class_nb} ? im2b1 : -1)".format(
  107 + expression = "((im1b1 == {diff_class_nb} ? im2b1 : -1) + (im1b1 == 0 ? 1 : 0))".format(
  108 + diff_class_nb = str(diff_class_nb))
68 109  
69 110 BandMathX.SetParameterString("exp", expression)
70 111  
... ... @@ -73,21 +114,30 @@ def count_misclassified_class(diff_img, diff_class_nb, alcd_labeled_img, alcd_cl
73 114  
74 115 # Pass directly to numpy array, doesnt write on disk
75 116 extraction_output = BandMathX.GetImageAsNumpyArray('out')
76   - data = np.array(extraction_output)
  117 + alcd_masked = np.array(extraction_output).flatten().astype(int)
  118 +
  119 + class_numbers = collections.Counter(alcd_masked)
  120 +
  121 + nb_valid_pixels = len(alcd_masked)-class_numbers[0]
  122 +
77 123  
78   - return data
  124 + return class_numbers, nb_valid_pixels
  125 +
  126 +
79 127  
80 128  
81 129  
82   -def plot_confusion_repartition(comparison_parameters, processing_chain):
  130 +def plot_confusion_repartition(location, date, processing_chain, cas_alcd):
83 131 '''
84 132 Plot the repartition of classes for each cell of the confusion
85 133 matrix.
86 134 e.g. what percentage of low_clouds and clouds_shadows in false positive
87 135 '''
88   - main_dir = comparison_parameters["automatically_generated"]["main_dir"]
89   - location = comparison_parameters["automatically_generated"]["location"]
90   - date = comparison_parameters["automatically_generated"]["current_date"]
  136 + paths_configuration = json.load(open(op.join('..', 'paths_configuration.json')))
  137 + data_pcc_dir = paths_configuration["data_paths"]["data_pcc"]
  138 + tile = paths_configuration["tile_location"][location]
  139 + main_dir = op.join(data_pcc_dir, '{}_{}_{}'.format(location, tile, date))
  140 +
91 141  
92 142 class_nb = []
93 143 all_proportion = []
... ... @@ -104,7 +154,8 @@ def plot_confusion_repartition(comparison_parameters, processing_chain):
104 154 '6':'water', '7':'snow'}
105 155  
106 156 for confusion_mat_place in [1,2,3,4]:
107   - non_zero_class_nb, nb_per_class, proportion_in_image = get_miclassified_histograms(comparison_parameters, processing_chain, diff_class_nb = confusion_mat_place)
  157 + non_zero_class_nb, nb_per_class, proportion_in_image = get_miclassified_histograms(location,
  158 + date, cas_alcd, processing_chain, diff_class_nb = confusion_mat_place, display = True)
108 159 class_nb.append(non_zero_class_nb)
109 160 all_proportion.append(nb_per_class)
110 161 mat_conf_proportion.append(proportion_in_image)
... ... @@ -158,9 +209,9 @@ def plot_confusion_repartition(comparison_parameters, processing_chain):
158 209 plt.tight_layout()
159 210  
160 211 # save the figure
161   - #~ plt.show()
  212 + plt.show()
162 213 out_fig = op.join(main_dir, 'Statistics', 'features', 'repartition_matconf_{}_{}_{}.png'.format(processing_chain, location, date))
163   - plt.savefig(out_fig, bbox_inches='tight')
  214 + #~ plt.savefig(out_fig, bbox_inches='tight')
164 215  
165 216  
166 217  
... ... @@ -172,26 +223,79 @@ def plot_confusion_repartition(comparison_parameters, processing_chain):
172 223  
173 224  
174 225  
175   -def get_miclassified_histograms(comparison_parameters, processing_chain, diff_class_nb = 1, display = False):
  226 +def get_miclassified_histograms(location, date, cas_alcd, processing_chain, diff_class_nb = 1, display = False):
176 227 paths_configuration = json.load(open(op.join('..', 'paths_configuration.json')))
177 228  
178   - original_data = paths_configuration["data_paths"]["data_alcd"]
179   - location = str(comparison_parameters["automatically_generated"]["location"])
180   - date = str(comparison_parameters["automatically_generated"]["current_date"])
181   - location_dir = location + '_' + '*' + '_' + date
182   - raw_img = glob.glob(op.join(original_data, location_dir, 'In_data', 'Image', (location + '_bands.tif')))[0]
183   -
184   - main_dir = comparison_parameters["automatically_generated"]["main_dir"]
  229 + data_alcd_path = paths_configuration["data_paths"]["data_alcd"]
  230 + data_pcc_path = paths_configuration["data_paths"]["data_pcc"]
  231 + tile = paths_configuration["tile_location"][location]
  232 +
  233 + location_dir = location + '_' + tile + '_' + date
  234 + raw_img = glob.glob(op.join(data_alcd_path, location_dir, 'In_data', 'Image', (location + '_bands.tif')))[0]
  235 +
  236 + main_dir = op.join(data_pcc_path, '{}_{}_{}'.format(location, tile, date))
  237 +
  238 + alcd_labeled_img = op.join(main_dir, 'Original_data', 'labeled_img_original.tif')
  239 +
  240 + if cas_alcd == 1:
  241 + reference_sub_dir = 'ALCD_initial'
  242 + if processing_chain == 'maja':
  243 + processing_name = 'maja_ini'
  244 + elif processing_chain == 'sen2cor':
  245 + processing_name = 'sen2cor_ini'
  246 + elif processing_chain == 'fmask':
  247 + processing_name = 'fmask_ini'
  248 + elif cas_alcd == 2:
  249 + reference_sub_dir = 'ALCD_dilat'
  250 + if processing_chain == 'maja':
  251 + processing_name = 'maja_ini'
  252 + elif processing_chain == 'sen2cor':
  253 + processing_name = 'sen2cor_dilat'
  254 + elif processing_chain == 'fmask':
  255 + processing_name = 'fmask_dilat'
  256 + elif cas_alcd == 3:
  257 + reference_sub_dir = 'ALCD_initial'
  258 + if processing_chain == 'maja':
  259 + processing_name = 'maja_erode'
  260 + elif processing_chain == 'sen2cor':
  261 + processing_name = 'sen2cor_ini'
  262 + elif processing_chain == 'fmask':
  263 + processing_name = 'fmask_ini'
  264 +
  265 +
  266 +
  267 +
  268 + diff_img = glob.glob(op.join(main_dir, 'Binary_difference', reference_sub_dir, '*{}*.tif'.format(processing_name)))[0]
  269 +
  270 + numbers_dict, nb_valid_pixels = get_alcd_label_from_diff(diff_img, diff_class_nb, alcd_labeled_img)
  271 +
185 272  
186   - reference_sub_dir = comparison_parameters["processing"]["alcd_initial"]["sub_dir"]
  273 + non_zero_class_nb = []
  274 + nb_per_class = []
  275 +
  276 +
  277 + all_classes_nb = np.arange(1,8)
  278 + for alcd_class_nb in all_classes_nb:
  279 + if numbers_dict[alcd_class_nb] > 0:
  280 + non_zero_class_nb.append(alcd_class_nb)
  281 + nb_per_class.append(numbers_dict[alcd_class_nb])
  282 +
  283 +
  284 + total_samples = sum(nb_per_class)
  285 + nb_per_class = [float(n)/total_samples for n in nb_per_class]
  286 +
  287 + proportion_in_image = float(total_samples)/float(nb_valid_pixels)
  288 +
  289 + return non_zero_class_nb, nb_per_class, proportion_in_image
  290 +
  291 +
187 292  
188   - diff_img = glob.glob(op.join(main_dir, 'Binary_difference', reference_sub_dir, '*{}*.tif'.format(processing_chain)))[0]
189   - alcd_labeled_img = op.join(main_dir, 'Original_data', comparison_parameters["processing"]["alcd_initial"]["cloud_mask"])
190 293  
191 294 alcd_class_dict = {'0':'null value', '1':'background', '2':'low clouds',
192 295 '3':'high clouds', '4':'clouds shadows', '5':'land',
193 296 '6':'water', '7':'snow'}
194 297  
  298 + # Create the colors
195 299 all_classes_nb = np.arange(1,8)
196 300 colormap = create_colormap(color_pattern = 'labeled', class_call = 'number')
197 301  
... ... @@ -202,7 +306,9 @@ def get_miclassified_histograms(comparison_parameters, processing_chain, diff_cl
202 306 # get the data for each class
203 307 nb_per_class = []
204 308 for alcd_class_nb in all_classes_nb:
205   - data = count_misclassified_class(diff_img, diff_class_nb, alcd_labeled_img, alcd_class_nb)
  309 + #~ data = count_intersected_classes(diff_img, diff_class_nb, alcd_labeled_img, alcd_class_nb)
  310 + data = get_alcd_label_from_diff(diff_img, diff_class_nb, alcd_labeled_img)
  311 + return
206 312 data = data.flatten()
207 313 total_pixels_in_img = len(data)
208 314 # remove the zero values
... ... @@ -222,183 +328,10 @@ def get_miclassified_histograms(comparison_parameters, processing_chain, diff_cl
222 328  
223 329 proportion_in_image = float(total_samples)/float(total_pixels_in_img)
224 330  
225   - if display == False:
226   - return non_zero_class_nb, nb_per_class, proportion_in_image
227   - else:
228   - # create plot
229   - fig, ax = plt.subplots()
230   - ax.yaxis.grid(True) # put horizontal grid
231   - nb_alcd_classes = len(nb_per_class)
232   - index = np.arange(nb_alcd_classes)
233   -
234   - bar_width = 0.25+1./(nb_alcd_classes+2)
235   - opacity = 0.8
236   -
237   - # One rect per metric
238   - for k in range(len(nb_per_class)):
239   - rects = plt.bar(index[k], nb_per_class[k], bar_width,
240   - alpha=opacity,
241   - color=non_zero_colors[k],
242   - label = non_zero_names[k])
243   -
244   - plt.xlabel('True class')
245   - plt.ylabel('Proportion')
246   -
247   - location = comparison_parameters["automatically_generated"]["location"]
248   - date = comparison_parameters["automatically_generated"]["current_date"]
249   -
250   - plt.ylim(0.,1.)
251   -
252   - dict_diff_class_nb={1:'True Positive', 2:'False Negative', 3:'False Positive', 4:'True Negative'}
253   -
254   -
255   - plt.title('Repartition in the {}\n{:.02f}% of all pixels\n{}, {}'.format(
256   - dict_diff_class_nb[diff_class_nb], proportion_in_image, location, date))
257   -
258   - plt.xticks(index + bar_width/2*(nb_alcd_classes/2), (non_zero_names))
259   - plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
260   - #~ plt.legend()
261   -
262   - plt.tight_layout()
263   - plt.show()
264   -
265   - dict_diff_class_nb_short={1:'TP', 2:'FN', 3:'FP', 4:'TN'}
266   - out_fig = op.join(main_dir, 'Statistics',
267   - 'misclassified_details_{}_{}_{}_{}.png'.format(dict_diff_class_nb_short[diff_class_nb],location, date, processing_chain))
268   - print(out_fig)
269   - #~ plt.savefig(out_fig, bbox_inches='tight')
270   -
271   -
272   -
273   -
274   -
275   -
276 331  
277 332  
  333 + return non_zero_class_nb, nb_per_class, proportion_in_image
278 334  
279   -
280   -
281   -def get_normalized_histo(comparison_parameters, processing_chain, diff_class_nb, alcd_class_nb, band, min_range = None, max_range = None):
282   - '''
283   - Return a normalized histogram from a class and a feature nb
284   - The range is limited to a percentage of the total data
285   -
286   - TODO : change the range parameters, for the features 'time' especially
287   - Change the 'original_data' directory
288   - '''
289   - paths_configuration = json.load(open(op.join('..', 'paths_configuration.json')))
290   - #~ original_data = '/mnt/data/home/baetensl/clouds_detection_git/Data_ALCD'
291   - original_data = paths_configuration["data_paths"]["data_alcd"]
292   - location = str(comparison_parameters["automatically_generated"]["location"])
293   - location_dir = location + '_' + '*' + '_' + str(comparison_parameters["automatically_generated"]["current_date"])
294   - raw_img = glob.glob(op.join(original_data, location_dir, 'In_data', 'Image', (location + '_bands.tif')))[0]
295   - main_dir = comparison_parameters["automatically_generated"]["main_dir"]
296   -
297   - diff_img = glob.glob(op.join(main_dir, 'Binary_difference', 'ALCD_initial', '*{}*.tif'.format(processing_chain)))[0]
298   - alcd_labeled_img = op.join(main_dir, 'Original_data', comparison_parameters["processing"]["alcd_initial"]["cloud_mask"])
299   -
300   -
301   - alcd_class_dict = {'0':'null value', '1':'background', '2':'low clouds',
302   - '3':'high clouds', '4':'clouds shadows', '5':'land',
303   - '6':'water', '7':'snow'}
304   -
305   - # get the data for each class
306   - data = extract_class_band(raw_img, band, diff_img, diff_class_nb, alcd_labeled_img, alcd_class_nb)
307   -
308   - # remove the zero values
309   - data = data[data != 0]
310   - data = data[np.isnan(data) == False]
311   -
312   - if len(data) == 0:
313   - curve = []
314   - bin_centers = []
315   - return curve, bin_centers
316   -
317   - # Removes the tails of the dataset
318   - offset_percentage = 0.2
319   - offset = np.floor(float(offset_percentage)/100*len(data))
320   -
321   - if min_range == None:
322   - min_range = data[data.argsort()[offset]]
323   - if max_range == None:
324   - max_range = data[data.argsort()[-offset]]
325   -
326   - # to have a consistent step between classes for a given feature
327   - bins = np.arange(start = min_range, stop = max_range, step = 50)
328   -
329   - # force the bins to be in good enough number
330   - if len(bins) > 1000 or len(bins)<40:
331   - bins = 50
332   -
333   -
334   - # compute the histogram
335   - histo, bin_edges = np.histogram(data, range = (min_range, max_range), bins = bins)
336   - histo = [float(h) for h in histo]
337   - total_pixels_number = sum(histo)
338   -
339   - # set the max value of the histogram to 1
340   - curve = np.divide(histo, np.max(histo))
341   - # takes the center of the bins instead of the edges, better for curve style
342   - # instead of real histogram
343   - bin_centers = (bin_edges[1:] + bin_edges[:-1]) / 2
344   -
345   - return curve, bin_centers
346   -
347   -
348   -def extract_bands_per_class(comparison_parameters, processing_chain, alcd_class_nb):
349   - '''
350   - Extract the bands for all the classes and plot an histogram of every one
351   -
352   - DEPRECATED, should be improved !
353   - '''
354   - main_dir = comparison_parameters["automatically_generated"]["main_dir"]
355   - stats_dir = op.join(main_dir, 'Statistics')
356   -
357   - all_diff_class_nb = [1,2,3,4]
358   - features = np.arange(1,15)
359   - #~ features = np.arange(1,3)
360   -
361   - alcd_class_dict = {'0':'null value', '1':'background', '2':'low clouds',
362   - '3':'high clouds', '4':'clouds shadows', '5':'land',
363   - '6':'water', '7':'snow'}
364   -
365   - all_class_names = ['TP', 'FN', 'FP', 'TN']
366   - colormap = create_colormap(class_call = 'number')
367   -
368   - # Execution of the histograms
369   - for feat in features:
370   - curves = []
371   - bins = []
372   - colors = []
373   -
374   - for diff_class_nb in all_diff_class_nb:
375   - #~ curve, bin_centers = get_normalized_histo(comparison_parameters, diff_class_nb, band)
376   - curve, bin_centers = get_normalized_histo(comparison_parameters, processing_chain, diff_class_nb, alcd_class_nb, feat)
377   - curves.append(curve)
378   - bins.append(bin_centers)
379   - colors.append(colormap[str(diff_class_nb)])
380   -
381   - plt.figure()
382   - # first the fill between, then the solid curve
383   - for k in range(len(curves)):
384   - if len(curves[k]) > 0:
385   - plt.fill_between(bins[k], curves[k], color = colors[k], alpha = 0.1, linewidth = 0)
386   -
387   - for k in range(len(curves)):
388   - if len(curves[k]) > 0:
389   - linestyle = '-'
390   - plt.plot(bins[k], curves[k], color = colors[k],
391   - linewidth = 1.5, label = all_class_names[k], linestyle = linestyle)
392   -
393   -
394   - plt.title('True class: {}\nFeature {}'.format(alcd_class_dict[str(alcd_class_nb)], feat))
395   - plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
396   - plt.show()
397   - out_fig = op.join(main_dir, 'Statistics', 'features', 'class_{}_feature_{}.png'.format(alcd_class_nb, feat))
398   - #~ plt.savefig(out_fig, bbox_inches='tight')
399   -
400   - #~ plt.show(block = False)
401   -
402 335  
403 336  
404 337  
... ... @@ -450,7 +383,16 @@ def create_colormap(color_pattern = &#39;diff&#39;, class_call = &#39;number&#39;):
450 383  
451 384  
452 385 def main():
453   - comparison_parameters = json.load(open(op.join('parameters_files','comparison_parameters.json')))
  386 + #~ comparison_parameters = json.load(open(op.join('parameters_files','comparison_parameters.json')))
  387 +
  388 + location = 'Arles'
  389 + date = '20171002'
  390 + processing_chain = 'maja'
  391 + cas_alcd = 1
  392 + plot_confusion_repartition(location, date, processing_chain, cas_alcd)
  393 +
  394 + return
  395 +
454 396  
455 397 for processing_chain in ['maja', 'sen2cor', 'fmask']:
456 398 try:
... ...
PCC/pixels_features_analysis_old.py 0 → 100644
... ... @@ -0,0 +1,477 @@
  1 +#!/usr/bin/python
  2 +# -*- coding: utf-8 -*-
  3 +
  4 +import os.path as op
  5 +import glob
  6 +import numpy as np
  7 +import matplotlib.pyplot as plt
  8 +import json
  9 +import otbApplication
  10 +from matplotlib.lines import Line2D
  11 +
  12 +def extract_class_band(raw_img, band_to_extract, diff_img, diff_class_nb, alcd_labeled_img='', alcd_class_nb = 0):
  13 + '''
  14 + From the difference image (e.g. diff_binary) between the processing
  15 + chain and the ALCD, and the ALCD labeled, extract the data for the pixels
  16 + in the class_nb for both images
  17 + Mostly used to plot histograms for each class and band
  18 + '''
  19 +
  20 + BandMathX = otbApplication.Registry.CreateApplication("BandMathX")
  21 + if alcd_labeled_img == '':
  22 + BandMathX.SetParameterStringList("il", [str(raw_img), str(diff_img)])
  23 + expression = "(im2b1 == {diff_class_nb} ? im1b{band} : 0)".format(
  24 + diff_class_nb = str(diff_class_nb), band = str(band_to_extract))
  25 + else:
  26 + BandMathX.SetParameterStringList("il", [str(raw_img), str(diff_img), str(alcd_labeled_img)])
  27 + expression = "((im2b1 == {diff_class_nb} && im3b1 == {alcd_class_nb}) ? im1b{band} : 0)".format(
  28 + diff_class_nb = str(diff_class_nb),
  29 + alcd_class_nb = str(alcd_class_nb),
  30 + band = str(band_to_extract))
  31 +
  32 + BandMathX.SetParameterString("exp", expression)
  33 +
  34 + BandMathX.UpdateParameters()
  35 + BandMathX.Execute()
  36 +
  37 + # Pass directly to numpy array, doesnt write on disk
  38 + extraction_output = BandMathX.GetImageAsNumpyArray('out')
  39 + data = np.array(extraction_output)
  40 +
  41 + return data
  42 +
  43 +
  44 +
  45 +def count_misclassified_class(diff_img, diff_class_nb, alcd_labeled_img, alcd_class_nb):
  46 + '''
  47 + Count the number of pixels being at the same time of the
  48 + diff_class_nb and alcd_class_nb.
  49 + e.g. if diff_class_nb = 1 and alcd_class_nb = 2, it will returns the
  50 + number of true positive and low_clouds pixels.
  51 + '''
  52 +
  53 + Superimpose = otbApplication.Registry.CreateApplication("Superimpose")
  54 + Superimpose.SetParameterString("inr", str(diff_img))
  55 + Superimpose.SetParameterString("inm", str(alcd_labeled_img))
  56 + Superimpose.SetParameterString("interpolator", "nn")
  57 + Superimpose.UpdateParameters()
  58 + Superimpose.Execute()
  59 +
  60 +
  61 + BandMathX = otbApplication.Registry.CreateApplication("BandMathX")
  62 +
  63 + BandMathX.AddParameterStringList("il", str(diff_img))
  64 + BandMathX.AddImageToParameterInputImageList("il", Superimpose.GetParameterOutputImage("out"))
  65 + expression = "((im1b1 == {diff_class_nb} && im2b1 == {alcd_class_nb}) ? im1b1 : 0)".format(
  66 + diff_class_nb = str(diff_class_nb),
  67 + alcd_class_nb = str(alcd_class_nb))
  68 +
  69 + BandMathX.SetParameterString("exp", expression)
  70 +
  71 + BandMathX.UpdateParameters()
  72 + BandMathX.Execute()
  73 +
  74 + # Pass directly to numpy array, doesnt write on disk
  75 + extraction_output = BandMathX.GetImageAsNumpyArray('out')
  76 + data = np.array(extraction_output)
  77 +
  78 + return data
  79 +
  80 +
  81 +
  82 +def plot_confusion_repartition(comparison_parameters, processing_chain):
  83 + '''
  84 + Plot the repartition of classes for each cell of the confusion
  85 + matrix.
  86 + e.g. what percentage of low_clouds and clouds_shadows in false positive
  87 + '''
  88 + paths_configuration = json.load(open(op.join('..', 'paths_configuration.json')))
  89 +
  90 + main_dir =
  91 + main_dir = comparison_parameters["automatically_generated"]["main_dir"]
  92 + location = comparison_parameters["automatically_generated"]["location"]
  93 + date = comparison_parameters["automatically_generated"]["current_date"]
  94 +
  95 + class_nb = []
  96 + all_proportion = []
  97 + mat_conf_proportion = []
  98 + all_indexes = [0]
  99 + all_colors = []
  100 + all_classes_proportion = []
  101 + all_labels = []
  102 + colormap = create_colormap(color_pattern = 'labeled', class_call = 'number')
  103 + ticks_positions = []
  104 +
  105 + alcd_class_dict = {'0':'null value', '1':'background', '2':'low clouds',
  106 + '3':'high clouds', '4':'clouds shadows', '5':'land',
  107 + '6':'water', '7':'snow'}
  108 +
  109 + for confusion_mat_place in [1,2,3,4]:
  110 + non_zero_class_nb, nb_per_class, proportion_in_image = get_miclassified_histograms(comparison_parameters, processing_chain, diff_class_nb = confusion_mat_place)
  111 + class_nb.append(non_zero_class_nb)
  112 + all_proportion.append(nb_per_class)
  113 + mat_conf_proportion.append(proportion_in_image)
  114 +
  115 + all_classes_proportion.extend(nb_per_class)
  116 +
  117 + for clas in non_zero_class_nb:
  118 + all_colors.append(colormap[str(clas)])
  119 + all_indexes.append(all_indexes[-1]+1)
  120 +
  121 + ticks_positions.append(float(all_indexes[-(len(non_zero_class_nb)+1)]+all_indexes[-1])/2)
  122 + all_indexes[-1] = all_indexes[-1]+2
  123 +
  124 + all_indexes.pop()
  125 +
  126 + # create plot
  127 + fig, ax = plt.subplots()
  128 + ax.yaxis.grid(True) # put horizontal grid
  129 +
  130 + # ticks creation
  131 + ticks_labels = ['TP ({:.01f}%)'.format(mat_conf_proportion[0]*100),
  132 + 'FN ({:.01f}%)'.format(mat_conf_proportion[1]*100),
  133 + 'FP ({:.01f}%)'.format(mat_conf_proportion[2]*100),
  134 + 'TN ({:.01f}%)'.format(mat_conf_proportion[3]*100)]
  135 +
  136 + plt.xticks(ticks_positions, ticks_labels)
  137 +
  138 + # legend customization
  139 + all_legend_nb = list(set([item for sublist in class_nb for item in sublist]))
  140 + all_legend_names = [alcd_class_dict[str(clas)] for clas in all_legend_nb]
  141 + all_legend_colors = [colormap[str(clas)] for clas in all_legend_nb]
  142 + custom_lines = [Line2D([0], [0], color=all_legend_colors[k], lw=4) for k in range(len(all_legend_colors))]
  143 +
  144 + plt.legend(custom_lines, all_legend_names, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
  145 +
  146 + # plot the data
  147 + bar_width = 1.
  148 + opacity = 0.8
  149 +
  150 + rects = plt.bar(all_indexes, all_classes_proportion, bar_width,
  151 + alpha=opacity,
  152 + color=all_colors,
  153 + label = all_labels)
  154 +
  155 +
  156 + # axes modification
  157 + plt.xlabel('Confusion matrix stat. (% of total pixels)')
  158 + plt.ylabel('Proportion among the stat')
  159 + plt.ylim(0.,1.)
  160 + plt.title('Repartition of the true classes in the confusion matrix \n for {}, {}, {}'.format(processing_chain, location, date))
  161 + plt.tight_layout()
  162 +
  163 + # save the figure
  164 + #~ plt.show()
  165 + out_fig = op.join(main_dir, 'Statistics', 'features', 'repartition_matconf_{}_{}_{}.png'.format(processing_chain, location, date))
  166 + plt.savefig(out_fig, bbox_inches='tight')
  167 +
  168 +
  169 +
  170 +
  171 +
  172 +
  173 +
  174 +
  175 +
  176 +
  177 +
  178 +def get_miclassified_histograms(comparison_parameters, processing_chain, diff_class_nb = 1, display = False):
  179 + paths_configuration = json.load(open(op.join('..', 'paths_configuration.json')))
  180 +
  181 + original_data = paths_configuration["data_paths"]["data_alcd"]
  182 + location = str(comparison_parameters["automatically_generated"]["location"])
  183 + date = str(comparison_parameters["automatically_generated"]["current_date"])
  184 + location_dir = location + '_' + '*' + '_' + date
  185 + raw_img = glob.glob(op.join(original_data, location_dir, 'In_data', 'Image', (location + '_bands.tif')))[0]
  186 +
  187 + main_dir = comparison_parameters["automatically_generated"]["main_dir"]
  188 +
  189 + reference_sub_dir = comparison_parameters["processing"]["alcd_initial"]["sub_dir"]
  190 +
  191 + diff_img = glob.glob(op.join(main_dir, 'Binary_difference', reference_sub_dir, '*{}*.tif'.format(processing_chain)))[0]
  192 + alcd_labeled_img = op.join(main_dir, 'Original_data', comparison_parameters["processing"]["alcd_initial"]["cloud_mask"])
  193 +
  194 + alcd_class_dict = {'0':'null value', '1':'background', '2':'low clouds',
  195 + '3':'high clouds', '4':'clouds shadows', '5':'land',
  196 + '6':'water', '7':'snow'}
  197 +
  198 + all_classes_nb = np.arange(1,8)
  199 + colormap = create_colormap(color_pattern = 'labeled', class_call = 'number')
  200 +
  201 + non_zero_names = []
  202 + non_zero_colors = []
  203 + non_zero_class_nb = []
  204 +
  205 + # get the data for each class
  206 + nb_per_class = []
  207 + for alcd_class_nb in all_classes_nb:
  208 + data = count_misclassified_class(diff_img, diff_class_nb, alcd_labeled_img, alcd_class_nb)
  209 + data = data.flatten()
  210 + total_pixels_in_img = len(data)
  211 + # remove the zero values
  212 + data = data[data != 0]
  213 + data = data[np.isnan(data) == False]
  214 +
  215 + nb_of_samples = len(data)
  216 + if nb_of_samples != 0:
  217 + nb_per_class.append(nb_of_samples)
  218 + non_zero_names.append(alcd_class_dict[str(alcd_class_nb)])
  219 + non_zero_colors.append(colormap[str(alcd_class_nb)])
  220 + non_zero_class_nb.append(alcd_class_nb)
  221 +
  222 +
  223 + total_samples = sum(nb_per_class)
  224 + nb_per_class = [float(n)/total_samples for n in nb_per_class]
  225 +
  226 + proportion_in_image = float(total_samples)/float(total_pixels_in_img)
  227 +
  228 + if display == False:
  229 + return non_zero_class_nb, nb_per_class, proportion_in_image
  230 + else:
  231 + # create plot
  232 + fig, ax = plt.subplots()
  233 + ax.yaxis.grid(True) # put horizontal grid
  234 + nb_alcd_classes = len(nb_per_class)
  235 + index = np.arange(nb_alcd_classes)
  236 +
  237 + bar_width = 0.25+1./(nb_alcd_classes+2)
  238 + opacity = 0.8
  239 +
  240 + # One rect per metric
  241 + for k in range(len(nb_per_class)):
  242 + rects = plt.bar(index[k], nb_per_class[k], bar_width,
  243 + alpha=opacity,
  244 + color=non_zero_colors[k],
  245 + label = non_zero_names[k])
  246 +
  247 + plt.xlabel('True class')
  248 + plt.ylabel('Proportion')
  249 +
  250 + location = comparison_parameters["automatically_generated"]["location"]
  251 + date = comparison_parameters["automatically_generated"]["current_date"]
  252 +
  253 + plt.ylim(0.,1.)
  254 +
  255 + dict_diff_class_nb={1:'True Positive', 2:'False Negative', 3:'False Positive', 4:'True Negative'}
  256 +
  257 +
  258 + plt.title('Repartition in the {}\n{:.02f}% of all pixels\n{}, {}'.format(
  259 + dict_diff_class_nb[diff_class_nb], proportion_in_image, location, date))
  260 +
  261 + plt.xticks(index + bar_width/2*(nb_alcd_classes/2), (non_zero_names))
  262 + plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
  263 + #~ plt.legend()
  264 +
  265 + plt.tight_layout()
  266 + plt.show()
  267 +
  268 + dict_diff_class_nb_short={1:'TP', 2:'FN', 3:'FP', 4:'TN'}
  269 + out_fig = op.join(main_dir, 'Statistics',
  270 + 'misclassified_details_{}_{}_{}_{}.png'.format(dict_diff_class_nb_short[diff_class_nb],location, date, processing_chain))
  271 + print(out_fig)
  272 + #~ plt.savefig(out_fig, bbox_inches='tight')
  273 +
  274 +
  275 +
  276 +
  277 +
  278 +
  279 +
  280 +
  281 +
  282 +
  283 +
  284 +def get_normalized_histo(comparison_parameters, processing_chain, diff_class_nb, alcd_class_nb, band, min_range = None, max_range = None):
  285 + '''
  286 + Return a normalized histogram from a class and a feature nb
  287 + The range is limited to a percentage of the total data
  288 +
  289 + TODO : change the range parameters, for the features 'time' especially
  290 + Change the 'original_data' directory
  291 + '''
  292 + paths_configuration = json.load(open(op.join('..', 'paths_configuration.json')))
  293 + #~ original_data = '/mnt/data/home/baetensl/clouds_detection_git/Data_ALCD'
  294 + original_data = paths_configuration["data_paths"]["data_alcd"]
  295 + location = str(comparison_parameters["automatically_generated"]["location"])
  296 + location_dir = location + '_' + '*' + '_' + str(comparison_parameters["automatically_generated"]["current_date"])
  297 + raw_img = glob.glob(op.join(original_data, location_dir, 'In_data', 'Image', (location + '_bands.tif')))[0]
  298 + main_dir = comparison_parameters["automatically_generated"]["main_dir"]
  299 +
  300 + diff_img = glob.glob(op.join(main_dir, 'Binary_difference', 'ALCD_initial', '*{}*.tif'.format(processing_chain)))[0]
  301 + alcd_labeled_img = op.join(main_dir, 'Original_data', comparison_parameters["processing"]["alcd_initial"]["cloud_mask"])
  302 +
  303 +
  304 + alcd_class_dict = {'0':'null value', '1':'background', '2':'low clouds',
  305 + '3':'high clouds', '4':'clouds shadows', '5':'land',
  306 + '6':'water', '7':'snow'}
  307 +
  308 + # get the data for each class
  309 + data = extract_class_band(raw_img, band, diff_img, diff_class_nb, alcd_labeled_img, alcd_class_nb)
  310 +
  311 + # remove the zero values
  312 + data = data[data != 0]
  313 + data = data[np.isnan(data) == False]
  314 +
  315 + if len(data) == 0:
  316 + curve = []
  317 + bin_centers = []
  318 + return curve, bin_centers
  319 +
  320 + # Removes the tails of the dataset
  321 + offset_percentage = 0.2
  322 + offset = np.floor(float(offset_percentage)/100*len(data))
  323 +
  324 + if min_range == None:
  325 + min_range = data[data.argsort()[offset]]
  326 + if max_range == None:
  327 + max_range = data[data.argsort()[-offset]]
  328 +
  329 + # to have a consistent step between classes for a given feature
  330 + bins = np.arange(start = min_range, stop = max_range, step = 50)
  331 +
  332 + # force the bins to be in good enough number
  333 + if len(bins) > 1000 or len(bins)<40:
  334 + bins = 50
  335 +
  336 +
  337 + # compute the histogram
  338 + histo, bin_edges = np.histogram(data, range = (min_range, max_range), bins = bins)
  339 + histo = [float(h) for h in histo]
  340 + total_pixels_number = sum(histo)
  341 +
  342 + # set the max value of the histogram to 1
  343 + curve = np.divide(histo, np.max(histo))
  344 + # takes the center of the bins instead of the edges, better for curve style
  345 + # instead of real histogram
  346 + bin_centers = (bin_edges[1:] + bin_edges[:-1]) / 2
  347 +
  348 + return curve, bin_centers
  349 +
  350 +
  351 +def extract_bands_per_class(comparison_parameters, processing_chain, alcd_class_nb):
  352 + '''
  353 + Extract the bands for all the classes and plot an histogram of every one
  354 +
  355 + DEPRECATED, should be improved !
  356 + '''
  357 + main_dir = comparison_parameters["automatically_generated"]["main_dir"]
  358 + stats_dir = op.join(main_dir, 'Statistics')
  359 +
  360 + all_diff_class_nb = [1,2,3,4]
  361 + features = np.arange(1,15)
  362 + #~ features = np.arange(1,3)
  363 +
  364 + alcd_class_dict = {'0':'null value', '1':'background', '2':'low clouds',
  365 + '3':'high clouds', '4':'clouds shadows', '5':'land',
  366 + '6':'water', '7':'snow'}
  367 +
  368 + all_class_names = ['TP', 'FN', 'FP', 'TN']
  369 + colormap = create_colormap(class_call = 'number')
  370 +
  371 + # Execution of the histograms
  372 + for feat in features:
  373 + curves = []
  374 + bins = []
  375 + colors = []
  376 +
  377 + for diff_class_nb in all_diff_class_nb:
  378 + #~ curve, bin_centers = get_normalized_histo(comparison_parameters, diff_class_nb, band)
  379 + curve, bin_centers = get_normalized_histo(comparison_parameters, processing_chain, diff_class_nb, alcd_class_nb, feat)
  380 + curves.append(curve)
  381 + bins.append(bin_centers)
  382 + colors.append(colormap[str(diff_class_nb)])
  383 +
  384 + plt.figure()
  385 + # first the fill between, then the solid curve
  386 + for k in range(len(curves)):
  387 + if len(curves[k]) > 0:
  388 + plt.fill_between(bins[k], curves[k], color = colors[k], alpha = 0.1, linewidth = 0)
  389 +
  390 + for k in range(len(curves)):
  391 + if len(curves[k]) > 0:
  392 + linestyle = '-'
  393 + plt.plot(bins[k], curves[k], color = colors[k],
  394 + linewidth = 1.5, label = all_class_names[k], linestyle = linestyle)
  395 +
  396 +
  397 + plt.title('True class: {}\nFeature {}'.format(alcd_class_dict[str(alcd_class_nb)], feat))
  398 + plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
  399 + plt.show()
  400 + out_fig = op.join(main_dir, 'Statistics', 'features', 'class_{}_feature_{}.png'.format(alcd_class_nb, feat))
  401 + #~ plt.savefig(out_fig, bbox_inches='tight')
  402 +
  403 + #~ plt.show(block = False)
  404 +
  405 +
  406 +
  407 +
  408 +def create_colormap(color_pattern = 'diff', class_call = 'number'):
  409 + '''
  410 + Create a colormap for each class
  411 + '''
  412 +
  413 + class_color = {}
  414 +
  415 + if color_pattern == 'diff':
  416 + class_color["full_image"] = np.divide([100., 0., 255.], 255.)
  417 + class_color["null_value"] = np.divide([0., 0., 0., 0.], 255.)
  418 + class_color["TP"] = np.divide([10., 192., 56.], 255.)
  419 + class_color["FN"] = np.divide([255., 0., 0.], 255.)
  420 + class_color["FP"] = np.divide([192., 52., 193.], 255.)
  421 + class_color["TN"] = np.divide([10., 10., 10.], 255.)
  422 +
  423 + if class_call == 'number':
  424 + class_color["-1"] = class_color["full_image"]
  425 + class_color["0"] = class_color["null_value"]
  426 + class_color["1"] = class_color["TP"]
  427 + class_color["2"] = class_color["FN"]
  428 + class_color["3"] = class_color["FP"]
  429 + class_color["4"] = class_color["TN"]
  430 + else:
  431 + class_color["full_image"] = np.divide([100., 0., 255.], 255.)
  432 + class_color["null_value"] = np.divide([195., 255., 42.], 255.)
  433 + class_color["background"] = np.divide([187., 83., 58.], 255.)
  434 + class_color["low_clouds"] = np.divide([170., 170., 170.], 255.)
  435 + class_color["high_clouds"] = np.divide([255., 123., 184.], 255.)
  436 + class_color["clouds_shadows"] = np.divide([10., 10., 10.], 255.)
  437 + class_color["land"] = np.divide([0., 151., 56.], 255.)
  438 + class_color["water"] = np.divide([0., 142., 208.], 255.)
  439 + class_color["snow"] = np.divide([77., 77., 77.], 255.)
  440 + if class_call == 'number':
  441 + class_color["-1"] = class_color["full_image"]
  442 + class_color["0"] = class_color["null_value"]
  443 + class_color["1"] = class_color["background"]
  444 + class_color["2"] = class_color["low_clouds"]
  445 + class_color["3"] = class_color["high_clouds"]
  446 + class_color["4"] = class_color["clouds_shadows"]
  447 + class_color["5"] = class_color["land"]
  448 + class_color["6"] = class_color["water"]
  449 + class_color["7"] = class_color["snow"]
  450 +
  451 +
  452 + return class_color
  453 +
  454 +
  455 +def main():
  456 + comparison_parameters = json.load(open(op.join('parameters_files','comparison_parameters.json')))
  457 +
  458 + location = 'Arles'
  459 + date = '20171002'
  460 +
  461 +
  462 + for processing_chain in ['maja', 'sen2cor', 'fmask']:
  463 + try:
  464 + plot_confusion_repartition(comparison_parameters, processing_chain)
  465 + except:
  466 + pass
  467 +
  468 + return