Commit 1de4b8b9a169a34bb46f53e64e0c193d7e85101f

Authored by Louis Baetens
1 parent 669bf433
Exists in master

RMV old pixels features

Showing 1 changed file with 0 additions and 477 deletions   Show diff stats
PCC/pixels_features_analysis_old.py
@@ -1,477 +0,0 @@ @@ -1,477 +0,0 @@
1 -#!/usr/bin/python  
2 -# -*- coding: utf-8 -*-  
3 -  
4 -import os.path as op  
5 -import glob  
6 -import numpy as np  
7 -import matplotlib.pyplot as plt  
8 -import json  
9 -import otbApplication  
10 -from matplotlib.lines import Line2D  
11 -  
12 -def extract_class_band(raw_img, band_to_extract, diff_img, diff_class_nb, alcd_labeled_img='', alcd_class_nb = 0):  
13 - '''  
14 - From the difference image (e.g. diff_binary) between the processing  
15 - chain and the ALCD, and the ALCD labeled, extract the data for the pixels  
16 - in the class_nb for both images  
17 - Mostly used to plot histograms for each class and band  
18 - '''  
19 -  
20 - BandMathX = otbApplication.Registry.CreateApplication("BandMathX")  
21 - if alcd_labeled_img == '':  
22 - BandMathX.SetParameterStringList("il", [str(raw_img), str(diff_img)])  
23 - expression = "(im2b1 == {diff_class_nb} ? im1b{band} : 0)".format(  
24 - diff_class_nb = str(diff_class_nb), band = str(band_to_extract))  
25 - else:  
26 - BandMathX.SetParameterStringList("il", [str(raw_img), str(diff_img), str(alcd_labeled_img)])  
27 - expression = "((im2b1 == {diff_class_nb} && im3b1 == {alcd_class_nb}) ? im1b{band} : 0)".format(  
28 - diff_class_nb = str(diff_class_nb),  
29 - alcd_class_nb = str(alcd_class_nb),  
30 - band = str(band_to_extract))  
31 -  
32 - BandMathX.SetParameterString("exp", expression)  
33 -  
34 - BandMathX.UpdateParameters()  
35 - BandMathX.Execute()  
36 -  
37 - # Pass directly to numpy array, doesnt write on disk  
38 - extraction_output = BandMathX.GetImageAsNumpyArray('out')  
39 - data = np.array(extraction_output)  
40 -  
41 - return data  
42 -  
43 -  
44 -  
45 -def count_misclassified_class(diff_img, diff_class_nb, alcd_labeled_img, alcd_class_nb):  
46 - '''  
47 - Count the number of pixels being at the same time of the  
48 - diff_class_nb and alcd_class_nb.  
49 - e.g. if diff_class_nb = 1 and alcd_class_nb = 2, it will returns the  
50 - number of true positive and low_clouds pixels.  
51 - '''  
52 -  
53 - Superimpose = otbApplication.Registry.CreateApplication("Superimpose")  
54 - Superimpose.SetParameterString("inr", str(diff_img))  
55 - Superimpose.SetParameterString("inm", str(alcd_labeled_img))  
56 - Superimpose.SetParameterString("interpolator", "nn")  
57 - Superimpose.UpdateParameters()  
58 - Superimpose.Execute()  
59 -  
60 -  
61 - BandMathX = otbApplication.Registry.CreateApplication("BandMathX")  
62 -  
63 - BandMathX.AddParameterStringList("il", str(diff_img))  
64 - BandMathX.AddImageToParameterInputImageList("il", Superimpose.GetParameterOutputImage("out"))  
65 - expression = "((im1b1 == {diff_class_nb} && im2b1 == {alcd_class_nb}) ? im1b1 : 0)".format(  
66 - diff_class_nb = str(diff_class_nb),  
67 - alcd_class_nb = str(alcd_class_nb))  
68 -  
69 - BandMathX.SetParameterString("exp", expression)  
70 -  
71 - BandMathX.UpdateParameters()  
72 - BandMathX.Execute()  
73 -  
74 - # Pass directly to numpy array, doesnt write on disk  
75 - extraction_output = BandMathX.GetImageAsNumpyArray('out')  
76 - data = np.array(extraction_output)  
77 -  
78 - return data  
79 -  
80 -  
81 -  
82 -def plot_confusion_repartition(comparison_parameters, processing_chain):  
83 - '''  
84 - Plot the repartition of classes for each cell of the confusion  
85 - matrix.  
86 - e.g. what percentage of low_clouds and clouds_shadows in false positive  
87 - '''  
88 - paths_configuration = json.load(open(op.join('..', 'paths_configuration.json')))  
89 -  
90 - main_dir =  
91 - main_dir = comparison_parameters["automatically_generated"]["main_dir"]  
92 - location = comparison_parameters["automatically_generated"]["location"]  
93 - date = comparison_parameters["automatically_generated"]["current_date"]  
94 -  
95 - class_nb = []  
96 - all_proportion = []  
97 - mat_conf_proportion = []  
98 - all_indexes = [0]  
99 - all_colors = []  
100 - all_classes_proportion = []  
101 - all_labels = []  
102 - colormap = create_colormap(color_pattern = 'labeled', class_call = 'number')  
103 - ticks_positions = []  
104 -  
105 - alcd_class_dict = {'0':'null value', '1':'background', '2':'low clouds',  
106 - '3':'high clouds', '4':'clouds shadows', '5':'land',  
107 - '6':'water', '7':'snow'}  
108 -  
109 - for confusion_mat_place in [1,2,3,4]:  
110 - non_zero_class_nb, nb_per_class, proportion_in_image = get_miclassified_histograms(comparison_parameters, processing_chain, diff_class_nb = confusion_mat_place)  
111 - class_nb.append(non_zero_class_nb)  
112 - all_proportion.append(nb_per_class)  
113 - mat_conf_proportion.append(proportion_in_image)  
114 -  
115 - all_classes_proportion.extend(nb_per_class)  
116 -  
117 - for clas in non_zero_class_nb:  
118 - all_colors.append(colormap[str(clas)])  
119 - all_indexes.append(all_indexes[-1]+1)  
120 -  
121 - ticks_positions.append(float(all_indexes[-(len(non_zero_class_nb)+1)]+all_indexes[-1])/2)  
122 - all_indexes[-1] = all_indexes[-1]+2  
123 -  
124 - all_indexes.pop()  
125 -  
126 - # create plot  
127 - fig, ax = plt.subplots()  
128 - ax.yaxis.grid(True) # put horizontal grid  
129 -  
130 - # ticks creation  
131 - ticks_labels = ['TP ({:.01f}%)'.format(mat_conf_proportion[0]*100),  
132 - 'FN ({:.01f}%)'.format(mat_conf_proportion[1]*100),  
133 - 'FP ({:.01f}%)'.format(mat_conf_proportion[2]*100),  
134 - 'TN ({:.01f}%)'.format(mat_conf_proportion[3]*100)]  
135 -  
136 - plt.xticks(ticks_positions, ticks_labels)  
137 -  
138 - # legend customization  
139 - all_legend_nb = list(set([item for sublist in class_nb for item in sublist]))  
140 - all_legend_names = [alcd_class_dict[str(clas)] for clas in all_legend_nb]  
141 - all_legend_colors = [colormap[str(clas)] for clas in all_legend_nb]  
142 - custom_lines = [Line2D([0], [0], color=all_legend_colors[k], lw=4) for k in range(len(all_legend_colors))]  
143 -  
144 - plt.legend(custom_lines, all_legend_names, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)  
145 -  
146 - # plot the data  
147 - bar_width = 1.  
148 - opacity = 0.8  
149 -  
150 - rects = plt.bar(all_indexes, all_classes_proportion, bar_width,  
151 - alpha=opacity,  
152 - color=all_colors,  
153 - label = all_labels)  
154 -  
155 -  
156 - # axes modification  
157 - plt.xlabel('Confusion matrix stat. (% of total pixels)')  
158 - plt.ylabel('Proportion among the stat')  
159 - plt.ylim(0.,1.)  
160 - plt.title('Repartition of the true classes in the confusion matrix \n for {}, {}, {}'.format(processing_chain, location, date))  
161 - plt.tight_layout()  
162 -  
163 - # save the figure  
164 - #~ plt.show()  
165 - out_fig = op.join(main_dir, 'Statistics', 'features', 'repartition_matconf_{}_{}_{}.png'.format(processing_chain, location, date))  
166 - plt.savefig(out_fig, bbox_inches='tight')  
167 -  
168 -  
169 -  
170 -  
171 -  
172 -  
173 -  
174 -  
175 -  
176 -  
177 -  
178 -def get_miclassified_histograms(comparison_parameters, processing_chain, diff_class_nb = 1, display = False):  
179 - paths_configuration = json.load(open(op.join('..', 'paths_configuration.json')))  
180 -  
181 - original_data = paths_configuration["data_paths"]["data_alcd"]  
182 - location = str(comparison_parameters["automatically_generated"]["location"])  
183 - date = str(comparison_parameters["automatically_generated"]["current_date"])  
184 - location_dir = location + '_' + '*' + '_' + date  
185 - raw_img = glob.glob(op.join(original_data, location_dir, 'In_data', 'Image', (location + '_bands.tif')))[0]  
186 -  
187 - main_dir = comparison_parameters["automatically_generated"]["main_dir"]  
188 -  
189 - reference_sub_dir = comparison_parameters["processing"]["alcd_initial"]["sub_dir"]  
190 -  
191 - diff_img = glob.glob(op.join(main_dir, 'Binary_difference', reference_sub_dir, '*{}*.tif'.format(processing_chain)))[0]  
192 - alcd_labeled_img = op.join(main_dir, 'Original_data', comparison_parameters["processing"]["alcd_initial"]["cloud_mask"])  
193 -  
194 - alcd_class_dict = {'0':'null value', '1':'background', '2':'low clouds',  
195 - '3':'high clouds', '4':'clouds shadows', '5':'land',  
196 - '6':'water', '7':'snow'}  
197 -  
198 - all_classes_nb = np.arange(1,8)  
199 - colormap = create_colormap(color_pattern = 'labeled', class_call = 'number')  
200 -  
201 - non_zero_names = []  
202 - non_zero_colors = []  
203 - non_zero_class_nb = []  
204 -  
205 - # get the data for each class  
206 - nb_per_class = []  
207 - for alcd_class_nb in all_classes_nb:  
208 - data = count_misclassified_class(diff_img, diff_class_nb, alcd_labeled_img, alcd_class_nb)  
209 - data = data.flatten()  
210 - total_pixels_in_img = len(data)  
211 - # remove the zero values  
212 - data = data[data != 0]  
213 - data = data[np.isnan(data) == False]  
214 -  
215 - nb_of_samples = len(data)  
216 - if nb_of_samples != 0:  
217 - nb_per_class.append(nb_of_samples)  
218 - non_zero_names.append(alcd_class_dict[str(alcd_class_nb)])  
219 - non_zero_colors.append(colormap[str(alcd_class_nb)])  
220 - non_zero_class_nb.append(alcd_class_nb)  
221 -  
222 -  
223 - total_samples = sum(nb_per_class)  
224 - nb_per_class = [float(n)/total_samples for n in nb_per_class]  
225 -  
226 - proportion_in_image = float(total_samples)/float(total_pixels_in_img)  
227 -  
228 - if display == False:  
229 - return non_zero_class_nb, nb_per_class, proportion_in_image  
230 - else:  
231 - # create plot  
232 - fig, ax = plt.subplots()  
233 - ax.yaxis.grid(True) # put horizontal grid  
234 - nb_alcd_classes = len(nb_per_class)  
235 - index = np.arange(nb_alcd_classes)  
236 -  
237 - bar_width = 0.25+1./(nb_alcd_classes+2)  
238 - opacity = 0.8  
239 -  
240 - # One rect per metric  
241 - for k in range(len(nb_per_class)):  
242 - rects = plt.bar(index[k], nb_per_class[k], bar_width,  
243 - alpha=opacity,  
244 - color=non_zero_colors[k],  
245 - label = non_zero_names[k])  
246 -  
247 - plt.xlabel('True class')  
248 - plt.ylabel('Proportion')  
249 -  
250 - location = comparison_parameters["automatically_generated"]["location"]  
251 - date = comparison_parameters["automatically_generated"]["current_date"]  
252 -  
253 - plt.ylim(0.,1.)  
254 -  
255 - dict_diff_class_nb={1:'True Positive', 2:'False Negative', 3:'False Positive', 4:'True Negative'}  
256 -  
257 -  
258 - plt.title('Repartition in the {}\n{:.02f}% of all pixels\n{}, {}'.format(  
259 - dict_diff_class_nb[diff_class_nb], proportion_in_image, location, date))  
260 -  
261 - plt.xticks(index + bar_width/2*(nb_alcd_classes/2), (non_zero_names))  
262 - plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)  
263 - #~ plt.legend()  
264 -  
265 - plt.tight_layout()  
266 - plt.show()  
267 -  
268 - dict_diff_class_nb_short={1:'TP', 2:'FN', 3:'FP', 4:'TN'}  
269 - out_fig = op.join(main_dir, 'Statistics',  
270 - 'misclassified_details_{}_{}_{}_{}.png'.format(dict_diff_class_nb_short[diff_class_nb],location, date, processing_chain))  
271 - print(out_fig)  
272 - #~ plt.savefig(out_fig, bbox_inches='tight')  
273 -  
274 -  
275 -  
276 -  
277 -  
278 -  
279 -  
280 -  
281 -  
282 -  
283 -  
284 -def get_normalized_histo(comparison_parameters, processing_chain, diff_class_nb, alcd_class_nb, band, min_range = None, max_range = None):  
285 - '''  
286 - Return a normalized histogram from a class and a feature nb  
287 - The range is limited to a percentage of the total data  
288 -  
289 - TODO : change the range parameters, for the features 'time' especially  
290 - Change the 'original_data' directory  
291 - '''  
292 - paths_configuration = json.load(open(op.join('..', 'paths_configuration.json')))  
293 - #~ original_data = '/mnt/data/home/baetensl/clouds_detection_git/Data_ALCD'  
294 - original_data = paths_configuration["data_paths"]["data_alcd"]  
295 - location = str(comparison_parameters["automatically_generated"]["location"])  
296 - location_dir = location + '_' + '*' + '_' + str(comparison_parameters["automatically_generated"]["current_date"])  
297 - raw_img = glob.glob(op.join(original_data, location_dir, 'In_data', 'Image', (location + '_bands.tif')))[0]  
298 - main_dir = comparison_parameters["automatically_generated"]["main_dir"]  
299 -  
300 - diff_img = glob.glob(op.join(main_dir, 'Binary_difference', 'ALCD_initial', '*{}*.tif'.format(processing_chain)))[0]  
301 - alcd_labeled_img = op.join(main_dir, 'Original_data', comparison_parameters["processing"]["alcd_initial"]["cloud_mask"])  
302 -  
303 -  
304 - alcd_class_dict = {'0':'null value', '1':'background', '2':'low clouds',  
305 - '3':'high clouds', '4':'clouds shadows', '5':'land',  
306 - '6':'water', '7':'snow'}  
307 -  
308 - # get the data for each class  
309 - data = extract_class_band(raw_img, band, diff_img, diff_class_nb, alcd_labeled_img, alcd_class_nb)  
310 -  
311 - # remove the zero values  
312 - data = data[data != 0]  
313 - data = data[np.isnan(data) == False]  
314 -  
315 - if len(data) == 0:  
316 - curve = []  
317 - bin_centers = []  
318 - return curve, bin_centers  
319 -  
320 - # Removes the tails of the dataset  
321 - offset_percentage = 0.2  
322 - offset = np.floor(float(offset_percentage)/100*len(data))  
323 -  
324 - if min_range == None:  
325 - min_range = data[data.argsort()[offset]]  
326 - if max_range == None:  
327 - max_range = data[data.argsort()[-offset]]  
328 -  
329 - # to have a consistent step between classes for a given feature  
330 - bins = np.arange(start = min_range, stop = max_range, step = 50)  
331 -  
332 - # force the bins to be in good enough number  
333 - if len(bins) > 1000 or len(bins)<40:  
334 - bins = 50  
335 -  
336 -  
337 - # compute the histogram  
338 - histo, bin_edges = np.histogram(data, range = (min_range, max_range), bins = bins)  
339 - histo = [float(h) for h in histo]  
340 - total_pixels_number = sum(histo)  
341 -  
342 - # set the max value of the histogram to 1  
343 - curve = np.divide(histo, np.max(histo))  
344 - # takes the center of the bins instead of the edges, better for curve style  
345 - # instead of real histogram  
346 - bin_centers = (bin_edges[1:] + bin_edges[:-1]) / 2  
347 -  
348 - return curve, bin_centers  
349 -  
350 -  
351 -def extract_bands_per_class(comparison_parameters, processing_chain, alcd_class_nb):  
352 - '''  
353 - Extract the bands for all the classes and plot an histogram of every one  
354 -  
355 - DEPRECATED, should be improved !  
356 - '''  
357 - main_dir = comparison_parameters["automatically_generated"]["main_dir"]  
358 - stats_dir = op.join(main_dir, 'Statistics')  
359 -  
360 - all_diff_class_nb = [1,2,3,4]  
361 - features = np.arange(1,15)  
362 - #~ features = np.arange(1,3)  
363 -  
364 - alcd_class_dict = {'0':'null value', '1':'background', '2':'low clouds',  
365 - '3':'high clouds', '4':'clouds shadows', '5':'land',  
366 - '6':'water', '7':'snow'}  
367 -  
368 - all_class_names = ['TP', 'FN', 'FP', 'TN']  
369 - colormap = create_colormap(class_call = 'number')  
370 -  
371 - # Execution of the histograms  
372 - for feat in features:  
373 - curves = []  
374 - bins = []  
375 - colors = []  
376 -  
377 - for diff_class_nb in all_diff_class_nb:  
378 - #~ curve, bin_centers = get_normalized_histo(comparison_parameters, diff_class_nb, band)  
379 - curve, bin_centers = get_normalized_histo(comparison_parameters, processing_chain, diff_class_nb, alcd_class_nb, feat)  
380 - curves.append(curve)  
381 - bins.append(bin_centers)  
382 - colors.append(colormap[str(diff_class_nb)])  
383 -  
384 - plt.figure()  
385 - # first the fill between, then the solid curve  
386 - for k in range(len(curves)):  
387 - if len(curves[k]) > 0:  
388 - plt.fill_between(bins[k], curves[k], color = colors[k], alpha = 0.1, linewidth = 0)  
389 -  
390 - for k in range(len(curves)):  
391 - if len(curves[k]) > 0:  
392 - linestyle = '-'  
393 - plt.plot(bins[k], curves[k], color = colors[k],  
394 - linewidth = 1.5, label = all_class_names[k], linestyle = linestyle)  
395 -  
396 -  
397 - plt.title('True class: {}\nFeature {}'.format(alcd_class_dict[str(alcd_class_nb)], feat))  
398 - plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)  
399 - plt.show()  
400 - out_fig = op.join(main_dir, 'Statistics', 'features', 'class_{}_feature_{}.png'.format(alcd_class_nb, feat))  
401 - #~ plt.savefig(out_fig, bbox_inches='tight')  
402 -  
403 - #~ plt.show(block = False)  
404 -  
405 -  
406 -  
407 -  
408 -def create_colormap(color_pattern = 'diff', class_call = 'number'):  
409 - '''  
410 - Create a colormap for each class  
411 - '''  
412 -  
413 - class_color = {}  
414 -  
415 - if color_pattern == 'diff':  
416 - class_color["full_image"] = np.divide([100., 0., 255.], 255.)  
417 - class_color["null_value"] = np.divide([0., 0., 0., 0.], 255.)  
418 - class_color["TP"] = np.divide([10., 192., 56.], 255.)  
419 - class_color["FN"] = np.divide([255., 0., 0.], 255.)  
420 - class_color["FP"] = np.divide([192., 52., 193.], 255.)  
421 - class_color["TN"] = np.divide([10., 10., 10.], 255.)  
422 -  
423 - if class_call == 'number':  
424 - class_color["-1"] = class_color["full_image"]  
425 - class_color["0"] = class_color["null_value"]  
426 - class_color["1"] = class_color["TP"]  
427 - class_color["2"] = class_color["FN"]  
428 - class_color["3"] = class_color["FP"]  
429 - class_color["4"] = class_color["TN"]  
430 - else:  
431 - class_color["full_image"] = np.divide([100., 0., 255.], 255.)  
432 - class_color["null_value"] = np.divide([195., 255., 42.], 255.)  
433 - class_color["background"] = np.divide([187., 83., 58.], 255.)  
434 - class_color["low_clouds"] = np.divide([170., 170., 170.], 255.)  
435 - class_color["high_clouds"] = np.divide([255., 123., 184.], 255.)  
436 - class_color["clouds_shadows"] = np.divide([10., 10., 10.], 255.)  
437 - class_color["land"] = np.divide([0., 151., 56.], 255.)  
438 - class_color["water"] = np.divide([0., 142., 208.], 255.)  
439 - class_color["snow"] = np.divide([77., 77., 77.], 255.)  
440 - if class_call == 'number':  
441 - class_color["-1"] = class_color["full_image"]  
442 - class_color["0"] = class_color["null_value"]  
443 - class_color["1"] = class_color["background"]  
444 - class_color["2"] = class_color["low_clouds"]  
445 - class_color["3"] = class_color["high_clouds"]  
446 - class_color["4"] = class_color["clouds_shadows"]  
447 - class_color["5"] = class_color["land"]  
448 - class_color["6"] = class_color["water"]  
449 - class_color["7"] = class_color["snow"]  
450 -  
451 -  
452 - return class_color  
453 -  
454 -  
455 -def main():  
456 - comparison_parameters = json.load(open(op.join('parameters_files','comparison_parameters.json')))  
457 -  
458 - location = 'Arles'  
459 - date = '20171002'  
460 -  
461 -  
462 - for processing_chain in ['maja', 'sen2cor', 'fmask']:  
463 - try:  
464 - plot_confusion_repartition(comparison_parameters, processing_chain)  
465 - except:  
466 - pass  
467 -  
468 - return  
469 - alcd_class_nb = 2  
470 - extract_bands_per_class(comparison_parameters, processing_chain, alcd_class_nb)  
471 -  
472 -  
473 -  
474 -  
475 -if __name__ == '__main__':  
476 - main()  
477 -