Commit 1de4b8b9a169a34bb46f53e64e0c193d7e85101f

Authored by Louis Baetens
1 parent 669bf433
Exists in master

RMV old pixels features

Showing 1 changed file with 0 additions and 477 deletions   Show diff stats
PCC/pixels_features_analysis_old.py
... ... @@ -1,477 +0,0 @@
1   -#!/usr/bin/python
2   -# -*- coding: utf-8 -*-
3   -
4   -import os.path as op
5   -import glob
6   -import numpy as np
7   -import matplotlib.pyplot as plt
8   -import json
9   -import otbApplication
10   -from matplotlib.lines import Line2D
11   -
12   -def extract_class_band(raw_img, band_to_extract, diff_img, diff_class_nb, alcd_labeled_img='', alcd_class_nb = 0):
13   - '''
14   - From the difference image (e.g. diff_binary) between the processing
15   - chain and the ALCD, and the ALCD labeled, extract the data for the pixels
16   - in the class_nb for both images
17   - Mostly used to plot histograms for each class and band
18   - '''
19   -
20   - BandMathX = otbApplication.Registry.CreateApplication("BandMathX")
21   - if alcd_labeled_img == '':
22   - BandMathX.SetParameterStringList("il", [str(raw_img), str(diff_img)])
23   - expression = "(im2b1 == {diff_class_nb} ? im1b{band} : 0)".format(
24   - diff_class_nb = str(diff_class_nb), band = str(band_to_extract))
25   - else:
26   - BandMathX.SetParameterStringList("il", [str(raw_img), str(diff_img), str(alcd_labeled_img)])
27   - expression = "((im2b1 == {diff_class_nb} && im3b1 == {alcd_class_nb}) ? im1b{band} : 0)".format(
28   - diff_class_nb = str(diff_class_nb),
29   - alcd_class_nb = str(alcd_class_nb),
30   - band = str(band_to_extract))
31   -
32   - BandMathX.SetParameterString("exp", expression)
33   -
34   - BandMathX.UpdateParameters()
35   - BandMathX.Execute()
36   -
37   - # Pass directly to numpy array, doesnt write on disk
38   - extraction_output = BandMathX.GetImageAsNumpyArray('out')
39   - data = np.array(extraction_output)
40   -
41   - return data
42   -
43   -
44   -
45   -def count_misclassified_class(diff_img, diff_class_nb, alcd_labeled_img, alcd_class_nb):
46   - '''
47   - Count the number of pixels being at the same time of the
48   - diff_class_nb and alcd_class_nb.
49   - e.g. if diff_class_nb = 1 and alcd_class_nb = 2, it will returns the
50   - number of true positive and low_clouds pixels.
51   - '''
52   -
53   - Superimpose = otbApplication.Registry.CreateApplication("Superimpose")
54   - Superimpose.SetParameterString("inr", str(diff_img))
55   - Superimpose.SetParameterString("inm", str(alcd_labeled_img))
56   - Superimpose.SetParameterString("interpolator", "nn")
57   - Superimpose.UpdateParameters()
58   - Superimpose.Execute()
59   -
60   -
61   - BandMathX = otbApplication.Registry.CreateApplication("BandMathX")
62   -
63   - BandMathX.AddParameterStringList("il", str(diff_img))
64   - BandMathX.AddImageToParameterInputImageList("il", Superimpose.GetParameterOutputImage("out"))
65   - expression = "((im1b1 == {diff_class_nb} && im2b1 == {alcd_class_nb}) ? im1b1 : 0)".format(
66   - diff_class_nb = str(diff_class_nb),
67   - alcd_class_nb = str(alcd_class_nb))
68   -
69   - BandMathX.SetParameterString("exp", expression)
70   -
71   - BandMathX.UpdateParameters()
72   - BandMathX.Execute()
73   -
74   - # Pass directly to numpy array, doesnt write on disk
75   - extraction_output = BandMathX.GetImageAsNumpyArray('out')
76   - data = np.array(extraction_output)
77   -
78   - return data
79   -
80   -
81   -
82   -def plot_confusion_repartition(comparison_parameters, processing_chain):
83   - '''
84   - Plot the repartition of classes for each cell of the confusion
85   - matrix.
86   - e.g. what percentage of low_clouds and clouds_shadows in false positive
87   - '''
88   - paths_configuration = json.load(open(op.join('..', 'paths_configuration.json')))
89   -
90   - main_dir =
91   - main_dir = comparison_parameters["automatically_generated"]["main_dir"]
92   - location = comparison_parameters["automatically_generated"]["location"]
93   - date = comparison_parameters["automatically_generated"]["current_date"]
94   -
95   - class_nb = []
96   - all_proportion = []
97   - mat_conf_proportion = []
98   - all_indexes = [0]
99   - all_colors = []
100   - all_classes_proportion = []
101   - all_labels = []
102   - colormap = create_colormap(color_pattern = 'labeled', class_call = 'number')
103   - ticks_positions = []
104   -
105   - alcd_class_dict = {'0':'null value', '1':'background', '2':'low clouds',
106   - '3':'high clouds', '4':'clouds shadows', '5':'land',
107   - '6':'water', '7':'snow'}
108   -
109   - for confusion_mat_place in [1,2,3,4]:
110   - non_zero_class_nb, nb_per_class, proportion_in_image = get_miclassified_histograms(comparison_parameters, processing_chain, diff_class_nb = confusion_mat_place)
111   - class_nb.append(non_zero_class_nb)
112   - all_proportion.append(nb_per_class)
113   - mat_conf_proportion.append(proportion_in_image)
114   -
115   - all_classes_proportion.extend(nb_per_class)
116   -
117   - for clas in non_zero_class_nb:
118   - all_colors.append(colormap[str(clas)])
119   - all_indexes.append(all_indexes[-1]+1)
120   -
121   - ticks_positions.append(float(all_indexes[-(len(non_zero_class_nb)+1)]+all_indexes[-1])/2)
122   - all_indexes[-1] = all_indexes[-1]+2
123   -
124   - all_indexes.pop()
125   -
126   - # create plot
127   - fig, ax = plt.subplots()
128   - ax.yaxis.grid(True) # put horizontal grid
129   -
130   - # ticks creation
131   - ticks_labels = ['TP ({:.01f}%)'.format(mat_conf_proportion[0]*100),
132   - 'FN ({:.01f}%)'.format(mat_conf_proportion[1]*100),
133   - 'FP ({:.01f}%)'.format(mat_conf_proportion[2]*100),
134   - 'TN ({:.01f}%)'.format(mat_conf_proportion[3]*100)]
135   -
136   - plt.xticks(ticks_positions, ticks_labels)
137   -
138   - # legend customization
139   - all_legend_nb = list(set([item for sublist in class_nb for item in sublist]))
140   - all_legend_names = [alcd_class_dict[str(clas)] for clas in all_legend_nb]
141   - all_legend_colors = [colormap[str(clas)] for clas in all_legend_nb]
142   - custom_lines = [Line2D([0], [0], color=all_legend_colors[k], lw=4) for k in range(len(all_legend_colors))]
143   -
144   - plt.legend(custom_lines, all_legend_names, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
145   -
146   - # plot the data
147   - bar_width = 1.
148   - opacity = 0.8
149   -
150   - rects = plt.bar(all_indexes, all_classes_proportion, bar_width,
151   - alpha=opacity,
152   - color=all_colors,
153   - label = all_labels)
154   -
155   -
156   - # axes modification
157   - plt.xlabel('Confusion matrix stat. (% of total pixels)')
158   - plt.ylabel('Proportion among the stat')
159   - plt.ylim(0.,1.)
160   - plt.title('Repartition of the true classes in the confusion matrix \n for {}, {}, {}'.format(processing_chain, location, date))
161   - plt.tight_layout()
162   -
163   - # save the figure
164   - #~ plt.show()
165   - out_fig = op.join(main_dir, 'Statistics', 'features', 'repartition_matconf_{}_{}_{}.png'.format(processing_chain, location, date))
166   - plt.savefig(out_fig, bbox_inches='tight')
167   -
168   -
169   -
170   -
171   -
172   -
173   -
174   -
175   -
176   -
177   -
178   -def get_miclassified_histograms(comparison_parameters, processing_chain, diff_class_nb = 1, display = False):
179   - paths_configuration = json.load(open(op.join('..', 'paths_configuration.json')))
180   -
181   - original_data = paths_configuration["data_paths"]["data_alcd"]
182   - location = str(comparison_parameters["automatically_generated"]["location"])
183   - date = str(comparison_parameters["automatically_generated"]["current_date"])
184   - location_dir = location + '_' + '*' + '_' + date
185   - raw_img = glob.glob(op.join(original_data, location_dir, 'In_data', 'Image', (location + '_bands.tif')))[0]
186   -
187   - main_dir = comparison_parameters["automatically_generated"]["main_dir"]
188   -
189   - reference_sub_dir = comparison_parameters["processing"]["alcd_initial"]["sub_dir"]
190   -
191   - diff_img = glob.glob(op.join(main_dir, 'Binary_difference', reference_sub_dir, '*{}*.tif'.format(processing_chain)))[0]
192   - alcd_labeled_img = op.join(main_dir, 'Original_data', comparison_parameters["processing"]["alcd_initial"]["cloud_mask"])
193   -
194   - alcd_class_dict = {'0':'null value', '1':'background', '2':'low clouds',
195   - '3':'high clouds', '4':'clouds shadows', '5':'land',
196   - '6':'water', '7':'snow'}
197   -
198   - all_classes_nb = np.arange(1,8)
199   - colormap = create_colormap(color_pattern = 'labeled', class_call = 'number')
200   -
201   - non_zero_names = []
202   - non_zero_colors = []
203   - non_zero_class_nb = []
204   -
205   - # get the data for each class
206   - nb_per_class = []
207   - for alcd_class_nb in all_classes_nb:
208   - data = count_misclassified_class(diff_img, diff_class_nb, alcd_labeled_img, alcd_class_nb)
209   - data = data.flatten()
210   - total_pixels_in_img = len(data)
211   - # remove the zero values
212   - data = data[data != 0]
213   - data = data[np.isnan(data) == False]
214   -
215   - nb_of_samples = len(data)
216   - if nb_of_samples != 0:
217   - nb_per_class.append(nb_of_samples)
218   - non_zero_names.append(alcd_class_dict[str(alcd_class_nb)])
219   - non_zero_colors.append(colormap[str(alcd_class_nb)])
220   - non_zero_class_nb.append(alcd_class_nb)
221   -
222   -
223   - total_samples = sum(nb_per_class)
224   - nb_per_class = [float(n)/total_samples for n in nb_per_class]
225   -
226   - proportion_in_image = float(total_samples)/float(total_pixels_in_img)
227   -
228   - if display == False:
229   - return non_zero_class_nb, nb_per_class, proportion_in_image
230   - else:
231   - # create plot
232   - fig, ax = plt.subplots()
233   - ax.yaxis.grid(True) # put horizontal grid
234   - nb_alcd_classes = len(nb_per_class)
235   - index = np.arange(nb_alcd_classes)
236   -
237   - bar_width = 0.25+1./(nb_alcd_classes+2)
238   - opacity = 0.8
239   -
240   - # One rect per metric
241   - for k in range(len(nb_per_class)):
242   - rects = plt.bar(index[k], nb_per_class[k], bar_width,
243   - alpha=opacity,
244   - color=non_zero_colors[k],
245   - label = non_zero_names[k])
246   -
247   - plt.xlabel('True class')
248   - plt.ylabel('Proportion')
249   -
250   - location = comparison_parameters["automatically_generated"]["location"]
251   - date = comparison_parameters["automatically_generated"]["current_date"]
252   -
253   - plt.ylim(0.,1.)
254   -
255   - dict_diff_class_nb={1:'True Positive', 2:'False Negative', 3:'False Positive', 4:'True Negative'}
256   -
257   -
258   - plt.title('Repartition in the {}\n{:.02f}% of all pixels\n{}, {}'.format(
259   - dict_diff_class_nb[diff_class_nb], proportion_in_image, location, date))
260   -
261   - plt.xticks(index + bar_width/2*(nb_alcd_classes/2), (non_zero_names))
262   - plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
263   - #~ plt.legend()
264   -
265   - plt.tight_layout()
266   - plt.show()
267   -
268   - dict_diff_class_nb_short={1:'TP', 2:'FN', 3:'FP', 4:'TN'}
269   - out_fig = op.join(main_dir, 'Statistics',
270   - 'misclassified_details_{}_{}_{}_{}.png'.format(dict_diff_class_nb_short[diff_class_nb],location, date, processing_chain))
271   - print(out_fig)
272   - #~ plt.savefig(out_fig, bbox_inches='tight')
273   -
274   -
275   -
276   -
277   -
278   -
279   -
280   -
281   -
282   -
283   -
284   -def get_normalized_histo(comparison_parameters, processing_chain, diff_class_nb, alcd_class_nb, band, min_range = None, max_range = None):
285   - '''
286   - Return a normalized histogram from a class and a feature nb
287   - The range is limited to a percentage of the total data
288   -
289   - TODO : change the range parameters, for the features 'time' especially
290   - Change the 'original_data' directory
291   - '''
292   - paths_configuration = json.load(open(op.join('..', 'paths_configuration.json')))
293   - #~ original_data = '/mnt/data/home/baetensl/clouds_detection_git/Data_ALCD'
294   - original_data = paths_configuration["data_paths"]["data_alcd"]
295   - location = str(comparison_parameters["automatically_generated"]["location"])
296   - location_dir = location + '_' + '*' + '_' + str(comparison_parameters["automatically_generated"]["current_date"])
297   - raw_img = glob.glob(op.join(original_data, location_dir, 'In_data', 'Image', (location + '_bands.tif')))[0]
298   - main_dir = comparison_parameters["automatically_generated"]["main_dir"]
299   -
300   - diff_img = glob.glob(op.join(main_dir, 'Binary_difference', 'ALCD_initial', '*{}*.tif'.format(processing_chain)))[0]
301   - alcd_labeled_img = op.join(main_dir, 'Original_data', comparison_parameters["processing"]["alcd_initial"]["cloud_mask"])
302   -
303   -
304   - alcd_class_dict = {'0':'null value', '1':'background', '2':'low clouds',
305   - '3':'high clouds', '4':'clouds shadows', '5':'land',
306   - '6':'water', '7':'snow'}
307   -
308   - # get the data for each class
309   - data = extract_class_band(raw_img, band, diff_img, diff_class_nb, alcd_labeled_img, alcd_class_nb)
310   -
311   - # remove the zero values
312   - data = data[data != 0]
313   - data = data[np.isnan(data) == False]
314   -
315   - if len(data) == 0:
316   - curve = []
317   - bin_centers = []
318   - return curve, bin_centers
319   -
320   - # Removes the tails of the dataset
321   - offset_percentage = 0.2
322   - offset = np.floor(float(offset_percentage)/100*len(data))
323   -
324   - if min_range == None:
325   - min_range = data[data.argsort()[offset]]
326   - if max_range == None:
327   - max_range = data[data.argsort()[-offset]]
328   -
329   - # to have a consistent step between classes for a given feature
330   - bins = np.arange(start = min_range, stop = max_range, step = 50)
331   -
332   - # force the bins to be in good enough number
333   - if len(bins) > 1000 or len(bins)<40:
334   - bins = 50
335   -
336   -
337   - # compute the histogram
338   - histo, bin_edges = np.histogram(data, range = (min_range, max_range), bins = bins)
339   - histo = [float(h) for h in histo]
340   - total_pixels_number = sum(histo)
341   -
342   - # set the max value of the histogram to 1
343   - curve = np.divide(histo, np.max(histo))
344   - # takes the center of the bins instead of the edges, better for curve style
345   - # instead of real histogram
346   - bin_centers = (bin_edges[1:] + bin_edges[:-1]) / 2
347   -
348   - return curve, bin_centers
349   -
350   -
351   -def extract_bands_per_class(comparison_parameters, processing_chain, alcd_class_nb):
352   - '''
353   - Extract the bands for all the classes and plot an histogram of every one
354   -
355   - DEPRECATED, should be improved !
356   - '''
357   - main_dir = comparison_parameters["automatically_generated"]["main_dir"]
358   - stats_dir = op.join(main_dir, 'Statistics')
359   -
360   - all_diff_class_nb = [1,2,3,4]
361   - features = np.arange(1,15)
362   - #~ features = np.arange(1,3)
363   -
364   - alcd_class_dict = {'0':'null value', '1':'background', '2':'low clouds',
365   - '3':'high clouds', '4':'clouds shadows', '5':'land',
366   - '6':'water', '7':'snow'}
367   -
368   - all_class_names = ['TP', 'FN', 'FP', 'TN']
369   - colormap = create_colormap(class_call = 'number')
370   -
371   - # Execution of the histograms
372   - for feat in features:
373   - curves = []
374   - bins = []
375   - colors = []
376   -
377   - for diff_class_nb in all_diff_class_nb:
378   - #~ curve, bin_centers = get_normalized_histo(comparison_parameters, diff_class_nb, band)
379   - curve, bin_centers = get_normalized_histo(comparison_parameters, processing_chain, diff_class_nb, alcd_class_nb, feat)
380   - curves.append(curve)
381   - bins.append(bin_centers)
382   - colors.append(colormap[str(diff_class_nb)])
383   -
384   - plt.figure()
385   - # first the fill between, then the solid curve
386   - for k in range(len(curves)):
387   - if len(curves[k]) > 0:
388   - plt.fill_between(bins[k], curves[k], color = colors[k], alpha = 0.1, linewidth = 0)
389   -
390   - for k in range(len(curves)):
391   - if len(curves[k]) > 0:
392   - linestyle = '-'
393   - plt.plot(bins[k], curves[k], color = colors[k],
394   - linewidth = 1.5, label = all_class_names[k], linestyle = linestyle)
395   -
396   -
397   - plt.title('True class: {}\nFeature {}'.format(alcd_class_dict[str(alcd_class_nb)], feat))
398   - plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
399   - plt.show()
400   - out_fig = op.join(main_dir, 'Statistics', 'features', 'class_{}_feature_{}.png'.format(alcd_class_nb, feat))
401   - #~ plt.savefig(out_fig, bbox_inches='tight')
402   -
403   - #~ plt.show(block = False)
404   -
405   -
406   -
407   -
408   -def create_colormap(color_pattern = 'diff', class_call = 'number'):
409   - '''
410   - Create a colormap for each class
411   - '''
412   -
413   - class_color = {}
414   -
415   - if color_pattern == 'diff':
416   - class_color["full_image"] = np.divide([100., 0., 255.], 255.)
417   - class_color["null_value"] = np.divide([0., 0., 0., 0.], 255.)
418   - class_color["TP"] = np.divide([10., 192., 56.], 255.)
419   - class_color["FN"] = np.divide([255., 0., 0.], 255.)
420   - class_color["FP"] = np.divide([192., 52., 193.], 255.)
421   - class_color["TN"] = np.divide([10., 10., 10.], 255.)
422   -
423   - if class_call == 'number':
424   - class_color["-1"] = class_color["full_image"]
425   - class_color["0"] = class_color["null_value"]
426   - class_color["1"] = class_color["TP"]
427   - class_color["2"] = class_color["FN"]
428   - class_color["3"] = class_color["FP"]
429   - class_color["4"] = class_color["TN"]
430   - else:
431   - class_color["full_image"] = np.divide([100., 0., 255.], 255.)
432   - class_color["null_value"] = np.divide([195., 255., 42.], 255.)
433   - class_color["background"] = np.divide([187., 83., 58.], 255.)
434   - class_color["low_clouds"] = np.divide([170., 170., 170.], 255.)
435   - class_color["high_clouds"] = np.divide([255., 123., 184.], 255.)
436   - class_color["clouds_shadows"] = np.divide([10., 10., 10.], 255.)
437   - class_color["land"] = np.divide([0., 151., 56.], 255.)
438   - class_color["water"] = np.divide([0., 142., 208.], 255.)
439   - class_color["snow"] = np.divide([77., 77., 77.], 255.)
440   - if class_call == 'number':
441   - class_color["-1"] = class_color["full_image"]
442   - class_color["0"] = class_color["null_value"]
443   - class_color["1"] = class_color["background"]
444   - class_color["2"] = class_color["low_clouds"]
445   - class_color["3"] = class_color["high_clouds"]
446   - class_color["4"] = class_color["clouds_shadows"]
447   - class_color["5"] = class_color["land"]
448   - class_color["6"] = class_color["water"]
449   - class_color["7"] = class_color["snow"]
450   -
451   -
452   - return class_color
453   -
454   -
455   -def main():
456   - comparison_parameters = json.load(open(op.join('parameters_files','comparison_parameters.json')))
457   -
458   - location = 'Arles'
459   - date = '20171002'
460   -
461   -
462   - for processing_chain in ['maja', 'sen2cor', 'fmask']:
463   - try:
464   - plot_confusion_repartition(comparison_parameters, processing_chain)
465   - except:
466   - pass
467   -
468   - return
469   - alcd_class_nb = 2
470   - extract_bands_per_class(comparison_parameters, processing_chain, alcd_class_nb)
471   -
472   -
473   -
474   -
475   -if __name__ == '__main__':
476   - main()
477   -