Allegato "plot_utility.py"
Scarica 1 import matplotlib.pyplot as plt
2 from matplotlib.pyplot import cm
3 import numpy as np
4
5 import scipy.stats as stats
6
7 from sklearn.metrics import roc_curve, auc
8
9
def colored_bar(data, class_map, color_map, attr_name, class_name):
    """Draw a stacked bar chart of class counts per attribute value.

    One bar is drawn for every distinct value of ``data[attr_name]``. Each bar
    is stacked from one segment per class value found in that group, and the
    segment height is the number of rows with that class.

    Args:
        data: Table supporting ``groupby`` and column indexing (used like a
            pandas DataFrame).
        class_map: Mapping whose keys are the class values listed in the
            legend; only the keys are used here.
        color_map: Mapping from class value to the colour of its segments.
        attr_name: Column whose distinct values become the bars.
        class_name: Column holding the class value of each row.

    The chart is drawn on the current pyplot figure. Nothing is returned and
    ``plt.show()`` is left to the caller.
    """
    width = 0.3
    # First segment drawn for each class. Using it as the legend handle keeps
    # every legend label attached to a bar of the matching class, even when a
    # class is missing from some attribute group.
    legend_handles = {}

    for attr_val, group in data.groupby(attr_name):
        class_counts = group.groupby(class_name).size()
        # Scalar offset: one bar per segment, stacked on the previous ones.
        y_offset = 0

        for class_val, count in class_counts.items():
            segment = plt.bar(attr_val, count, width,
                              color=color_map[class_val], bottom=y_offset,
                              align='center')
            legend_handles.setdefault(class_val, segment)
            y_offset = y_offset + count

    plt.title(attr_name)
    plt.ylabel('#count')

    # Keep the legend in class_map order, skipping classes that never occur.
    legend_labels = [label for label in class_map if label in legend_handles]
    plt.legend([legend_handles[label] for label in legend_labels],
               legend_labels)
28
29
def class_dist_plot(data, color_map, attr_name, class_name, bins=50):
    """Plot the distribution of one attribute separately for each class.

    For every class value a figure is shown containing a normalised step
    histogram of the attribute with a Gaussian kernel density estimate drawn
    over it. A final figure overlays the density estimates of all classes.

    Args:
        data: Table supporting ``groupby`` and column indexing (used like a
            pandas DataFrame).
        color_map: Mapping from class value to plot colour.
        attr_name: Column whose values are plotted.
        class_name: Column holding the class value of each row.
        bins: Passed to ``plt.hist`` as the bin specification.

    ``plt.show()`` is called once per class and once for the overlay figure.
    """
    # (class value, fitted KDE, histogram bin edges) for the overlay figure.
    per_class = []

    # Group by the bare column name, not a one-element list, so that each
    # group key is the class value itself rather than a 1-tuple.
    for class_val, group in data[[attr_name, class_name]].groupby(class_name):
        samples = group[attr_name].values
        density = stats.gaussian_kde(samples)
        _, bin_edges, _ = plt.hist(samples, bins, color=color_map[class_val],
                                   histtype='step', density=True)
        # The KDE takes only the evaluation points; it is sampled at the
        # histogram bin edges.
        plt.plot(bin_edges, density(bin_edges), color='k')
        # str() so that non-string class labels do not break the title.
        plt.title('Bar plot - attribute: ' + attr_name
                  + ', class label: ' + str(class_val))
        plt.ylabel('Density')
        plt.show()

        per_class.append((class_val, density, bin_edges))

    for class_val, density, bin_edges in per_class:
        plt.plot(bin_edges, density(bin_edges), color=color_map[class_val])

    plt.title('Class distributions - ' + attr_name)
    plt.ylabel('Density')
    plt.legend([class_val for class_val, _, _ in per_class])
    plt.show()
57
58
def colored_bar_perc(data, class_map, color_map, attr_name, class_name):
    """Draw a stacked bar chart of class percentages per attribute value.

    One bar is drawn for every distinct value of ``data[attr_name]``. Each bar
    is stacked from one segment per class value found in that group, and the
    segment height is that class's share of the group's rows, in percent.

    Args:
        data: Table supporting ``groupby`` and column indexing (used like a
            pandas DataFrame).
        class_map: Mapping whose keys are the class values listed in the
            legend; only the keys are used here.
        color_map: Mapping from class value to the colour of its segments.
        attr_name: Column whose distinct values become the bars.
        class_name: Column holding the class value of each row.

    The chart is drawn on the current pyplot figure. Nothing is returned and
    ``plt.show()`` is left to the caller.
    """
    width = 0.3
    # First segment drawn for each class. Using it as the legend handle keeps
    # every legend label attached to a bar of the matching class, even when a
    # class is missing from some attribute group.
    legend_handles = {}

    for attr_val, group in data.groupby(attr_name):
        class_counts = group.groupby(class_name).size()
        # Denominator is the number of rows in this attribute group.
        group_size = len(group)
        # Scalar offset: one bar per segment, stacked on the previous ones.
        y_offset = 0

        for class_val, count in class_counts.items():
            share = 100 * count / group_size
            segment = plt.bar(attr_val, share, width,
                              color=color_map[class_val], bottom=y_offset,
                              align='center')
            legend_handles.setdefault(class_val, segment)
            y_offset = y_offset + share

    plt.title(attr_name)
    plt.ylabel('Percentage')
    plt.xlabel('Values')

    # Keep the legend in class_map order, skipping classes that never occur.
    legend_labels = [label for label in class_map if label in legend_handles]
    plt.legend([legend_handles[label] for label in legend_labels],
               legend_labels)
83
84
def colored_hist(data, class_map, color_map, attr_name, class_name, n_bins=10):
    """Draw a stacked histogram of an attribute, one colour per class.

    Args:
        data: Table supporting boolean-mask row selection and column indexing
            (used like a pandas DataFrame).
        class_map: Mapping whose keys are the class values to plot; the keys
            set the stacking order and the legend entries.
        color_map: Mapping from class value to histogram colour.
        attr_name: Column whose values are binned.
        class_name: Column holding the class value of each row.
        n_bins: Passed to ``plt.hist`` as the bin specification.

    The chart is drawn on the current pyplot figure. Nothing is returned and
    ``plt.show()`` is left to the caller.
    """
    class_vals = list(class_map)

    plot_data = [data[data[class_name] == class_val][attr_name].values
                 for class_val in class_vals]
    # Look colours up per class so that datasets and colours stay aligned
    # regardless of the key order (or extra entries) of color_map.
    colors = [color_map[class_val] for class_val in class_vals]

    plt.hist(plot_data, n_bins, histtype='bar', stacked=True, color=colors)

    plt.title(attr_name)
    plt.ylabel('#count')
    plt.legend(class_vals)
96
97
def colored_scatter(data, attr_name_x, attr_name_y, colors):
    """Scatter one column of ``data`` against another with 'x' markers.

    Args:
        data: Container indexable by column name.
        attr_name_x: Column plotted on the x axis; also used as x-axis label.
        attr_name_y: Column plotted on the y axis; also used as y-axis label.
        colors: Passed to ``plt.scatter`` as ``color``.

    The points are drawn on the current pyplot figure; nothing is returned.
    """
    x_values = data[attr_name_x]
    y_values = data[attr_name_y]

    plt.scatter(x_values, y_values, color=colors, s=60, marker='x')

    plt.xlabel(attr_name_x)
    plt.ylabel(attr_name_y)
102
103
def heat_map(matrix, x_labels, y_labels, title):
    """Draw an annotated heat map of a 2-D matrix on a new figure.

    Args:
        matrix: 2-D array-like of cell values, indexed as ``[row, column]``.
            It is converted with ``np.asarray``, so nested lists work as well
            as NumPy arrays.
        x_labels: Tick labels for the columns; one annotation column is drawn
            per label.
        y_labels: Tick labels for the rows; one annotation row is drawn per
            label.
        title: Title of the axes.

    A new figure is created with ``plt.subplots()``. Nothing is returned and
    ``plt.show()`` is left to the caller.
    """
    # Guarantee [i, j] indexing below even for nested lists.
    matrix = np.asarray(matrix)

    fig, ax = plt.subplots()
    ax.imshow(matrix, cmap=cm.Oranges)

    # Show every tick and label it with the respective list entry.
    ax.set_xticks(np.arange(len(x_labels)))
    ax.set_yticks(np.arange(len(y_labels)))
    ax.set_xticklabels(x_labels)
    ax.set_yticklabels(y_labels)

    # Rotate the x tick labels and anchor them on their right end.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")

    # Write each cell's value at the centre of the cell.
    for i in range(len(y_labels)):
        for j in range(len(x_labels)):
            ax.text(j, i, matrix[i, j], ha="center", va="center", color="k")

    ax.set_title(title)
    fig.tight_layout()
125
126
def plot_roc(x_train, y_train, x_validation, y_validation, class_map, models):
    """Fit each model and plot one-vs-rest ROC curves, one figure per class.

    Every model is fitted on the training data and asked for class
    probabilities on the validation data. For each class a new figure shows
    the ROC curve of every model (with its AUC in the legend) together with
    the diagonal reference line.

    Args:
        x_train: Training features, passed to ``model.fit``.
        y_train: Training labels, passed to ``model.fit``.
        x_validation: Validation features, passed to ``model.predict_proba``.
        y_validation: Iterable of validation labels; each must be a key of
            ``class_map``.
        class_map: Mapping from class label to its column index in the
            one-hot encoding built here.
        models: Iterable of ``(name, model)`` pairs; ``name`` is a string used
            in the legend and ``model`` provides ``fit`` and
            ``predict_proba``.

    ``plt.show()`` is called once after all figures have been created.
    """
    # Materialise once: the pairs are walked once for fitting and once per
    # class for plotting, which would silently exhaust a one-shot iterable.
    models = list(models)
    n_classes = len(class_map)

    class_assignments = [class_map[class_val] for class_val in y_validation]
    one_hot_encoding = np.zeros((len(class_assignments), n_classes))
    one_hot_encoding[np.arange(len(class_assignments)), class_assignments] = 1

    probs = []
    for _, model in models:
        model.fit(x_train, y_train)
        probs.append(model.predict_proba(x_validation))

    # Labels ordered by their index in class_map, so the label shown for
    # column i is the class encoded in column i, whatever the key order is.
    labels_by_index = sorted(class_map, key=class_map.get)

    lw = 0.5

    for i, label in enumerate(labels_by_index):
        plt.figure()

        for (name, _), model_probs in zip(models, probs):
            # NOTE(review): assumes column i of predict_proba is the class
            # with index i in class_map - confirm against the models' own
            # class ordering.
            fpr, tpr, _thresholds = roc_curve(one_hot_encoding[:, i],
                                              model_probs[:, i])
            roc_auc = auc(fpr, tpr)

            plt.plot(fpr, tpr, lw=lw,
                     label=name + ' (area = %0.2f)' % roc_auc)

        # Diagonal reference line.
        plt.plot([0, 1], [0, 1], lw=lw, linestyle='--')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.0])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        # str() so that non-string class labels do not break the title.
        plt.title('Class = ' + str(label))
        plt.legend(loc="lower right")

    plt.show()
Allegati
Non è consentito inserire allegati su questa pagina.