welcome: please sign in

Cerca

Link Dipartimentali

Link Esterni

Allegato "plot_utility.py"

Scarica

   1 import matplotlib.pyplot as plt
   2 from matplotlib.pyplot import cm
   3 import numpy as np
   4 
   5 import scipy.stats as stats
   6 
   7 from sklearn.metrics import roc_curve, auc
   8 
   9 
  10 def colored_bar(data, class_map, color_map, attr_name, class_name):
  11     width = 0.3
  12     n_cols = len(data[attr_name].unique())
  13     p = []
  14 
  15     for gr in data.groupby(attr_name):
  16         y_offset = np.zeros(n_cols)
  17         attr_val = gr[0]
  18         class_vals = dict(gr[1].groupby(class_name).size())
  19 
  20         for class_val in class_vals:
  21             p.append(plt.bar(attr_val, class_vals[class_val], width,
  22                              color=color_map[class_val], bottom=y_offset, align='center'))
  23             y_offset = y_offset + class_vals[class_val]
  24 
  25     plt.title(attr_name)
  26     plt.ylabel('#count')
  27     plt.legend(p, class_map.keys())
  28 
  29 
  30 def class_dist_plot(data, color_map, attr_name, class_name, bins=50):
  31     class_grs = data[[attr_name, class_name]].groupby([class_name])
  32     class_vals = []
  33     densities = []
  34     x_vals = []
  35 
  36     for class_gr in class_grs:
  37         class_val = class_gr[0]
  38         input_data = class_gr[1][attr_name].values
  39         density = stats.gaussian_kde(input_data)
  40         n, x_val, _ = plt.hist(input_data, bins, color=color_map[class_val], histtype=u'step', density=True)
  41         plt.plot(x_val, density(x_val, _), color='k')
  42         plt.title('Bar plot - attribute: ' + attr_name + ', class label: ' + class_val)
  43         plt.ylabel('Density')
  44         plt.show()
  45 
  46         class_vals.append(class_val)
  47         densities.append(density)
  48         x_vals.append(x_val)
  49 
  50     for i in range(len(class_vals)):
  51         plt.plot(x_vals[i], densities[i](x_vals[i]), color=color_map[class_vals[i]])
  52 
  53     plt.title('Class distributions - ' + attr_name)
  54     plt.ylabel('Density')
  55     plt.legend(class_vals)
  56     plt.show()
  57 
  58 
  59 def colored_bar_perc(data, class_map, color_map, attr_name, class_name):
  60     width = 0.3
  61     n_cols = len(data[attr_name].unique())
  62     col_size = list(data.groupby([attr_name]).size())
  63     p = []
  64     jj = 0
  65 
  66     for gr in data.groupby(attr_name):
  67         y_offset = np.zeros(n_cols)
  68         attr_val = gr[0]
  69         class_vals = dict(gr[1].groupby(class_name).size())
  70 
  71         for class_val in class_vals:
  72             val = 100 * class_vals[class_val] / col_size[jj]
  73             p.append(plt.bar(attr_val, val, width,
  74                              color=color_map[class_val], bottom=y_offset, align='center'))
  75             y_offset = y_offset + val
  76 
  77         jj += 1
  78 
  79     plt.title(attr_name)
  80     plt.ylabel('Percentage')
  81     plt.xlabel('Values')
  82     plt.legend(p, class_map.keys())
  83 
  84 
  85 def colored_hist(data, class_map, color_map, attr_name, class_name, n_bins=10):
  86     plot_data = []
  87 
  88     for class_val in class_map:
  89         plot_data.append(data[data[class_name] == class_val][attr_name].values)
  90 
  91     plt.hist(plot_data, n_bins, histtype='bar', stacked=True, color=color_map.values())
  92 
  93     plt.title(attr_name)
  94     plt.ylabel('#count')
  95     plt.legend(class_map.keys())
  96 
  97 
  98 def colored_scatter(data, attr_name_x, attr_name_y, colors):
  99     plt.scatter(data[attr_name_x], data[attr_name_y], color=colors, s=60, marker='x')
 100     plt.xlabel(attr_name_x)
 101     plt.ylabel(attr_name_y)
 102 
 103 
 104 def heat_map(matrix, x_labels, y_labels, title):
 105     fig, ax = plt.subplots()
 106     ax.imshow(matrix, cmap=cm.Oranges)
 107 
 108     # We want to show all ticks...
 109     ax.set_xticks(np.arange(len(x_labels)))
 110     ax.set_yticks(np.arange(len(y_labels)))
 111     # ... and label them with the respective list entries
 112     ax.set_xticklabels(x_labels)
 113     ax.set_yticklabels(y_labels)
 114 
 115     # Rotate the tick labels and set their alignment.
 116     plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")
 117 
 118     # Loop over data dimensions and create text annotations.
 119     for i in range(len(y_labels)):
 120         for j in range(len(x_labels)):
 121             ax.text(j, i, matrix[i, j], ha="center", va="center", color="k")
 122 
 123     ax.set_title(title)
 124     fig.tight_layout()
 125 
 126 
 127 def plot_roc(x_train, y_train, x_validation, y_validation, class_map, models):
 128     class_assingments = [class_map[class_val] for class_val in y_validation]
 129     one_hot_encoding = np.zeros((len(y_validation), len(class_map)))
 130     one_hot_encoding[range(len(class_assingments)), class_assingments] = 1
 131 
 132     probs = []
 133 
 134     for _, model in models:
 135         model.fit(x_train, y_train)
 136         probs.append(model.predict_proba(x_validation))
 137 
 138     fpr = dict()
 139     tpr = dict()
 140     roc_auc = dict()
 141 
 142     lw = 0.5
 143 
 144     for i in range(len(class_map)):
 145         plt.figure()
 146         m = 0
 147 
 148         for name, _ in models:
 149             fpr[i], tpr[i], _ = roc_curve(one_hot_encoding[:, i], probs[m][:, i])
 150             roc_auc[i] = auc(fpr[i], tpr[i])
 151 
 152             plt.plot(fpr[i], tpr[i], lw=lw, label=name + ' (area = %0.2f)' % roc_auc[i])
 153             m += 1
 154 
 155         plt.plot([0, 1], [0, 1], lw=lw, linestyle='--')
 156         plt.xlim([0.0, 1.0])
 157         plt.ylim([0.0, 1.0])
 158         plt.xlabel('False Positive Rate')
 159         plt.ylabel('True Positive Rate')
 160         plt.title('Class = ' + list(class_map.keys())[i])
 161         plt.legend(loc="lower right")
 162 
 163         plt.show()

Allegati

Non è consentito inserire allegati su questa pagina.