-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathplot.py
120 lines (97 loc) · 3.94 KB
/
plot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import numpy as np
from keras import backend
from keras.models import load_model
from keras.preprocessing.sequence import pad_sequences
import sys, getopt
import csv
import matplotlib
import matplotlib.pyplot as plt
import pickle
from configparser import ConfigParser
from explanations import create_explanations
from preprocess import spacyfy
def plot_explanation(attribution, name, dim, images_folder, save_fig=True):
plt.figure(figsize=(12.8,7.5), dpi=50)
data_array = attribution
plt.title(f'DeepExplain {dim}: {name}')
if dim == '2d':
data_max = np.max(np.abs(data_array))
plt.imshow(data_array, cmap='seismic', vmin=-data_max, vmax=data_max)
plt.colorbar()
plt.xlabel('index of token')
plt.ylabel('word vector dimensions')
elif dim == '1d':
mask1 = data_array < -1e-8
mask2 = data_array > 1e-8
plt.bar(np.arange(512)[mask1], data_array[mask1], color = 'blue')
plt.bar(np.arange(512)[mask2], data_array[mask2], color = 'red')
plt.xlabel('index of token')
plt.ylabel('summed attribution score')
if save_fig:
filename = f'{images_folder}/de{dim}_{name}.eps'
plt.savefig(filename)
print(f'saved plot {filename}')
def find_line(imdb_file, index):
with open(imdb_file, encoding="latin-1") as csvFile:
reader = csv.reader(csvFile)
count=0
for row in reader:
if row[1] == 'test':
if count==index:
return row[2]
else:
count += 1
def text_to_latex(line, scores):
output_list = []
for token in spacyfy(line):
output_list.append(token.lower_)
output_text = ''
for word, score in zip(output_list, scores):
red = 255*(1+min(score, 0))
green = 255*min(1-score, 1+score)
blue = 255*(1-max(score, 0))
output_text += '\colorbox[RGB]{' + str(int(red)) + ',' + \
str(int(green)) + ',' + str(int(blue)) + '}{\strut ' + word + '} '
return output_text
def main():
try:
opts, args = getopt.getopt(sys.argv[1:], 'pl')
except getopt.GetoptError as err:
print(err)
sys.exit()
print(backend.tensorflow_backend._get_available_gpus())
config = ConfigParser()
config.read('config.ini')
dataset_folder = config.get('DATA FOLDER', 'dataset')
images_folder = config.get('DATA FOLDER', 'images')
latex_folder = config.get('DATA FOLDER', 'latex')
preprocessed_folder = config.get('DATA FOLDER', 'preprocessed')
data_index = config.getint('PLOT', 'data_index')
matplotlib.rcParams.update({'font.size': 20})
x_data = pickle.load(open(f'{preprocessed_folder}/x_test.p', 'rb'))
x_test = pad_sequences(x_data[data_index:data_index+1], maxlen=512,
dtype='float', padding='post')
y_test = np.array(pickle.load(open(f'{preprocessed_folder}/y_test.p',
'rb')))[data_index:data_index+1]
model = load_model('models/deepcnn.hdf5')
attributions, attributions_summed = create_explanations(model, x_test,
y_test)
for p,l in opts:
if p in ("-p"):
for key in attributions.keys():
plot_explanation(np.transpose(attributions[key][0]), key, '2d',
images_folder)
plot_explanation(attributions_summed[key][0], key, '1d',
images_folder)
elif l in ("-l"):
latex = dict()
for key in attributions_summed.keys():
latex[key] = text_to_latex(find_line(f'{dataset_folder}/imdb_master.csv',
data_index), attributions_summed[key][0]/np.max(np.abs(attributions_summed[key][0])))
with open(f'{latex_folder}/{key}_{data_index}.tex', 'w') as tex_file:
tex_file.write(latex[key])
print(f'wrote latex file {latex_folder}/{key}_{data_index}.tex')
else:
sys.exit()
if __name__ == "__main__":
main()