show_loss_accurracy_plots[source]

show_loss_accurracy_plots(history)

Displays loss and accuracy plots for the given training history.
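The source is linked above; as a rough guide, a minimal sketch of such a helper is shown below, assuming the history is a DataFrame with Keras-style loss/accuracy (and optionally val_loss/val_accuracy) columns:

import matplotlib.pyplot as plt

def show_loss_accurracy_plots_sketch(history):
    # history: per-epoch metrics with Keras-style columns (assumption)
    fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(12, 4))
    for col in ('loss', 'val_loss'):
        if col in history:
            ax_loss.plot(history[col], label=col)
    for col in ('accuracy', 'val_accuracy'):
        if col in history:
            ax_acc.plot(history[col], label=col)
    ax_loss.set_title('Loss')
    ax_loss.set_xlabel('epoch')
    ax_loss.legend()
    ax_acc.set_title('Accuracy')
    ax_acc.set_xlabel('epoch')
    ax_acc.legend()
    plt.show()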

Loss Accuracy Plots

We want to see whether the model converged while learning to predict criticality.

import numpy as np
import pandas as pd
from tensorflow.keras.models import load_model  # requires TensorFlow 2.3 or greater

history = pd.read_csv('../08_test/history_training.csv')
show_loss_accurracy_plots(history)

# load the trained model and the held-out test data
path = '../08_test/best_model.hdf5'
criticality_network_load = load_model(path)
corpora_test_x = np.load('../08_test/corpora_test_x.npy')
target_test_y = np.load('../08_test/target_test_y.npy')

evaluate_model[source]

evaluate_model(criticality_network_load, corpora_test_x, target_test_y)

Displays the given model's loss, accuracy, average precision-recall, and AUC for the given data.
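The source is linked above; a minimal sketch of such an evaluation, assuming a binary target, a model compiled with an accuracy metric, and scikit-learn for the ranking metrics, might look like:

from sklearn.metrics import average_precision_score, roc_auc_score

def evaluate_model_sketch(model, test_x, test_y):
    # Keras returns [loss, accuracy] for a model compiled with an accuracy metric
    loss, accuracy = model.evaluate(test_x, test_y, verbose=0)
    probs = model.predict(test_x)
    print('Loss:', loss)
    print('Accuracy:', accuracy)
    print('Average precision-recall:', average_precision_score(test_y, probs))
    print('AUC:', roc_auc_score(test_y, probs))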

Get Accuracy of Model

evaluate_model(criticality_network_load, corpora_test_x, target_test_y)
def clean_list(inp):
    # flatten a list of single-element lists: [[a], [b]] -> [a, b]
    out = []
    for i in inp:
        out.append(i[0])
    return out

def clean_input(inp):
    # convert an embedding vector (array of shape (n, 1)) into a
    # hashable tuple so it can be used as a dictionary key
    return tuple(clean_list(inp.tolist()))

def summarize(inp):
    # collapse per-dimension SHAP values into a single total
    total = 0
    for i in inp:
        total += i
    return [total]
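A toy illustration (hypothetical values) of what each helper does:

vec = np.array([[0.1], [0.2], [0.3]])
clean_list(vec.tolist())    # -> [0.1, 0.2, 0.3]
clean_input(vec)            # -> (0.1, 0.2, 0.3)
summarize([0.1, 0.2, 0.3])  # -> [0.6] (up to float rounding)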

SHAP Evaluations

from securereqnet.utils import Embeddings
import shap
model = criticality_network_load

Get Reverse Embeddings Mapping

embeddings = Embeddings()
embed_path = '../data/word_embeddings-embed_size_100-epochs_100.csv'
embeddings_dict = embeddings.get_embeddings_dict(embed_path)

# invert the token -> vector mapping so we can look tokens up by vector
reverse_embeddings = {}
for key, value in embeddings_dict.items():
    # tuples are hashable, so they can serve as dictionary keys
    value = tuple(np.array(value, dtype='float32').tolist())
    reverse_embeddings[value] = key
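As a quick, illustrative sanity check, any token's embedding vector should round-trip back to the same token (assuming no two tokens share a vector):

tok = next(iter(embeddings_dict))
vec = tuple(np.array(embeddings_dict[tok], dtype='float32').tolist())
assert reverse_embeddings[vec] == tok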

Calculate SHAP Values for 200 Issues

We use a background sample of 400 test examples as the reference distribution for computing SHAP values over these 200 points.

background = corpora_test_x[np.random.choice(corpora_test_x.shape[0], 400, replace=False)]

# explain the model's predictions on the first 200 test issues
e = shap.DeepExplainer(model, background)
# ...or pass tensors directly
# e = shap.DeepExplainer((model.layers[0].input, model.layers[-1].output), background)
shap_values = e.shap_values(corpora_test_x[0:200])

Map SHAP Values to Tokens

Using the reverse embeddings from earlier, we essentially undo the vectorization so we can map SHAP values back to tokens. Tokens are much more readable than vectors and allow for easy human interpretation.

# pairs of (shap value, token)
shaps = []

for doc in range(shap_values[0].shape[0]):
    for word in range(shap_values[0][doc].shape[0]):
        try:
            # recover the token for this word vector, then collapse its
            # per-dimension SHAP values into a single score
            string = reverse_embeddings[clean_input(corpora_test_x[doc, word])]
            shap_value = summarize(clean_list(shap_values[0][doc, word]))[0]
            shaps.append((shap_value, string))
        except KeyError:
            # skip vectors (e.g. padding) with no matching token
            pass

# sort by magnitude, largest effects first
shaps = sorted(shaps, key=lambda x: abs(x[0]), reverse=True)

Create Plot

Here we plot the 25 tokens with the largest-magnitude SHAP values over the 200 data points and check their effects.

import matplotlib.pyplot as plt
import math
import statistics

shap_vals = []
token = []

data = {}

# collect the 25 distinct tokens with the largest-magnitude SHAP values
i = 0
while len(data) < 25 and i < len(shaps):
    curTok = shaps[i][1]
    if curTok not in data:
        data[curTok] = []
    i += 1

# gather every occurrence of those tokens across all 200 issues
for curShap, curTok in shaps:
    if curTok in data:
        data[curTok].append(curShap)

for key in data.keys():
    for item in data[key]:
        shap_vals.append(item)
        token.append(key)
    
fig = plt.figure(figsize=(15, 10))

max_shap_val = max(shap_vals)
min_shap_val = min(shap_vals)
total_range = max_shap_val - min_shap_val
std_dev = statistics.stdev(shap_vals)
median = statistics.median(shap_vals)
mean = statistics.mean(shap_vals)

# define our color gradient: position in the value range, square-rooted
# so the red/blue transition is less linear
redness = lambda x: math.sqrt((x + abs(min_shap_val)) / total_range)
blueness = lambda x: 1 - redness(x)

# size markers by the normal density, so values near the mean get larger dots
size = lambda x: 500 * (1 / (std_dev * math.sqrt(2 * math.pi))) * math.exp(-((x - mean) ** 2) / (2 * std_dev ** 2)) + 35

plt.xlabel("SHAP Value")
plt.ylabel("Token")
plt.title("SHAP Visualization for 200 Issues")
plt.xlim([-1 * (max_shap_val + std_dev), max_shap_val + std_dev])
plt.gca().invert_yaxis()
# create the scatter plot, colored by sign/magnitude and sized by density
plt.scatter(shap_vals, token,
            c=[(redness(x), 0, blueness(x)) for x in shap_vals],
            marker='.', s=[size(x) for x in shap_vals])
plt.savefig("../images/shap_200_issues_alpha.png", transparent=False)
plt.show()