Commit 66cff740 by Pamela Osuna
parents 08490059 bb0354f0
......@@ -49,7 +49,8 @@ def plot_confusion_matrix(cm,
if normalize:
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
plt.figure(figsize=(8, 6))
fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot()
plt.imshow(cm, interpolation='nearest', cmap=cmap)
plt.title(title)
plt.colorbar()
......@@ -88,5 +89,10 @@ def plot_confusion_matrix(cm,
plt.ylabel('True label')
plt.xlabel('Predicted label\naccuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass))
#plt.show()
plt.savefig("confusion_matrix")
# Added labels
labels = ['']*(2*len(target_names))
labels[::2]=target_names
ax.set_xticklabels([''] + labels)
ax.set_yticklabels([''] + labels)
plt.savefig("confusion_matrix.pdf")
plt.close()
import cnn
from cnn import CNN_Antifrag
from parser import parse_data
import itertools
from confusion_matrix import plot_confusion_matrix
"""
(c,b,e,o) will be read from the command line or a script
(c,b,e,o) corresponds to the combinations of the specific hyperparameters
to build the model
c belongs to {0,1,2,3} and represents the layer architecture
b belongs to {0,1} and represents the batch size
e belongs to {0,1} and represents the number of epochs
o belongs to {0,1,2} and represents the balancing method
"""
import os

# grid of hyperparameter indices explored by this script
c_ = [0, 1, 2, 3]
b_ = [1]
e_ = [0]
o_ = [0, 1, 2]
n_experiences = 100
combinations = itertools.product(c_, b_, e_, o_)

# parse the data
input_, output_ = parse_data(n_experiences, kind='linear')

#%%
# run every combination and keep the one with the best average AUC.
# max_params starts as None so that it always ends up being one of the
# 4-field combinations that was actually evaluated (the model name below
# is formatted with four fields).
max_params = None
max_avg_auc = 0
for params in combinations:
    model = CNN_Antifrag(name="CNN_%d_%d_%d_%d" % params)
    avg_acc, avg_auc = model.run_nn(input_, output_, params)
    if max_params is None or avg_auc > max_avg_auc:
        max_avg_auc = avg_auc
        max_params = params

#%%
print("Best params:", max_params)

# results (and the precision-recall plots) are written under out/
os.makedirs("out", exist_ok=True)

# once we have chosen the optimal parameters we can do the normal kfold
# note: the test data remains unbalanced
model = CNN_Antifrag(name="CNN_%d_%d_%d_%d" % max_params)
acc, auc, cm, pr = model.run_kfold(input_, output_, max_params)

#to add: precision recall curve

#%%
labels = [
    '[~R & ~E]',
    '[~R & E]',
    '[ R & ~E]',
    '[ R & E]',
]
# this function saves the matrix image automatically
plot_confusion_matrix(cm, labels)

# the context manager closes the file even if a write fails
with open("out/acc_auc.txt", 'w+') as f:
    f.write("Average accuracy: " + str(acc) + "\n")
    f.write("Average area under the curve: " + str(auc))

## TO DO: code that allows to execute in parallel, make sure it's the same random shuffle ...
......@@ -61,14 +61,23 @@ def model_architecture(c):
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))
return model
def choose_batch_epochs(b,e):
    """
    Maps the (b, e) hyperparameter indices to a batch size and a number
    of epochs

    Parameters
    -----------
    b : 0 or 1, index of the batch size (16 or 64)
    e : 0 or 1, index of the number of epochs (12 or 512)

    Returns
    -----------
    the tuple (batch size, epochs), or None when (b, e) is not one of
    the four supported combinations
    """
    batch_sizes = (16, 64)
    epoch_counts = (12, 512)
    for b_code, batch_size in enumerate(batch_sizes):
        for e_code, n_epochs in enumerate(epoch_counts):
            if b == b_code and e == e_code:
                return batch_size, n_epochs
    # unsupported combination
    return None
def choose_balancing_method(o):
    """
    Maps the hyperparameter index o to the name of a balancing method

    Parameters
    -----------
    o : 0, 1 or 2

    Returns
    -----------
    'smote' for 0, 'adasyn' for 1, 'class_weight' for 2,
    and None for any other value
    """
    methods = ('smote', 'adasyn', 'class_weight')
    for code, method in enumerate(methods):
        if o == code:
            return method
    # unknown index
    return None
\ No newline at end of file
from itertools import product

B = [0,1]
# lookup table keyed by (robust, evolvable); the codes 0..3 follow the
# order of product(B, B): (0,0), (0,1), (1,0), (1,1)
converter = {(r, e): [code] for code, (r, e) in enumerate(product(B, B))}


def output_convert(e, r):
    """
    Encodes outputs as integers

    Parameters
    -----------
    e : 1 evolvable, 0 not evolvable
    r : 1 robust, 0 not robust

    Returns
    -----------
    a new one-element list holding the encoded output

    output meaning:
    0 : not evolv. and not rob.
    1 : evol. and not rob.
    2 : not evol. and rob.
    3 : evol. and rob.

    Raises
    -----------
    KeyError when (r, e) is not a pair of 0/1 values
    """
    # copy the stored list so that callers cannot alter the lookup table
    return list(converter[r, e])
import numpy as np
import pandas as pd
import output_convert as oc
import os
from scipy.interpolate import interp1d as interp
def parse_data(n_experiences):
N_DATA = n_experiences #from 2 to 1001
#DATA PARSING
X = []
def interpolate(y,K,kind='linear'):
    """
    Resamples the values y onto a grid of K points

    The N values of y are placed at x = 1/N, 2/N, ..., 1 and the
    interpolant is evaluated at x = 1/K, 2/K, ..., 1. Points of the new
    grid that fall below 1/N are extrapolated.

    Parameters
    ----------
    y : list of real numbers
        dependent variable values.
    K : integer
        number of samples to interpolate
    kind : string
        interpolation kind, passed on to the interpolator

    Returns
    -------
    array with the K interpolated values
    """
    N = len(y)
    # Build the grids from an integer range and divide afterwards: with a
    # fractional step, arange can produce one point too many, which makes
    # x and y differ in length or returns the wrong number of samples.
    x = np.arange(1, N + 1) / N
    f = interp(x, y, kind=kind, fill_value="extrapolate")
    xintp = np.arange(1, K + 1) / K
    return f(xintp)
def parse_data(n_experiences,folder="data",samples=30,kind='linear'):
N_DATA = n_experiences #from 2 to 10001
input_ = []
output_ = []
N0 = 15 #15 or 20
#table of the name of the bionets
str_ = ["arabidopsis", "cardiac", "cd4", "mammalian", "metabolic", "anemia", "aurka", "b-cell", "body-drosophila", "bt474", "bt474-ErbB", "cycle-cdk", "fgf-drosophila", "gonadal", "hcc1954", "hcc1954-ErbB", "hh-drosophila", "l-arabinose-operon", "leukemia", "neurotransmitter", "oxidative-stress", "skbr-long", "skbr3-short", "spz-drosophila", "t-lgl-survival", "tol", "toll-drosophila", "trichostrongylus", "vegf-drosophila", "wg-drosophila", "yeast-cycle", "aspergillus-fumigatus", "budding-yeast", "gene-cardiac", "t-cell-differentiation", "lac-operon-bistability", "core-cell-cycle", "cortical"]
str_ = [
'anemia',
'cd4',
'lac-operon',
'spz-drosophila',
'arabidopsis',
'core-cell-cycle',
'lac-operon-bistability',
't-cell-differentiation',
'aspergillus-fumigatus',
'cortical',
'l-arabinose-operon',
't-lgl-survival',
'aurka',
'cycle-cdk',
'leukemia',
'tol',
'b-cell',
'fgf-drosophila',
'mammalian',
'trichostrongylus',
'body-drosophila',
'gene-cardiac',
'metabolic',
'vegf-drosophila',
'bt474',
'gonadal',
'neurotransmitter',
'wg-drosophila',
'bt474-ErbB',
'hcc1954',
'oxidative-stress',
'yeast-cycle',
'budding-yeast',
'hcc1954-ErbB',
'skbr3-long',
'cardiac',
'hh-drosophila',
'skbr3-short'
]
for s in str_:
dataXi_original = pd.read_csv("updated_data/"+ s + "/" + s + "_metrics.csv", sep=",", header=0)
N = int(dataXi_original.loc[1, 'N'])
antifragility_original = list(np.array(dataXi_original.loc[:, 'Antifragility']).astype(float))
# 15 or 20 points describing the relationship between the original values of antifragility in the network before perturbations and X/N
X_tmp = list(np.arange(1,N+1, 1))
X_N = [X_tmp[i]/N for i in range(N)]
original_points = [np.interp(i/N0, X_N, antifragility_original) for i in range(1, N0+1)]
for i in range(1,N_DATA):
data = pd.read_csv(
os.path.join(folder,s,s + "_metrics.csv"), sep=",", header=0)
# 30 points describing the relationship between the original
# values of antifragility in the network before perturbations and X/N
before =interpolate(
np.array(data.loc[:, 'Antifragility']).astype(float),
samples,
kind=kind)
for i in range(N_DATA):
#read the data for each experience
n = format(i, '09')
dataXi_tmp = pd.read_csv("updated_data/"+ s + "/" + s + "_" + n + "_metrics.csv", sep=",", header=0)
n = format(i+1, '09')
data = pd.read_csv(
os.path.join(folder,s,s + "_" + n + "_metrics.csv"),
sep=",", header=0)
#antifragility of the mutant
after = interpolate(
np.array(data.loc[:, 'Antifragility']).astype(float),
samples,
kind=kind)
# computes the difference of the antifragility curves
input_tmp = after-before
antifragility_tmp = list(np.array(dataXi_tmp.loc[:, 'Antifragility']).astype(float))
evolvable_tmp = list(
np.array(data.loc[:, 'Evolvability']).astype(int))
input_tmp = original_points + [np.interp(i/N0, X_N, antifragility_tmp) for i in range(1, N0+1)]
robust_tmp = list(
np.array(data.loc[:, 'Robustness']).astype(int))
evolvable_tmp = list(np.array(dataXi_tmp.loc[:, 'Evolvability']).astype(int))
robust_tmp = list(np.array(dataXi_tmp.loc[:, 'Robustness']).astype(int))
output_tmp = oc.output_convert(N, evolvable_tmp, robust_tmp)
# even though evolvable_tmp and robust_tmp are vectors,
# all entries are the same, we take the first
output_tmp = oc.output_convert(evolvable_tmp[0], robust_tmp[0])
input_.append(input_tmp)
output_+= output_tmp
......
......@@ -31,7 +31,7 @@ def plot_pr(recall, precision, average_precision):
plt.ylim([0.0, 1.05])
plt.xlim([0.0, 1.0])
plt.title('Average precision score, micro-averaged over all classes: AP={0:0.2f}'.format(average_precision["micro"]))
plt.savefig("precision_recall_curve")
plt.savefig("out/precision_recall_curve.pdf")
#plt.show()
plt.close()
......@@ -67,4 +67,4 @@ def avg_pr(n_splits, num_classes, recs_k, precs_k, avgs_k):
plt.ylim([0.0, 1.05])
plt.xlim([0.0, 1.0])
plt.title('Average precision score, over class {0}: AP={1:0.2f}'.format(i, avg_prec[i]))
plt.savefig("pr_curve_class_" +str(i))
plt.savefig("out/pr_curve_class_" +str(i)+".pdf")
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment