Commit 66cff740 by Pamela Osuna
parents 08490059 bb0354f0
...@@ -49,7 +49,8 @@ def plot_confusion_matrix(cm, ...@@ -49,7 +49,8 @@ def plot_confusion_matrix(cm,
if normalize: if normalize:
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
plt.figure(figsize=(8, 6)) fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot()
plt.imshow(cm, interpolation='nearest', cmap=cmap) plt.imshow(cm, interpolation='nearest', cmap=cmap)
plt.title(title) plt.title(title)
plt.colorbar() plt.colorbar()
...@@ -88,5 +89,10 @@ def plot_confusion_matrix(cm, ...@@ -88,5 +89,10 @@ def plot_confusion_matrix(cm,
plt.ylabel('True label') plt.ylabel('True label')
plt.xlabel('Predicted label\naccuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass)) plt.xlabel('Predicted label\naccuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass))
#plt.show() #plt.show()
plt.savefig("confusion_matrix") # Added labels
labels = ['']*(2*len(target_names))
labels[::2]=target_names
ax.set_xticklabels([''] + labels)
ax.set_yticklabels([''] + labels)
plt.savefig("confusion_matrix.pdf")
plt.close() plt.close()
import cnn from cnn import CNN_Antifrag
from parser import parse_data from parser import parse_data
import itertools import itertools
from confusion_matrix import plot_confusion_matrix from confusion_matrix import plot_confusion_matrix
""" """
(c,b,e) will be read from the command line or a script (c,b,e,o) will be read from the command line or a script
(c,b,e) corresponds to the combinations of the specific hyperparameters to build the model (c,b,e,o) corresponds to the combinations of the specific hyperparameters
to build the model
c belongs to {0,1,2,3} and represents the layer architecture c belongs to {0,1,2,3} and represents the layer architecture
b belongs to {0,1} and represents the batch size b belongs to {0,1} and represents the batch size
e belongs to {0,1} and represents the number of epochs e belongs to {0,1} and represents the number of epochs
o belongs to {0,1,2} and represents the balancing method
""" """
#reading arguments from command line
#c = int(sys.argv[1])
#b = int(sys.argv[2])
#e = int(sys.argv[3])
c_ = [0,1,2,3] c_ = [0,1,2,3]
b_ = [0,1] b_ = [1]
e_ = [0,1] e_ = [0]
n_experiences = 10001 o_ = [0,1,2]
combinations = itertools.product(c_,b_,e_)
n_experiences = 100
combinations = itertools.product(c_,b_,e_,o_)
#parse the data
input_, output_ = parse_data(n_experiences)
#run a specific combination #parse the data
max_params = (0,0,0) input_, output_ = parse_data(n_experiences,kind='linear')
#%%
max_avg_auc = 0 max_avg_auc = 0
for params in combinations: for params in combinations:
avg_acc, avg_auc, X_train_kfold, X_test_kfold, y_train_kfold, y_test_kfold = cnn.run_nn(input_, output_, n_experiences, params) cnn = CNN_Antifrag(name="CNN_%d_%d_%d_%d"%params)
avg_acc, avg_auc = cnn.run_nn(input_, output_, params)
if avg_auc > max_avg_auc: if avg_auc > max_avg_auc:
max_avg_auc = avg_auc max_avg_auc = avg_auc
max_params = params max_params = params
X_train_kfold_opt = X_train_kfold
X_test_kfold_opt = X_test_kfold #%%
y_train_kfold_opt = y_train_kfold print("Best params:",max_params)
y_test_kfold_opt = y_test_kfold
# once we have chosen the optimal parameters we can do the normal kfold # once we have chosen the optimal parameters we can do the normal kfold
cnn = CNN_Antifrag(name="CNN_%d_%d_%d_%d"%max_params)
#note: the test data remains unbalanced acc, auc, cm, pr = cnn.run_kfold(input_, output_, max_params)
acc, auc, cm, pr = cnn.run_kfold(X_train_kfold_opt, X_test_kfold_opt, y_train_kfold_opt, y_test_kfold_opt, max_params)
#to add: precision recall curve #to add: precision recall curve
#%%
labels = ['~robust&~evolvable', 'evolvable&~robust', 'robust&~evolvable', 'robust&evolvable'] labels = [
plot_confusion_matrix(cm, labels) #this function saves the matrix image automatically '[~R & ~E]',
'[~R & E]',
f = open("acc_auc.txt", 'w+') '[ R & ~E]',
'[ R & E]'
]
#this function saves the matrix image automatically
plot_confusion_matrix(cm, labels)
f = open("out/acc_auc.txt", 'w+')
f.write("Average accuracy: " + str(acc)+"\n") f.write("Average accuracy: " + str(acc)+"\n")
f.write("Average area under the curve: " + str(auc)) f.write("Average area under the curve: " + str(auc))
f.close() f.close()
## TO DO: code that allows executing in parallel; make sure it uses the same random shuffle ...
...@@ -61,14 +61,23 @@ def model_architecture(c): ...@@ -61,14 +61,23 @@ def model_architecture(c):
model.add(Dropout(0.5)) model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax')) model.add(Dense(num_classes, activation='softmax'))
return model return model
def choose_batch_epochs(b,e): def choose_batch_epochs(b,e):
if b == 0 and e == 0: if b == 0 and e == 0:
return 16, 128 return 16, 12
if b == 0 and e == 1: if b == 0 and e == 1:
return 16, 512 return 16, 512
if b == 1 and e == 0: if b == 1 and e == 0:
return 64, 128 return 64, 12
if b == 1 and e == 1: if b == 1 and e == 1:
return 64, 512 return 64, 512
def choose_balancing_method(o):
    """
    Maps the balancing hyperparameter code to the balancing method name

    Parameters
    -----------
    o : integer code of the balancing method
        0 : 'smote'
        1 : 'adasyn'
        2 : 'class_weight'

    Returns
    -----------
    the name of the balancing method as a string

    Raises
    -----------
    ValueError if o is not one of the supported codes
    """
    methods = {
        0: 'smote',
        1: 'adasyn',
        2: 'class_weight',
    }
    try:
        return methods[o]
    except (KeyError, TypeError):
        # An unknown (or unhashable) code used to fall through and return
        # None silently; fail loudly here, where the bad value is visible.
        raise ValueError(
            "unknown balancing method code %r; expected one of %s"
            % (o, sorted(methods))
        ) from None
\ No newline at end of file
def output_convert(N, e, r): from itertools import product
"""
output meaning: B = [0,1]
0 : not evolv. and not rob. converter = {(e,r):[i] for (e,r),i in zip(product(B,B),range(4))}
1 : evol. and not rob.
2 : not evol. and rob. def output_convert(e, r):
3 : evol. and rob. """
""" Encodes outputs as integers
output = []
if (e[0]): Parameters
if (r[0]): -----------
output.append(3) e : 1 evolvable, 0 not evolvable
else: output.append(1) r : 1 robust, 0 not robust
elif (r[0]):
output.append(2) Returns
else: output.append(0) -----------
return output the encoded output
output meaning:
0 : not evolv. and not rob.
1 : evol. and not rob.
2 : not evol. and rob.
3 : evol. and rob.
"""
return converter[r,e]
import numpy as np import numpy as np
import pandas as pd import pandas as pd
import output_convert as oc import output_convert as oc
import os
from scipy.interpolate import interp1d as interp
def parse_data(n_experiences): def interpolate(y,K,kind='linear'):
N_DATA = n_experiences #from 2 to 1001 """
#DATA PARSING Interpolates vector y
X = []
Parameters
----------
y : list of real numbers
dependent variable values.
K : integer
samples to interpolate
Returns
-------
the interpolation values
"""
N = len(y)
x = np.arange(1/N,1+1/N,1/N)
f = interp(x,y,kind=kind,fill_value="extrapolate")
xintp = np.arange(1/K,1+1/K,1/K)
return f(xintp)
def parse_data(n_experiences,folder="data",samples=30,kind='linear'):
N_DATA = n_experiences #from 2 to 10001
input_ = [] input_ = []
output_ = [] output_ = []
N0 = 15 #15 or 20
#table of the name of the bionets #table of the name of the bionets
str_ = ["arabidopsis", "cardiac", "cd4", "mammalian", "metabolic", "anemia", "aurka", "b-cell", "body-drosophila", "bt474", "bt474-ErbB", "cycle-cdk", "fgf-drosophila", "gonadal", "hcc1954", "hcc1954-ErbB", "hh-drosophila", "l-arabinose-operon", "leukemia", "neurotransmitter", "oxidative-stress", "skbr-long", "skbr3-short", "spz-drosophila", "t-lgl-survival", "tol", "toll-drosophila", "trichostrongylus", "vegf-drosophila", "wg-drosophila", "yeast-cycle", "aspergillus-fumigatus", "budding-yeast", "gene-cardiac", "t-cell-differentiation", "lac-operon-bistability", "core-cell-cycle", "cortical"] str_ = [
'anemia',
'cd4',
'lac-operon',
'spz-drosophila',
'arabidopsis',
'core-cell-cycle',
'lac-operon-bistability',
't-cell-differentiation',
'aspergillus-fumigatus',
'cortical',
'l-arabinose-operon',
't-lgl-survival',
'aurka',
'cycle-cdk',
'leukemia',
'tol',
'b-cell',
'fgf-drosophila',
'mammalian',
'trichostrongylus',
'body-drosophila',
'gene-cardiac',
'metabolic',
'vegf-drosophila',
'bt474',
'gonadal',
'neurotransmitter',
'wg-drosophila',
'bt474-ErbB',
'hcc1954',
'oxidative-stress',
'yeast-cycle',
'budding-yeast',
'hcc1954-ErbB',
'skbr3-long',
'cardiac',
'hh-drosophila',
'skbr3-short'
]
for s in str_: for s in str_:
dataXi_original = pd.read_csv("updated_data/"+ s + "/" + s + "_metrics.csv", sep=",", header=0) data = pd.read_csv(
N = int(dataXi_original.loc[1, 'N']) os.path.join(folder,s,s + "_metrics.csv"), sep=",", header=0)
antifragility_original = list(np.array(dataXi_original.loc[:, 'Antifragility']).astype(float))
# 15 or 20 points describing the relationship between the original values of antifragility in the network before perturbations and X/N
X_tmp = list(np.arange(1,N+1, 1))
X_N = [X_tmp[i]/N for i in range(N)]
original_points = [np.interp(i/N0, X_N, antifragility_original) for i in range(1, N0+1)]
for i in range(1,N_DATA): # 30 points describing the relationship between the original
#read the data for each experience # values of antifragility in the network before perturbations and X/N
n = format(i, '09') before =interpolate(
dataXi_tmp = pd.read_csv("updated_data/"+ s + "/" + s + "_" + n + "_metrics.csv", sep=",", header=0) np.array(data.loc[:, 'Antifragility']).astype(float),
samples,
kind=kind)
for i in range(N_DATA):
#read the data for each experience
n = format(i+1, '09')
data = pd.read_csv(
os.path.join(folder,s,s + "_" + n + "_metrics.csv"),
sep=",", header=0)
antifragility_tmp = list(np.array(dataXi_tmp.loc[:, 'Antifragility']).astype(float)) #antifragility of the mutant
after = interpolate(
np.array(data.loc[:, 'Antifragility']).astype(float),
samples,
kind=kind)
# computes the difference of the antifragility curves
input_tmp = after-before
input_tmp = original_points + [np.interp(i/N0, X_N, antifragility_tmp) for i in range(1, N0+1)] evolvable_tmp = list(
np.array(data.loc[:, 'Evolvability']).astype(int))
robust_tmp = list(
np.array(data.loc[:, 'Robustness']).astype(int))
evolvable_tmp = list(np.array(dataXi_tmp.loc[:, 'Evolvability']).astype(int)) # even though evolvable_tmp and robust_tmp are vectors,
robust_tmp = list(np.array(dataXi_tmp.loc[:, 'Robustness']).astype(int)) # all entries are the same, we take the first
output_tmp = oc.output_convert(N, evolvable_tmp, robust_tmp) output_tmp = oc.output_convert(evolvable_tmp[0], robust_tmp[0])
input_.append(input_tmp) input_.append(input_tmp)
output_+= output_tmp output_+= output_tmp
input_ = np.array(input_) input_ = np.array(input_)
output_ = np.array(output_) output_ = np.array(output_)
......
...@@ -31,7 +31,7 @@ def plot_pr(recall, precision, average_precision): ...@@ -31,7 +31,7 @@ def plot_pr(recall, precision, average_precision):
plt.ylim([0.0, 1.05]) plt.ylim([0.0, 1.05])
plt.xlim([0.0, 1.0]) plt.xlim([0.0, 1.0])
plt.title('Average precision score, micro-averaged over all classes: AP={0:0.2f}'.format(average_precision["micro"])) plt.title('Average precision score, micro-averaged over all classes: AP={0:0.2f}'.format(average_precision["micro"]))
plt.savefig("precision_recall_curve") plt.savefig("out/precision_recall_curve.pdf")
#plt.show() #plt.show()
plt.close() plt.close()
...@@ -67,4 +67,4 @@ def avg_pr(n_splits, num_classes, recs_k, precs_k, avgs_k): ...@@ -67,4 +67,4 @@ def avg_pr(n_splits, num_classes, recs_k, precs_k, avgs_k):
plt.ylim([0.0, 1.05]) plt.ylim([0.0, 1.05])
plt.xlim([0.0, 1.0]) plt.xlim([0.0, 1.0])
plt.title('Average precision score, over class {0}: AP={1:0.2f}'.format(i, avg_prec[i])) plt.title('Average precision score, over class {0}: AP={1:0.2f}'.format(i, avg_prec[i]))
plt.savefig("pr_curve_class_" +str(i)) plt.savefig("out/pr_curve_class_" +str(i)+".pdf")
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment