Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
antifragility
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Ana Pamela Osuna Vargas
antifragility
Commits
05b4ddf8
Commit
05b4ddf8
authored
Jan 18, 2020
by
Stalin Munoz
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
k-fold refactoring, cubic interpolation, and adasyn
parent
4b274c88
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
364 additions
and
180 deletions
+364
-180
cnn.py
+225
-142
confusion_matrix.py
+8
-2
models.py
+12
-2
output_convert.py
+17
-10
parser.py
+100
-22
prec_recall.py
+2
-2
No files found.
cnn.py
View file @
05b4ddf8
import
models
as
m
import
models
as
m
import
roc_auc
as
ra
import
roc_auc
as
ra
from
matplotlib.pyplot
\
import
figure
,
plot
,
title
,
ylabel
,
xlabel
,
legend
,
savefig
,
ioff
from
numpy
import
expand_dims
as
dims
from
numpy
import
unique
from
prec_recall
import
create_pr
,
avg_pr
from
prec_recall
import
create_pr
,
avg_pr
import
sys
from
sklearn.metrics
import
confusion_matrix
from
sklearn.metrics
import
confusion_matrix
import
numpy
as
np
from
sklearn.model_selection
import
train_test_split
from
imblearn.over_sampling
import
SMOTE
from
sklearn.model_selection
import
StratifiedKFold
from
sklearn.model_selection
import
StratifiedKFold
from
sklearn.utils
import
class_weight
from
imblearn.over_sampling
import
SMOTE
,
ADASYN
from
tensorflow.keras.utils
import
to_categorical
from
tensorflow.keras.utils
import
to_categorical
def
encode
(
output
,
N
=
4
):
"""
One hot encoding of the input
## global variable
s
Parameter
s
N_SPLITS
=
5
# for the kfold
----------
N_CLASSES
=
4
output : numpy array
vector with the target outputs.
def
run_nn
(
input_
,
output_
,
n_experiences
,
params
):
Returns
-------
numpy array
matrix with the one hot encoding of the outputs.
c
,
b
,
e
=
params
"""
return
to_categorical
(
output
,
N
)
def
balance
(
X
,
y
,
method
):
"""
Balances the training data
Parameters
----------
X : numpy array
inputs.
y : numpy array
outputs.
method : str
any of 'smote','adasyn','class_weight'
Returns
-------
numpy array
balanced input.
numpy array
balanced output.
numpy array
class weights. Only present for the
'class_weight' method
# kfold validation
"""
"""
X for the input and y for the output
if
method
==
'smote'
:
print
(
'SMOTE'
)
smote
=
SMOTE
(
random_state
=
0xAAAA
)
return
smote
.
fit_resample
(
X
,
y
),
None
elif
method
==
'adasyn'
:
print
(
'ADASYN'
)
adasyn
=
ADASYN
(
random_state
=
0xAAAA
)
return
adasyn
.
fit_resample
(
X
,
y
),
None
elif
method
==
'class_weight'
:
print
(
'CLASS WEIGHTS'
)
weights
=
class_weight
.
compute_class_weight
(
'balanced'
,
unique
(
y
),
y
)
return
(
X
,
y
),
weights
class
CNN_Antifrag
:
"""
Convolutional Neural Network
For predicting Robustness and Evolvability
Based on antifragility estimations
"""
"""
skf
=
StratifiedKFold
(
N_SPLITS
)
def
__init__
(
self
,
name
=
'CNN'
,
K
=
5
,
N
=
4
):
#kfold = KFold(N_SPLITS, True, 1) #on definit la methode a utiliser en choisisant n_splits, shuffle on/off, random_state
"""
Creates a Convolutional Neural Network
X_train_kfold
=
[]
Modeling experiment
X_test_kfold
=
[]
y_train_kfold
=
[]
Parameters
y_test_kfold
=
[]
----------
name: str,optional
#split the input data into k sets
prefix for history files
The default is 'CNN'
#for train_index, test_index in kfold.split(input_):
K : int, optional
for
train_index
,
test_index
in
skf
.
split
(
input_
,
output_
):
Number of folds in the cross validation.
X_train_kfold
.
append
(
input_
[
train_index
])
The default is 5.
X_test_kfold
.
append
(
input_
[
test_index
])
N : int, optional
y_train_kfold
.
append
(
output_
[
train_index
])
Number of classes.
y_test_kfold
.
append
(
output_
[
test_index
])
The default is 4.
#balancing the data
Returns
sm
=
SMOTE
(
random_state
=
2
)
-------
for
i
in
range
(
len
(
X_train_kfold
)):
None.
X_train_kfold
[
i
],
y_train_kfold
[
i
]
=
sm
.
fit_sample
(
X_train_kfold
[
i
],
y_train_kfold
[
i
]
.
ravel
())
# print(len(X_train_kfold[0])/(len(X_train_kfold[0])+len(X_test_kfold[0]))) #gives 0.8 OK
#build 4 sub-sub-sets out of each of the k subsets (we iterate the validation, taking it from the train set)
X_validation
=
[[
0
]
*
(
N_SPLITS
-
1
)
for
i
in
range
(
N_SPLITS
)]
X_train
=
[[
0
]
*
(
N_SPLITS
-
1
)
for
i
in
range
(
N_SPLITS
)]
y_validation
=
[[
0
]
*
(
N_SPLITS
-
1
)
for
i
in
range
(
N_SPLITS
)]
y_train
=
[[
0
]
*
(
N_SPLITS
-
1
)
for
i
in
range
(
N_SPLITS
)]
len_validation
=
int
(
len
(
X_train_kfold
[
0
])
/
(
N_SPLITS
))
for
i
in
range
(
N_SPLITS
):
idx
=
0
for
j
in
range
(
N_SPLITS
-
1
):
X_validation
[
i
][
j
]
=
X_train_kfold
[
i
][
idx
:
idx
+
len_validation
]
X_train
[
i
][
j
]
=
list
(
X_train_kfold
[
i
][
0
:
idx
])
+
list
(
X_train_kfold
[
i
][
idx
+
len_validation
:])
y_validation
[
i
][
j
]
=
y_train_kfold
[
i
][
idx
:
idx
+
len_validation
]
y_train
[
i
][
j
]
=
list
(
y_train_kfold
[
i
][
0
:
idx
])
+
list
(
y_train_kfold
[
i
][
idx
+
len_validation
:])
idx
+=
len_validation
#print(len(X_validation[0][0]), len(X_train[0][0])) #we expect X_validation[0] to be 1/3 of X_train's length
validation_Y_one_hot
=
[[
0
]
*
(
N_SPLITS
-
1
)
for
i
in
range
(
N_SPLITS
)]
train_Y_one_hot
=
[[
0
]
*
(
N_SPLITS
-
1
)
for
i
in
range
(
N_SPLITS
)]
for
i
in
range
(
N_SPLITS
):
for
j
in
range
(
N_SPLITS
-
1
):
# change the labels from categorical to one-hot encoding
train_Y_one_hot
[
i
][
j
]
=
to_categorical
(
y_train
[
i
][
j
],
num_classes
=
4
)
validation_Y_one_hot
[
i
][
j
]
=
to_categorical
(
y_validation
[
i
][
j
],
num_classes
=
4
)
#convert input to np.array
"""
X_train
[
i
][
j
]
=
np
.
array
(
X_train
[
i
][
j
])
self
.
name
=
name
X_validation
[
i
][
j
]
=
np
.
array
(
X_validation
[
i
][
j
])
self
.
K
=
K
self
.
N
=
N
ioff
()
#convert each element of the train and test set into a matrix of size 30x1(?)
def
save_history_plots
(
self
,
history
,
outer
,
inner
=
None
,
name
=
None
):
X_train
[
i
][
j
]
=
X_train
[
i
][
j
]
.
reshape
(
-
1
,
30
,
1
)
"""
X_validation
[
i
][
j
]
=
X_validation
[
i
][
j
]
.
reshape
(
-
1
,
30
,
1
)
#convert the data from an int8 format to a float32 type
X_train
[
i
][
j
]
=
X_train
[
i
][
j
]
.
astype
(
'float32'
)
X_validation
[
i
][
j
]
=
X_validation
[
i
][
j
]
.
astype
(
'float32'
)
Parameters
----------
history : dictionary
Model fitting history.
inner : int
Index of validation set.
outer : int
Index of test set.
#self reminder : warning! be careful not to use i and j as indexes later in here for something else
Returns
-------
None.
#i: number of the test_set (i belongs to {0, ..., k-1})
"""
#j: number of the validation_set (j belongs to {0, ..., k-2})
name
=
name
if
name
else
self
.
name
total_acc
=
0
figure
()
total_auc
=
0
plot
(
history
.
history
[
'acc'
])
plot
(
history
.
history
[
'val_acc'
])
s1
=
'(inner fold =
%
d,outer fold=
%
d)'
%
(
inner
,
outer
)
\
if
inner
is
not
None
else
'(fold=
%
d)'
%
outer
title
(
'Model accuracy
%
s'
%
s1
)
ylabel
(
'Accuracy'
)
xlabel
(
'Epoch'
)
legend
([
'Training'
,
'Validation'
],
loc
=
'upper left'
)
s2
=
'
%
d_
%
d'
%
(
inner
,
outer
)
if
inner
is
not
None
else
'
%
d'
%
outer
savefig
(
'out/'
+
name
+
'_accuracy_
%
s.pdf'
%
s2
)
figure
()
plot
(
history
.
history
[
'loss'
])
plot
(
history
.
history
[
'val_loss'
])
title
(
'Model Loss
%
s'
%
s1
)
ylabel
(
'Cross entropy loss'
)
xlabel
(
'Epoch'
)
legend
([
'Training'
,
'Validation'
],
loc
=
'upper right'
)
savefig
(
'out/'
+
name
+
'_loss_
%
s.pdf'
%
s2
)
def
run_nn
(
self
,
X
,
y
,
params
):
c
,
b
,
e
,
o
=
params
bs
,
ep
=
m
.
choose_batch_epochs
(
b
,
e
)
bs
,
ep
=
m
.
choose_batch_epochs
(
b
,
e
)
o
=
m
.
choose_balancing_method
(
o
)
K
,
N
=
self
.
K
,
self
.
N
for
i
in
range
(
N_SPLITS
):
for
j
in
range
(
N_SPLITS
-
1
):
# random states are defined for reproducibility of results
#defining keras model
outer
=
StratifiedKFold
(
K
,
shuffle
=
True
,
random_state
=
0xBBBB
)
inner
=
StratifiedKFold
(
K
-
1
,
shuffle
=
True
,
random_state
=
0xCCCC
)
total_acc
,
total_auc
=
0
,
0
# outer loop splits test sets
# Test data is never used in training or cross validation
# therefore we use underscore to ignore indices
for
(
data_idx
,
_
),
i
in
zip
(
outer
.
split
(
X
,
y
),
range
(
K
)):
# balancing training and validation sets
(
X_D
,
y_D
),
weights
=
balance
(
X
[
data_idx
],
y
[
data_idx
],
o
)
# test set is left imbalanced, one hot encoding for output
# inner loop splits training and validation sets
for
(
train_idx
,
val_idx
),
j
in
zip
(
inner
.
split
(
X_D
,
y_D
),
range
(
K
-
1
)):
X_train
,
y_train
=
dims
(
X_D
[
train_idx
],
2
),
encode
(
y_D
[
train_idx
])
X_val
,
y_val
=
dims
(
X_D
[
val_idx
],
2
),
encode
(
y_D
[
val_idx
])
# creating a new instance of the architecture
model
=
m
.
model_architecture
(
c
)
model
=
m
.
model_architecture
(
c
)
#compile the keras model
# compile the keras model
model
.
compile
(
loss
=
'categorical_crossentropy'
,
optimizer
=
'adam'
,
metrics
=
[
'accuracy'
])
model
.
compile
(
loss
=
'categorical_crossentropy'
,
model
.
fit
(
X_train
[
i
][
j
],
train_Y_one_hot
[
i
][
j
],
batch_size
=
bs
,
epochs
=
ep
,
verbose
=
2
,
validation_data
=
(
X_validation
[
i
][
j
],
validation_Y_one_hot
[
i
][
j
]))
optimizer
=
'adam'
,
#calculate accuracy
metrics
=
[
'accuracy'
])
_
,
accuracy
=
model
.
evaluate
(
X_validation
[
i
][
j
],
validation_Y_one_hot
[
i
][
j
],
verbose
=
0
)
# model training, 3D expansion of the input required
# for convolutional layers
history
=
model
.
fit
(
X_train
,
y_train
,
batch_size
=
bs
,
epochs
=
ep
,
verbose
=
2
,
class_weight
=
weights
,
validation_data
=
(
X_val
,
y_val
))
# save history of accuracy and loss
self
.
save_history_plots
(
history
,
i
,
j
)
# calculate accuracy
_
,
accuracy
=
model
.
evaluate
(
X_val
,
y_val
,
verbose
=
0
)
total_acc
+=
accuracy
total_acc
+=
accuracy
print
(
"t_set = "
+
str
(
i
)
+
" v_set = "
+
str
(
j
))
print
(
"t_set = "
+
str
(
i
)
+
" v_set = "
+
str
(
j
))
print
(
'Test accuracy:'
,
accuracy
)
print
(
'Test accuracy:'
,
accuracy
)
# calculate area under the curve and confu
# calculate area under the curve
y_pred
=
model
.
predict
(
X_validation
[
i
][
j
]
,
batch_size
=
bs
)
y_pred
=
model
.
predict
(
X_val
,
batch_size
=
bs
)
fpr
,
tpr
,
auc
=
ra
.
roc_auc
(
N_CLASSES
,
validation_Y_one_hot
[
i
][
j
]
,
y_pred
)
fpr
,
tpr
,
auc
=
ra
.
roc_auc
(
N
,
y_val
,
y_pred
)
total_auc
+=
auc
total_auc
+=
auc
print
(
"Area under the curve:"
,
auc
)
print
(
"Area under the curve:"
,
auc
)
total_acc
=
total_acc
/
(
N_SPLITS
*
(
N_SPLITS
-
1
))
total_acc
=
total_acc
/
(
K
*
(
K
-
1
))
total_auc
=
total_auc
/
(
N_SPLITS
*
(
N_SPLITS
-
1
))
total_auc
=
total_auc
/
(
K
*
(
K
-
1
))
print
(
"Average accuracy: "
,
total_acc
)
print
(
"Average accuracy: "
,
total_acc
)
print
(
"Average area under the curve: "
,
total_auc
)
print
(
"Average area under the curve: "
,
total_auc
)
return
total_acc
,
total_auc
,
X_train_kfold
,
X_test_kfold
,
y_train_kfold
,
y_test_kfold
return
total_acc
,
total_auc
def
run_kfold
(
X_train
,
X_test
,
y_train
,
y_test
,
params
):
c
,
b
,
e
=
params
for
i
in
range
(
N_SPLITS
):
def
run_kfold
(
self
,
X
,
y
,
params
):
# change the labels from categorical to one-hot encoding
y_train
[
i
]
=
to_categorical
(
y_train
[
i
],
num_classes
=
4
)
y_test
[
i
]
=
to_categorical
(
y_test
[
i
],
num_classes
=
4
)
#convert input to np.array
c
,
b
,
e
,
o
=
params
X_train
[
i
]
=
np
.
array
(
X_train
[
i
])
X_test
[
i
]
=
np
.
array
(
X_test
[
i
])
#convert each element of the train and test set into a matrix of size 30x1(?)
X_train
[
i
]
=
X_train
[
i
]
.
reshape
(
-
1
,
30
,
1
)
X_test
[
i
]
=
X_test
[
i
]
.
reshape
(
-
1
,
30
,
1
)
#convert the data from an int8 format to a float32 type
X_train
[
i
]
=
X_train
[
i
]
.
astype
(
'float32'
)
X_test
[
i
]
=
X_test
[
i
]
.
astype
(
'float32'
)
total_acc
=
0
total_auc
=
0
precs_k
=
[]
#it will contain the average pr curve for each class
recs_k
=
[]
avgs_k
=
[]
bs
,
ep
=
m
.
choose_batch_epochs
(
b
,
e
)
bs
,
ep
=
m
.
choose_batch_epochs
(
b
,
e
)
o
=
m
.
choose_balancing_method
(
o
)
K
,
N
=
self
.
K
,
self
.
N
for
i
in
range
(
N_SPLITS
):
# random states are defined for reproducibility of results
kfold
=
StratifiedKFold
(
K
,
shuffle
=
True
,
random_state
=
0xBBBB
)
precs_k
,
recs_k
,
avgs_k
=
[],
[],
[]
total_acc
,
total_auc
=
0
,
0
# outer loop splits test sets
# Test data is never used in training or cross validation
# therefore we use underscore to ignore indices
for
(
train_idx
,
test_idx
),
i
in
zip
(
kfold
.
split
(
X
,
y
),
range
(
K
)):
# balancing training set
(
X_train
,
y_train
),
weights
=
balance
(
X
[
train_idx
],
y
[
train_idx
],
o
)
X_train
,
y_train
=
dims
(
X_train
,
2
),
encode
(
y_train
)
# test set is left imbalanced, one hot encoding for output
(
X_test
,
y_test
)
=
dims
(
X
[
test_idx
],
2
),
encode
(
y
[
test_idx
])
# creating a new instance of the architecture
model
=
m
.
model_architecture
(
c
)
model
=
m
.
model_architecture
(
c
)
#compile the keras model
# compile the keras model
model
.
compile
(
loss
=
'categorical_crossentropy'
,
optimizer
=
'adam'
,
metrics
=
[
'accuracy'
])
model
.
compile
(
loss
=
'categorical_crossentropy'
,
#train the model
optimizer
=
'adam'
,
model
.
fit
(
X_train
[
i
],
y_train
[
i
],
batch_size
=
bs
,
epochs
=
ep
,
verbose
=
1
,
validation_data
=
(
X_test
[
i
],
y_test
[
i
]))
metrics
=
[
'accuracy'
])
# model training, 3D expansion of the input required
#calculate accuracy
# for convolutional layers
_
,
accuracy
=
model
.
evaluate
(
X_test
[
i
],
y_test
[
i
],
verbose
=
0
)
history
=
model
.
fit
(
X_train
,
y_train
,
batch_size
=
bs
,
epochs
=
ep
,
verbose
=
2
,
class_weight
=
weights
,
validation_data
=
(
X_test
,
y_test
))
# save history of accuracy and loss
self
.
save_history_plots
(
history
,
i
,
name
=
'Eval_'
+
self
.
name
)
# calculate accuracy
_
,
accuracy
=
model
.
evaluate
(
X_test
,
y_test
,
verbose
=
0
)
total_acc
+=
accuracy
total_acc
+=
accuracy
print
(
"t_set
= "
+
str
(
i
))
print
(
"fold
= "
+
str
(
i
))
print
(
'Test accuracy:'
,
accuracy
)
print
(
'Test accuracy:'
,
accuracy
)
# calculate area under the curve
# calculate area under the curve
y_pred
=
model
.
predict
(
X_test
[
i
]
,
batch_size
=
bs
)
y_pred
=
model
.
predict
(
X_test
,
batch_size
=
bs
)
fpr
,
tpr
,
auc
=
ra
.
roc_auc
(
N_CLASSES
,
y_test
[
i
]
,
y_pred
)
fpr
,
tpr
,
auc
=
ra
.
roc_auc
(
N
,
y_test
,
y_pred
)
total_auc
+=
auc
total_auc
+=
auc
print
(
"Area under the curve:"
,
auc
)
print
(
"Area under the curve:"
,
auc
)
# confusion matrix
# confusion matrix
if
i
==
0
:
if
i
==
0
:
cm
=
confusion_matrix
(
y_test
[
i
]
.
argmax
(
axis
=
1
),
y_pred
.
argmax
(
axis
=
1
))
cm
=
confusion_matrix
(
y_test
.
argmax
(
axis
=
1
),
y_pred
.
argmax
(
axis
=
1
))
else
:
else
:
cm
+=
confusion_matrix
(
y_test
[
i
]
.
argmax
(
axis
=
1
),
y_pred
.
argmax
(
axis
=
1
))
cm
+=
confusion_matrix
(
y_test
.
argmax
(
axis
=
1
),
y_pred
.
argmax
(
axis
=
1
))
#pr curve (contains 4 pr curves: one for each class)
#pr curve (contains 4 pr curves: one for each class)
recall
,
precision
,
average_prec
=
create_pr
(
N_CLASSES
,
y_test
[
i
]
,
y_pred
)
recall
,
precision
,
average_prec
=
create_pr
(
N
,
y_test
,
y_pred
)
recs_k
.
append
(
recall
)
recs_k
.
append
(
recall
)
precs_k
.
append
(
precision
)
precs_k
.
append
(
precision
)
avgs_k
.
append
(
average_prec
)
avgs_k
.
append
(
average_prec
)
#average of acc, auc, cm, pr
total_acc
=
total_acc
/
K
total_acc
=
total_acc
/
(
N_SPLITS
)
total_auc
=
total_auc
/
K
total_auc
=
total_auc
/
(
N_SPLITS
)
cm
=
cm
/
K
cm
=
cm
/
N_SPLITS
pr
=
avg_pr
(
K
,
N
,
recs_k
,
precs_k
,
avgs_k
)
pr
=
avg_pr
(
N_SPLITS
,
N_CLASSES
,
recs_k
,
precs_k
,
avgs_k
)
print
(
"Average accuracy: "
,
total_acc
)
print
(
"Average accuracy: "
,
total_acc
)
print
(
"Average area under the curve: "
,
total_auc
)
print
(
"Average area under the curve: "
,
total_auc
)
return
total_acc
,
total_auc
,
cm
,
pr
return
total_acc
,
total_auc
,
cm
,
pr
confusion_matrix.py
View file @
05b4ddf8
...
@@ -49,7 +49,8 @@ def plot_confusion_matrix(cm,
...
@@ -49,7 +49,8 @@ def plot_confusion_matrix(cm,
if
normalize
:
if
normalize
:
cm
=
cm
.
astype
(
'float'
)
/
cm
.
sum
(
axis
=
1
)[:,
np
.
newaxis
]
cm
=
cm
.
astype
(
'float'
)
/
cm
.
sum
(
axis
=
1
)[:,
np
.
newaxis
]
plt
.
figure
(
figsize
=
(
8
,
6
))
fig
=
plt
.
figure
(
figsize
=
(
8
,
6
))
ax
=
fig
.
add_subplot
()
plt
.
imshow
(
cm
,
interpolation
=
'nearest'
,
cmap
=
cmap
)
plt
.
imshow
(
cm
,
interpolation
=
'nearest'
,
cmap
=
cmap
)
plt
.
title
(
title
)
plt
.
title
(
title
)
plt
.
colorbar
()
plt
.
colorbar
()
...
@@ -88,5 +89,10 @@ def plot_confusion_matrix(cm,
...
@@ -88,5 +89,10 @@ def plot_confusion_matrix(cm,
plt
.
ylabel
(
'True label'
)
plt
.
ylabel
(
'True label'
)
plt
.
xlabel
(
'Predicted label
\n
accuracy={:0.4f}; misclass={:0.4f}'
.
format
(
accuracy
,
misclass
))
plt
.
xlabel
(
'Predicted label
\n
accuracy={:0.4f}; misclass={:0.4f}'
.
format
(
accuracy
,
misclass
))
#plt.show()
#plt.show()
plt
.
savefig
(
"confusion_matrix"
)
# Added labels
labels
=
[
''
]
*
(
2
*
len
(
target_names
))
labels
[::
2
]
=
target_names
ax
.
set_xticklabels
([
''
]
+
labels
)
ax
.
set_yticklabels
([
''
]
+
labels
)
plt
.
savefig
(
"confusion_matrix.pdf"
)
plt
.
close
()
plt
.
close
()
models.py
View file @
05b4ddf8
...
@@ -61,14 +61,23 @@ def model_architecture(c):
...
@@ -61,14 +61,23 @@ def model_architecture(c):
model
.
add
(
Dropout
(
0.5
))
model
.
add
(
Dropout
(
0.5
))
model
.
add
(
Dense
(
num_classes
,
activation
=
'softmax'
))
model
.
add
(
Dense
(
num_classes
,
activation
=
'softmax'
))
return
model
return
model
def
choose_batch_epochs
(
b
,
e
):
def
choose_batch_epochs
(
b
,
e
):
if
b
==
0
and
e
==
0
:
if
b
==
0
and
e
==
0
:
return
16
,
12
8
return
16
,
12
if
b
==
0
and
e
==
1
:
if
b
==
0
and
e
==
1
:
return
16
,
512
return
16
,
512
if
b
==
1
and
e
==
0
:
if
b
==
1
and
e
==
0
:
return
64
,
12
8
return
64
,
12
if
b
==
1
and
e
==
1
:
if
b
==
1
and
e
==
1
:
return
64
,
512
return
64
,
512
def
choose_balancing_method
(
o
):
if
o
==
0
:
return
'smote'
elif
o
==
1
:
return
'adasyn'
elif
o
==
2
:
return
'class_weight'
\ No newline at end of file
output_convert.py
View file @
05b4ddf8
def
output_convert
(
N
,
e
,
r
):
from
itertools
import
product
B
=
[
0
,
1
]
converter
=
{(
e
,
r
):[
i
]
for
(
e
,
r
),
i
in
zip
(
product
(
B
,
B
),
range
(
4
))}
def
output_convert
(
e
,
r
):
"""
"""
Encodes outputs as integers
Parameters
-----------
e : 1 evolvable, 0 not evolvable
r : 1 robust, 0 not robust
Returns
-----------
the encoded output
output meaning:
output meaning:
0 : not evolv. and not rob.
0 : not evolv. and not rob.
1 : evol. and not rob.
1 : evol. and not rob.
2 : not evol. and rob.
2 : not evol. and rob.
3 : evol. and rob.
3 : evol. and rob.
"""
"""
output
=
[]
return
converter
[
r
,
e
]
if
(
e
[
0
]):
if
(
r
[
0
]):
output
.
append
(
3
)
else
:
output
.
append
(
1
)
elif
(
r
[
0
]):
output
.
append
(
2
)
else
:
output
.
append
(
0
)
return
output
parser.py
View file @
05b4ddf8
import
numpy
as
np
import
numpy
as
np
import
pandas
as
pd
import
pandas
as
pd
import
output_convert
as
oc
import
output_convert
as
oc
import
os
from
scipy.interpolate
import
interp1d
as
interp
def
parse_data
(
n_experiences
):
def
interpolate
(
y
,
K
,
kind
=
'linear'
):
N_DATA
=
n_experiences
#from 2 to 1001
"""
#DATA PARSING
Interpolates vector x
X
=
[]
Parameters
----------
y : list of real numbers
dependen variable values.
K : integer
samples to interpolate
Returns
-------
the interpolation values
"""
N
=
len
(
y
)
x
=
np
.
arange
(
1
/
N
,
1
+
1
/
N
,
1
/
N
)
f
=
interp
(
x
,
y
,
kind
=
kind
,
fill_value
=
"extrapolate"
)
xintp
=
np
.
arange
(
1
/
K
,
1
+
1
/
K
,
1
/
K
)
return
f
(
xintp
)
def
parse_data
(
n_experiences
,
folder
=
"data"
,
samples
=
30
,
kind
=
'linear'
):
N_DATA
=
n_experiences
#from 2 to 10001
input_
=
[]
input_
=
[]
output_
=
[]
output_
=
[]
N0
=
15
#15 or 20
#table of the name of the bionets
#table of the name of the bionets
str_
=
[
"arabidopsis"
,
"cardiac"
,
"cd4"
,
"mammalian"
,
"metabolic"
,
"anemia"
,
"aurka"
,
"b-cell"
,
"body-drosophila"
,
"bt474"
,
"bt474-ErbB"
,
"cycle-cdk"
,
"fgf-drosophila"
,
"gonadal"
,
"hcc1954"
,
"hcc1954-ErbB"
,
"hh-drosophila"
,
"l-arabinose-operon"
,
"leukemia"
,
"neurotransmitter"
,
"oxidative-stress"
,
"skbr-long"
,
"skbr3-short"
,
"spz-drosophila"
,
"t-lgl-survival"
,
"tol"
,
"toll-drosophila"
,
"trichostrongylus"
,
"vegf-drosophila"
,
"wg-drosophila"
,
"yeast-cycle"
,
"aspergillus-fumigatus"
,
"budding-yeast"
,
"gene-cardiac"
,
"t-cell-differentiation"
,
"lac-operon-bistability"
,
"core-cell-cycle"
,
"cortical"
]
str_
=
[
'anemia'
,
'cd4'
,
'lac-operon'
,
'spz-drosophila'
,
'arabidopsis'
,
'core-cell-cycle'
,
'lac-operon-bistability'
,
't-cell-differentiation'
,
'aspergillus-fumigatus'
,
'cortical'
,
'l-arabinose-operon'
,
't-lgl-survival'
,
'aurka'
,
'cycle-cdk'
,
'leukemia'
,
'tol'
,
'b-cell'
,
'fgf-drosophila'
,
'mammalian'
,
'trichostrongylus'
,
'body-drosophila'
,
'gene-cardiac'
,
'metabolic'
,
'vegf-drosophila'
,
'bt474'
,
'gonadal'
,
'neurotransmitter'
,
'wg-drosophila'
,
'bt474-ErbB'
,
'hcc1954'
,
'oxidative-stress'
,
'yeast-cycle'
,
'budding-yeast'
,
'hcc1954-ErbB'
,
'skbr3-long'
,
'cardiac'
,
'hh-drosophila'
,
'skbr3-short'
]
for
s
in
str_
:
for
s
in
str_
:
dataXi_original
=
pd
.
read_csv
(
"updated_data/"
+
s
+
"/"
+
s
+
"_metrics.csv"
,
sep
=
","
,
header
=
0
)
data
=
pd
.
read_csv
(
N
=
int
(
dataXi_original
.
loc
[
1
,
'N'
])
os
.
path
.
join
(
folder
,
s
,
s
+
"_metrics.csv"
),
sep
=
","
,
header
=
0
)
antifragility_original
=
list
(
np
.
array
(
dataXi_original
.
loc
[:,
'Antifragility'
])
.
astype
(
float
))
# 15 or 20 points describing the relationship between the original values of antifragility in the network before perturbations and X/N
# 30 points describing the relationship between the original
X_tmp
=
list
(
np
.
arange
(
1
,
N
+
1
,
1
))
# values of antifragility in the network before perturbations and X/N
X_N
=
[
X_tmp
[
i
]
/
N
for
i
in
range
(
N
)]
before
=
interpolate
(
original_points
=
[
np
.
interp
(
i
/
N0
,
X_N
,
antifragility_original
)
for
i
in
range
(
1
,
N0
+
1
)]
np
.
array
(
data
.
loc
[:,
'Antifragility'
])
.
astype
(
float
),
samples
,
for
i
in
range
(
1
,
N_DATA
):
kind
=
kind
)
for
i
in
range
(
N_DATA
):
#read the data for each experience
#read the data for each experience
n
=
format
(
i
,
'09'
)
n
=
format
(
i
+
1
,
'09'
)
dataXi_tmp
=
pd
.
read_csv
(
"updated_data/"
+
s
+
"/"
+
s
+
"_"
+
n
+
"_metrics.csv"
,
sep
=
","
,
header
=
0
)
data
=
pd
.
read_csv
(
os
.
path
.
join
(
folder
,
s
,
s
+
"_"
+
n
+
"_metrics.csv"
),
sep
=
","
,
header
=
0
)
#antifragility of the mutant
after
=
interpolate
(
np
.
array
(
data
.
loc
[:,
'Antifragility'
])
.
astype
(
float
),
samples
,
kind
=
kind
)
# computes the difference of the antifragility curves
input_tmp
=
after
-
before
antifragility_tmp
=
list
(
np
.
array
(
dataXi_tmp
.
loc
[:,
'Antifragility'
])
.
astype
(
float
))
evolvable_tmp
=
list
(
np
.
array
(
data
.
loc
[:,
'Evolvability'
])
.
astype
(
int
))
input_tmp
=
original_points
+
[
np
.
interp
(
i
/
N0
,
X_N
,
antifragility_tmp
)
for
i
in
range
(
1
,
N0
+
1
)]
robust_tmp
=
list
(
np
.
array
(
data
.
loc
[:,
'Robustness'
])
.
astype
(
int
))
evolvable_tmp
=
list
(
np
.
array
(
dataXi_tmp
.
loc
[:,
'Evolvability'
])
.
astype
(
int
))
# even though evolvable_tmp and robust_tmp are vectors,
robust_tmp
=
list
(
np
.
array
(
dataXi_tmp
.
loc
[:,
'Robustness'
])
.
astype
(
int
))
# all entries are the same, we take the first
output_tmp
=
oc
.
output_convert
(
N
,
evolvable_tmp
,
robust_tmp
)
output_tmp
=
oc
.
output_convert
(
evolvable_tmp
[
0
],
robust_tmp
[
0
]
)
input_
.
append
(
input_tmp
)
input_
.
append
(
input_tmp
)
output_
+=
output_tmp
output_
+=
output_tmp
...
...
prec_recall.py
View file @
05b4ddf8
...
@@ -31,7 +31,7 @@ def plot_pr(recall, precision, average_precision):
...
@@ -31,7 +31,7 @@ def plot_pr(recall, precision, average_precision):
plt
.
ylim
([
0.0
,
1.05
])
plt
.
ylim
([
0.0
,
1.05
])
plt
.
xlim
([
0.0
,
1.0
])
plt
.
xlim
([
0.0
,
1.0
])
plt
.
title
(
'Average precision score, micro-averaged over all classes: AP={0:0.2f}'
.
format
(
average_precision
[
"micro"
]))
plt
.
title
(
'Average precision score, micro-averaged over all classes: AP={0:0.2f}'
.
format
(
average_precision
[
"micro"
]))
plt
.
savefig
(
"
precision_recall_curve
"
)
plt
.
savefig
(
"
out/precision_recall_curve.pdf
"
)
#plt.show()
#plt.show()
plt
.
close
()
plt
.
close
()
...
@@ -67,4 +67,4 @@ def avg_pr(n_splits, num_classes, recs_k, precs_k, avgs_k):
...
@@ -67,4 +67,4 @@ def avg_pr(n_splits, num_classes, recs_k, precs_k, avgs_k):
plt
.
ylim
([
0.0
,
1.05
])
plt
.
ylim
([
0.0
,
1.05
])
plt
.
xlim
([
0.0
,
1.0
])
plt
.
xlim
([
0.0
,
1.0
])
plt
.
title
(
'Average precision score, over class {0}: AP={1:0.2f}'
.
format
(
i
,
avg_prec
[
i
]))
plt
.
title
(
'Average precision score, over class {0}: AP={1:0.2f}'
.
format
(
i
,
avg_prec
[
i
]))
plt
.
savefig
(
"
pr_curve_class_"
+
str
(
i
)
)
plt
.
savefig
(
"
out/pr_curve_class_"
+
str
(
i
)
+
".pdf"
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment