Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
antifragility
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Ana Pamela Osuna Vargas
antifragility
Commits
bbce07cf
Commit
bbce07cf
authored
Jan 08, 2020
by
Pamela Osuna
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
one line per epoch + independent models
parent
628238b0
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
137 additions
and
140 deletions
+137
-140
cnn.py
+137
-140
No files found.
cnn.py
View file @
bbce07cf
...
...
@@ -7,7 +7,7 @@ from sklearn.metrics import confusion_matrix
import
numpy
as
np
from
sklearn.model_selection
import
train_test_split
from
imblearn.over_sampling
import
SMOTE
from
sklearn.model_selection
import
KFold
from
sklearn.model_selection
import
Stratified
KFold
from
tensorflow.keras.utils
import
to_categorical
...
...
@@ -18,189 +18,186 @@ N_CLASSES = 4
def run_nn(input_, output_, n_experiences, params):
    """Cross-validate the network on nested folds and report mean metrics.

    The data is first split into ``N_SPLITS`` stratified train/test folds and
    every training fold is re-balanced with SMOTE.  Each balanced training
    fold is then cut into ``N_SPLITS - 1`` consecutive validation slices; for
    every (test fold, validation slice) pair a fresh model is built, trained
    on the remainder of the training fold and scored on the slice.

    Args:
        input_: samples; must support indexing with an integer index array
            (``input_[train_index]``).
        output_: labels aligned with ``input_``; indexed the same way and
            flattened with ``.ravel()`` before balancing.
        n_experiences: not used by this function; kept for interface
            compatibility.
        params: 3-item sequence ``(c, b, e)``.  ``c`` is forwarded to
            ``m.model_architecture``; ``b`` and ``e`` are forwarded to
            ``m.choose_batch_epochs``, which returns the batch size and the
            number of epochs.

    Returns:
        Tuple ``(mean_accuracy, mean_auc, X_train_kfold, X_test_kfold,
        y_train_kfold, y_test_kfold)`` where the means are taken over all
        ``N_SPLITS * (N_SPLITS - 1)`` trained models and the four lists hold
        the outer folds (training folds after SMOTE balancing).
    """
    c, b, e = params

    # Outer split: stratified k-fold (X for the input and y for the output).
    skf = StratifiedKFold(N_SPLITS)

    X_train_kfold = []
    X_test_kfold = []
    y_train_kfold = []
    y_test_kfold = []

    # split the input data into k sets
    for train_index, test_index in skf.split(input_, output_):
        X_train_kfold.append(input_[train_index])
        X_test_kfold.append(input_[test_index])
        y_train_kfold.append(output_[train_index])
        y_test_kfold.append(output_[test_index])

    # balancing the data (training folds only)
    sm = SMOTE(random_state=2)
    # Current imbalanced-learn exposes ``fit_resample``; ``fit_sample`` is the
    # legacy name and is only used when the newer one is unavailable.
    resample = sm.fit_resample if hasattr(sm, "fit_resample") else sm.fit_sample
    for i in range(len(X_train_kfold)):
        X_train_kfold[i], y_train_kfold[i] = resample(
            X_train_kfold[i], y_train_kfold[i].ravel()
        )

    # Size of one validation slice, derived from the first training fold.
    # NOTE(review): N_SPLITS - 1 slices of len / N_SPLITS samples each leave
    # the tail of every training fold out of validation, and the size comes
    # from fold 0 only -- confirm this is intended.
    len_validation = int(len(X_train_kfold[0]) / N_SPLITS)

    total_acc = 0
    total_auc = 0
    bs, ep = m.choose_batch_epochs(b, e)

    # i: number of the test_set (i belongs to {0, ..., k-1})
    # j: number of the validation_set (j belongs to {0, ..., k-2})
    for i in range(N_SPLITS):
        fold_X = np.asarray(X_train_kfold[i])
        fold_y = np.asarray(y_train_kfold[i])
        idx = 0
        for j in range(N_SPLITS - 1):
            stop = idx + len_validation

            # The validation slice is taken out of the training fold; what is
            # left on both sides of it becomes the training data.
            X_val = fold_X[idx:stop]
            y_val = fold_y[idx:stop]
            X_fit = np.concatenate([fold_X[:idx], fold_X[stop:]])
            y_fit = np.concatenate([fold_y[:idx], fold_y[stop:]])
            idx = stop

            # change the labels from categorical to one-hot encoding
            y_fit_one_hot = to_categorical(y_fit, num_classes=N_CLASSES)
            y_val_one_hot = to_categorical(y_val, num_classes=N_CLASSES)

            # every sample becomes a 30x1 float32 matrix
            X_fit = X_fit.reshape(-1, 30, 1).astype('float32')
            X_val = X_val.reshape(-1, 30, 1).astype('float32')

            # A new model per (i, j) pair keeps the runs independent of
            # each other.
            model = m.model_architecture(c)
            model.compile(
                loss='categorical_crossentropy',
                optimizer='adam',
                metrics=['accuracy'],
            )

            # verbose=2 prints one line per epoch
            model.fit(
                X_fit,
                y_fit_one_hot,
                batch_size=bs,
                epochs=ep,
                verbose=2,
                validation_data=(X_val, y_val_one_hot),
            )

            # calculate accuracy
            _, accuracy = model.evaluate(X_val, y_val_one_hot, verbose=0)
            total_acc += accuracy
            print("t_set = " + str(i) + " v_set = " + str(j))
            print('Test accuracy:', accuracy)

            # calculate area under the curve
            y_pred = model.predict(X_val, batch_size=bs)
            _, _, auc = ra.roc_auc(N_CLASSES, y_val_one_hot, y_pred)
            total_auc += auc
            print("Area under the curve:", auc)

    n_models = N_SPLITS * (N_SPLITS - 1)
    total_acc = total_acc / n_models
    # The AUC mean must be computed from the AUC sum, not from the accuracy.
    total_auc = total_auc / n_models

    print("Average accuracy: ", total_acc)
    print("Average area under the curve: ", total_auc)

    return total_acc, total_auc, X_train_kfold, X_test_kfold, y_train_kfold, y_test_kfold
def run_kfold(X_train, X_test, y_train, y_test, params):
    """Train and score one fresh model per outer fold and average the results.

    For each of the ``N_SPLITS`` folds the labels are one-hot encoded, the
    samples are reshaped to ``(-1, 30, 1)`` float32 arrays, a new model is
    built, trained on the training fold and evaluated on the test fold.

    The argument lists are left untouched: all conversions are done on
    per-fold local copies, so the same lists can be passed in again.

    Args:
        X_train: list of training-sample sets, one per fold.
        X_test: list of test-sample sets, one per fold.
        y_train: list of integer class labels for ``X_train``, one per fold.
        y_test: list of integer class labels for ``X_test``, one per fold.
        params: 3-item sequence ``(c, b, e)``.  ``c`` is forwarded to
            ``m.model_architecture``; ``b`` and ``e`` are forwarded to
            ``m.choose_batch_epochs``, which returns the batch size and the
            number of epochs.

    Returns:
        Tuple ``(mean_accuracy, mean_auc, cm, pr)`` where ``cm`` is the
        element-wise mean of the per-fold confusion matrices and ``pr`` is
        whatever ``avg_pr`` builds from the per-fold precision/recall data.
    """
    c, b, e = params

    total_acc = 0
    total_auc = 0
    precs_k = []  # it will contain the average pr curve for each class
    recs_k = []
    avgs_k = []
    cm = None

    # Fixing the label set keeps every per-fold confusion matrix at
    # N_CLASSES x N_CLASSES even when a fold lacks one of the classes, so the
    # matrices can always be summed.
    class_labels = np.arange(N_CLASSES)

    bs, ep = m.choose_batch_epochs(b, e)

    for i in range(N_SPLITS):
        # change the labels from categorical to one-hot encoding
        y_fit = to_categorical(y_train[i], num_classes=N_CLASSES)
        y_eval = to_categorical(y_test[i], num_classes=N_CLASSES)

        # every sample becomes a 30x1 float32 matrix
        X_fit = np.array(X_train[i]).reshape(-1, 30, 1).astype('float32')
        X_eval = np.array(X_test[i]).reshape(-1, 30, 1).astype('float32')

        # A new model per fold keeps the folds independent of each other.
        model = m.model_architecture(c)
        model.compile(
            loss='categorical_crossentropy',
            optimizer='adam',
            metrics=['accuracy'],
        )

        # train the model
        model.fit(
            X_fit,
            y_fit,
            batch_size=bs,
            epochs=ep,
            verbose=1,
            validation_data=(X_eval, y_eval),
        )

        # calculate accuracy
        _, accuracy = model.evaluate(X_eval, y_eval, verbose=0)
        total_acc += accuracy
        print("t_set = " + str(i))
        print('Test accuracy:', accuracy)

        # calculate area under the curve
        y_pred = model.predict(X_eval, batch_size=bs)
        _, _, auc = ra.roc_auc(N_CLASSES, y_eval, y_pred)
        total_auc += auc
        print("Area under the curve:", auc)

        # confusion matrix, accumulated over the folds
        fold_cm = confusion_matrix(
            y_eval.argmax(axis=1),
            y_pred.argmax(axis=1),
            labels=class_labels,
        )
        cm = fold_cm if cm is None else cm + fold_cm

        # pr curve (contains 4 pr curves: one for each class)
        recall, precision, average_prec = create_pr(N_CLASSES, y_eval, y_pred)
        recs_k.append(recall)
        precs_k.append(precision)
        avgs_k.append(average_prec)

    # average of acc, auc, cm, pr
    total_acc = total_acc / N_SPLITS
    total_auc = total_auc / N_SPLITS
    cm = cm / N_SPLITS
    pr = avg_pr(N_SPLITS, N_CLASSES, recs_k, precs_k, avgs_k)

    print("Average accuracy: ", total_acc)
    print("Average area under the curve: ", total_auc)

    return total_acc, total_auc, cm, pr
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment