Commit bbce07cf by Pamela Osuna

one line per epoch + independent models
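Model creation and compilation now happen inside the cross-validation loops, so each fold trains an independent model instead of continuing from the previous fold's weights; in run_nn, model.fit also switches to verbose=2, which prints one line per epoch.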

parent 628238b0
@@ -7,7 +7,7 @@ from sklearn.metrics import confusion_matrix
import numpy as np
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import StratifiedKFold
from tensorflow.keras.utils import to_categorical
@@ -18,189 +18,186 @@ N_CLASSES = 4
def run_nn(input_, output_, n_experiences, params):
    c, b, e = params

    # kfold validation
    """
    X for the input and y for the output
    """
    skf = StratifiedKFold(N_SPLITS)
    #kfold = KFold(N_SPLITS, True, 1)  # we define the method to use by choosing n_splits, shuffle on/off, random_state
    X_train_kfold = []
    X_test_kfold = []
    y_train_kfold = []
    y_test_kfold = []
    # split the input data into k sets
    #for train_index, test_index in kfold.split(input_):
    for train_index, test_index in skf.split(input_, output_):
        X_train_kfold.append(input_[train_index])
        X_test_kfold.append(input_[test_index])
        y_train_kfold.append(output_[train_index])
        y_test_kfold.append(output_[test_index])
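    # Note: unlike the plain KFold used before, StratifiedKFold preserves the
    # class proportions of output_ in every train/test split, which is the safer
    # choice for the (presumably imbalanced) four-class labels handled below.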
    # balance the training data (SMOTE oversamples the minority classes)
    sm = SMOTE(random_state=2)
    for i in range(len(X_train_kfold)):
        # fit_resample was named fit_sample in imblearn < 0.4
        X_train_kfold[i], y_train_kfold[i] = sm.fit_resample(X_train_kfold[i], y_train_kfold[i].ravel())
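    # SMOTE synthesises new minority-class samples by interpolating between
    # existing neighbours, so after fit_resample every class in y_train_kfold[i]
    # matches the majority class in size. Only the training folds are resampled;
    # the test folds keep their natural class distribution.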
    # print(len(X_train_kfold[0])/(len(X_train_kfold[0])+len(X_test_kfold[0])))  # gives 0.8, OK

    # build N_SPLITS-1 sub-sub-sets out of each of the k subsets
    # (we rotate the validation set, taking it from the train set)
    X_validation = [[0]*(N_SPLITS-1) for i in range(N_SPLITS)]
    X_train = [[0]*(N_SPLITS-1) for i in range(N_SPLITS)]
    y_validation = [[0]*(N_SPLITS-1) for i in range(N_SPLITS)]
    y_train = [[0]*(N_SPLITS-1) for i in range(N_SPLITS)]
    len_validation = int(len(X_train_kfold[0])/N_SPLITS)
    for i in range(N_SPLITS):
        idx = 0
        for j in range(N_SPLITS-1):
            X_validation[i][j] = X_train_kfold[i][idx:idx+len_validation]
            X_train[i][j] = list(X_train_kfold[i][0:idx]) + list(X_train_kfold[i][idx+len_validation:])
            y_validation[i][j] = y_train_kfold[i][idx:idx+len_validation]
            y_train[i][j] = list(y_train_kfold[i][0:idx]) + list(y_train_kfold[i][idx+len_validation:])
            idx += len_validation
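    # Each outer training fold is carved into N_SPLITS-1 contiguous validation
    # windows of len_validation samples each; the matching inner train set is
    # everything outside the window. Caveat (assumption worth checking): SMOTE
    # appends its synthetic samples after the original ones, so without a shuffle
    # the later windows will contain proportionally more synthetic data.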
    # print(len(X_validation[0][0]), len(X_train[0][0]))  # the validation window should be 1/(N_SPLITS-1) of the inner train set's length

    validation_Y_one_hot = [[0]*(N_SPLITS-1) for i in range(N_SPLITS)]
    train_Y_one_hot = [[0]*(N_SPLITS-1) for i in range(N_SPLITS)]
    for i in range(N_SPLITS):
        for j in range(N_SPLITS-1):
            # change the labels from categorical to one-hot encoding
            train_Y_one_hot[i][j] = to_categorical(y_train[i][j], num_classes=4)
            validation_Y_one_hot[i][j] = to_categorical(y_validation[i][j], num_classes=4)

            # convert input to np.array
            X_train[i][j] = np.array(X_train[i][j])
            X_validation[i][j] = np.array(X_validation[i][j])

            # convert each element of the train and validation set into a matrix of size 30x1
            X_train[i][j] = X_train[i][j].reshape(-1, 30, 1)
            X_validation[i][j] = X_validation[i][j].reshape(-1, 30, 1)

            # convert the data to float32
            X_train[i][j] = X_train[i][j].astype('float32')
            X_validation[i][j] = X_validation[i][j].astype('float32')
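    # Shapes at this point, assuming each sample is a flat vector of 30 values:
    #   X_train[i][j]         -> (n_samples, 30, 1), float32, ready for a Conv1D-style input
    #   train_Y_one_hot[i][j] -> (n_samples, 4), e.g. label 2 becomes [0., 0., 1., 0.]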
    # self reminder: warning! do not reuse i and j as indices for anything else below
    # i: index of the test set (i in {0, ..., k-1})
    # j: index of the validation set (j in {0, ..., k-2})
    total_acc = 0
    total_auc = 0
    bs, ep = m.choose_batch_epochs(b, e)
    for i in range(N_SPLITS):
        for j in range(N_SPLITS-1):
            # define a fresh keras model for each train/validation pair, so the folds stay independent
            model = m.model_architecture(c)
            # compile the keras model
            model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
            # train the model (verbose=2 prints one line per epoch)
            model.fit(X_train[i][j], train_Y_one_hot[i][j], batch_size=bs, epochs=ep, verbose=2, validation_data=(X_validation[i][j], validation_Y_one_hot[i][j]))
            # calculate accuracy on the validation window
            _, accuracy = model.evaluate(X_validation[i][j], validation_Y_one_hot[i][j], verbose=0)
            total_acc += accuracy
            print("t_set = " + str(i) + " v_set = " + str(j))
            print('Validation accuracy:', accuracy)
            # calculate area under the ROC curve
            y_pred = model.predict(X_validation[i][j], batch_size=bs)
            fpr, tpr, auc = ra.roc_auc(N_CLASSES, validation_Y_one_hot[i][j], y_pred)
            total_auc += auc
            print("Area under the curve:", auc)
    # average over all N_SPLITS*(N_SPLITS-1) train/validation pairs
    total_acc = total_acc/(N_SPLITS*(N_SPLITS-1))
    total_auc = total_auc/(N_SPLITS*(N_SPLITS-1))
    print("Average accuracy: ", total_acc)
    print("Average area under the curve: ", total_auc)
    return total_acc, total_auc, X_train_kfold, X_test_kfold, y_train_kfold, y_test_kfold
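
# Standalone sketch (synthetic data, not from this repo) of the stratified-split +
# SMOTE pipeline used above, to show the intended class counts and shapes; the
# sizes (200 samples, 30 features, 4 classes) are illustrative assumptions.
def _demo_split_and_balance():
    X = np.random.default_rng(0).normal(size=(200, 30))  # 200 samples, 30 features
    y = np.array([0]*80 + [1]*60 + [2]*40 + [3]*20)      # imbalanced 4-class labels
    skf_demo = StratifiedKFold(n_splits=5)
    for train_idx, test_idx in skf_demo.split(X, y):
        # oversample only the training fold; the test fold keeps its distribution
        X_tr, y_tr = SMOTE(random_state=2).fit_resample(X[train_idx], y[train_idx])
        print(np.bincount(y_tr))              # [64 64 64 64]: all classes balanced
        print(X_tr.reshape(-1, 30, 1).shape)  # (256, 30, 1): CNN-ready shape
        break
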
def run_kfold(X_train, X_test, y_train, y_test, params):
    c, b, e = params

    for i in range(N_SPLITS):
        # change the labels from categorical to one-hot encoding
        y_train[i] = to_categorical(y_train[i], num_classes=4)
        y_test[i] = to_categorical(y_test[i], num_classes=4)

        # convert input to np.array
        X_train[i] = np.array(X_train[i])
        X_test[i] = np.array(X_test[i])

        # convert each element of the train and test set into a matrix of size 30x1
        X_train[i] = X_train[i].reshape(-1, 30, 1)
        X_test[i] = X_test[i].reshape(-1, 30, 1)

        # convert the data to float32
        X_train[i] = X_train[i].astype('float32')
        X_test[i] = X_test[i].astype('float32')
    total_acc = 0
    total_auc = 0
    precs_k = []  # will contain the average PR curve for each class
    recs_k = []
    avgs_k = []
    bs, ep = m.choose_batch_epochs(b, e)
    for i in range(N_SPLITS):
        # define and compile a fresh keras model for each fold, so the folds stay independent
        model = m.model_architecture(c)
        model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
        # train the model
        model.fit(X_train[i], y_train[i], batch_size=bs, epochs=ep, verbose=1, validation_data=(X_test[i], y_test[i]))
        # calculate accuracy
        _, accuracy = model.evaluate(X_test[i], y_test[i], verbose=0)
        total_acc += accuracy
        print("t_set = " + str(i))
        print('Test accuracy:', accuracy)
        # calculate area under the ROC curve
        y_pred = model.predict(X_test[i], batch_size=bs)
        fpr, tpr, auc = ra.roc_auc(N_CLASSES, y_test[i], y_pred)
        total_auc += auc
        print("Area under the curve:", auc)
        # confusion matrix (accumulated over folds)
        if i == 0:
            cm = confusion_matrix(y_test[i].argmax(axis=1), y_pred.argmax(axis=1))
        else:
            cm += confusion_matrix(y_test[i].argmax(axis=1), y_pred.argmax(axis=1))
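        # argmax(axis=1) turns the one-hot truth rows and softmax prediction rows
        # back into class indices 0..3, which is what sklearn's confusion_matrix
        # expects; the per-fold matrices are summed here and averaged below.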
        # PR curve (contains 4 PR curves: one for each class)
        recall, precision, average_prec = create_pr(N_CLASSES, y_test[i], y_pred)
        recs_k.append(recall)
        precs_k.append(precision)
        avgs_k.append(average_prec)
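        # create_pr (and avg_pr below) are project-local helpers not shown in this
        # diff; judging by their arguments they presumably wrap sklearn's
        # precision_recall_curve / average_precision_score per class and then
        # average the curves across folds. That is an assumption, not confirmed here.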
    # average of acc, auc, cm, pr over the N_SPLITS folds
    total_acc = total_acc/N_SPLITS
    total_auc = total_auc/N_SPLITS
    cm = cm/N_SPLITS
    pr = avg_pr(N_SPLITS, N_CLASSES, recs_k, precs_k, avgs_k)
print("Average accuracy: ", total_acc)
print("Average area under the curve: ", total_auc)
print("Average accuracy: ", total_acc)
print("Average area under the curve: ", total_auc)
    return total_acc, total_auc, cm, pr
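
# Hedged usage sketch (values are illustrative assumptions: the params tuple
# (c, b, e) indexes into the project-local m.model_architecture and
# m.choose_batch_epochs helpers, so the real configuration lives elsewhere):
#
#   acc, auc_, X_tr_k, X_te_k, y_tr_k, y_te_k = run_nn(input_, output_, n_experiences, (c, b, e))
#   acc, auc_, cm, pr = run_kfold(X_tr_k, X_te_k, y_tr_k, y_te_k, (c, b, e))
#
# run_nn selects hyperparameters via nested train/validation cross-validation;
# run_kfold then retrains one fresh model per outer fold and reports test-side
# accuracy, ROC AUC, an averaged confusion matrix and averaged PR curves.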