Commit bbce07cf by Pamela Osuna

one line per epoch + independent models

parent 628238b0
...@@ -7,7 +7,7 @@ from sklearn.metrics import confusion_matrix ...@@ -7,7 +7,7 @@ from sklearn.metrics import confusion_matrix
import numpy as np import numpy as np
from sklearn.model_selection import train_test_split from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE from imblearn.over_sampling import SMOTE
from sklearn.model_selection import KFold from sklearn.model_selection import StratifiedKFold
from tensorflow.keras.utils import to_categorical from tensorflow.keras.utils import to_categorical
...@@ -18,189 +18,186 @@ N_CLASSES = 4 ...@@ -18,189 +18,186 @@ N_CLASSES = 4
def _hold_out_chunk(features, labels, start, length):
    """Split one fold into fit/validation parts around a contiguous chunk.

    Rows ``start:start + length`` form the validation part; every other row
    (before and after the chunk) is concatenated, in order, into the fit part.

    Returns:
        ``(x_fit, y_fit, x_val, y_val)`` as numpy arrays.
    """
    features = np.asarray(features)
    labels = np.asarray(labels)
    stop = start + length
    x_fit = np.concatenate([features[:start], features[stop:]])
    y_fit = np.concatenate([labels[:start], labels[stop:]])
    return x_fit, y_fit, features[start:stop], labels[start:stop]


def _to_network_input(samples):
    """Return ``samples`` reshaped to ``(-1, 30, 1)`` and cast to float32."""
    return np.asarray(samples).reshape(-1, 30, 1).astype('float32')


def run_nn(input_, output_, n_experiences, params):
    """Run nested cross-validation and report mean validation accuracy/AUC.

    The data is split into ``N_SPLITS`` stratified folds. Each training fold
    is balanced with SMOTE, then ``N_SPLITS - 1`` contiguous chunks of it are
    held out in turn as validation sets. A fresh model is built, compiled and
    trained for every (test fold, validation chunk) pair, so no weights are
    shared between runs.

    Args:
        input_: samples, indexable by an integer index array.
        output_: class labels aligned with ``input_``.
        n_experiences: unused here; kept so existing callers keep working.
        params: ``(c, b, e)`` where ``c`` is passed to
            ``m.model_architecture`` and ``b``, ``e`` to
            ``m.choose_batch_epochs``.

    Returns:
        ``(mean_accuracy, mean_auc, X_train_kfold, X_test_kfold,
        y_train_kfold, y_test_kfold)``. The train lists hold the
        SMOTE-balanced folds; the test lists hold the untouched test folds.
    """
    c, b, e = params

    # Outer split: one (train, test) pair per stratified fold.
    skf = StratifiedKFold(N_SPLITS)
    X_train_kfold = []
    X_test_kfold = []
    y_train_kfold = []
    y_test_kfold = []
    for train_index, test_index in skf.split(input_, output_):
        X_train_kfold.append(input_[train_index])
        X_test_kfold.append(input_[test_index])
        y_train_kfold.append(output_[train_index])
        y_test_kfold.append(output_[test_index])

    # Balance only the training folds; the test folds stay as sampled.
    sm = SMOTE(random_state=2)
    # Newer imbalanced-learn releases expose ``fit_resample``; fall back to
    # the ``fit_sample`` name this code was written against.
    resample = getattr(sm, "fit_resample", None) or sm.fit_sample
    for i in range(len(X_train_kfold)):
        X_train_kfold[i], y_train_kfold[i] = resample(
            X_train_kfold[i], y_train_kfold[i].ravel())

    # NOTE: do not reuse i and j for anything else below.
    # i: index of the test fold        (0 .. N_SPLITS - 1)
    # j: index of the validation chunk (0 .. N_SPLITS - 2)
    total_acc = 0
    total_auc = 0
    bs, ep = m.choose_batch_epochs(b, e)
    for i in range(N_SPLITS):
        # Chunk length is derived from this fold's own size, since the
        # resampled folds are not guaranteed to all have the same length.
        len_validation = len(X_train_kfold[i]) // N_SPLITS
        for j in range(N_SPLITS - 1):
            x_fit, y_fit, x_val, y_val = _hold_out_chunk(
                X_train_kfold[i], y_train_kfold[i],
                j * len_validation, len_validation)

            # Labels -> one-hot, inputs -> (n, 30, 1) float32.
            y_fit = to_categorical(y_fit, num_classes=N_CLASSES)
            y_val = to_categorical(y_val, num_classes=N_CLASSES)
            x_fit = _to_network_input(x_fit)
            x_val = _to_network_input(x_val)

            # A new model per run keeps the runs independent of each other.
            model = m.model_architecture(c)
            model.compile(loss='categorical_crossentropy', optimizer='adam',
                          metrics=['accuracy'])
            # verbose = 2 prints one line per epoch.
            model.fit(x_fit, y_fit, batch_size=bs, epochs=ep, verbose=2,
                      validation_data=(x_val, y_val))

            # Accuracy on the held-out validation chunk.
            _, accuracy = model.evaluate(x_val, y_val, verbose=0)
            total_acc += accuracy
            print("t_set = " + str(i) + " v_set = " + str(j))
            print('Test accuracy:', accuracy)

            # Area under the ROC curve on the same chunk.
            y_pred = model.predict(x_val, batch_size=bs)
            _, _, auc = ra.roc_auc(N_CLASSES, y_val, y_pred)
            total_auc += auc
            print("Area under the curve:", auc)

    n_runs = N_SPLITS * (N_SPLITS - 1)
    total_acc = total_acc / n_runs
    total_auc = total_auc / n_runs
    print("Average accuracy: ", total_acc)
    print("Average area under the curve: ", total_auc)
    return total_acc, total_auc, X_train_kfold, X_test_kfold, y_train_kfold, y_test_kfold
def run_kfold(X_train, X_test, y_train, y_test, params):
    """Train and evaluate one fresh model per fold and average the metrics.

    For each of the ``N_SPLITS`` folds the labels are one-hot encoded and the
    inputs reshaped to ``(-1, 30, 1)`` float32, a new model is built,
    compiled and fitted, and accuracy, ROC AUC, confusion matrix and
    precision/recall curves are computed on that fold's test set.

    The caller's lists are not modified: preprocessing works on local copies,
    so the same fold lists can be passed in again (e.g. with other ``params``).

    Args:
        X_train, X_test: per-fold sequences of input samples.
        y_train, y_test: per-fold sequences of integer class labels.
        params: ``(c, b, e)`` where ``c`` is passed to
            ``m.model_architecture`` and ``b``, ``e`` to
            ``m.choose_batch_epochs``.

    Returns:
        ``(mean_accuracy, mean_auc, cm, pr)`` where ``cm`` is the confusion
        matrix averaged over folds and ``pr`` is whatever ``avg_pr`` returns
        for the per-fold precision/recall data.
    """
    c, b, e = params
    bs, ep = m.choose_batch_epochs(b, e)

    total_acc = 0
    total_auc = 0
    # Fixed N_CLASSES x N_CLASSES shape so the per-fold matrices always add
    # up, even if some class is absent from a fold.
    class_ids = list(range(N_CLASSES))
    cm = np.zeros((N_CLASSES, N_CLASSES))
    precs_k = []  # per-fold precision curves (one per class)
    recs_k = []
    avgs_k = []

    for i in range(N_SPLITS):
        # Labels -> one-hot, inputs -> (n, 30, 1) float32, on local copies.
        y_fit = to_categorical(y_train[i], num_classes=N_CLASSES)
        y_eval = to_categorical(y_test[i], num_classes=N_CLASSES)
        x_fit = np.asarray(X_train[i]).reshape(-1, 30, 1).astype('float32')
        x_eval = np.asarray(X_test[i]).reshape(-1, 30, 1).astype('float32')

        # A new model per fold keeps the folds independent of each other.
        model = m.model_architecture(c)
        model.compile(loss='categorical_crossentropy', optimizer='adam',
                      metrics=['accuracy'])
        # NOTE(review): the test fold is also passed as validation_data, as
        # in the original code — confirm it is only used for monitoring.
        model.fit(x_fit, y_fit, batch_size=bs, epochs=ep, verbose=1,
                  validation_data=(x_eval, y_eval))

        # Accuracy on the test fold.
        _, accuracy = model.evaluate(x_eval, y_eval, verbose=0)
        total_acc += accuracy
        print("t_set = " + str(i))
        print('Test accuracy:', accuracy)

        # Area under the ROC curve.
        y_pred = model.predict(x_eval, batch_size=bs)
        _, _, auc = ra.roc_auc(N_CLASSES, y_eval, y_pred)
        total_auc += auc
        print("Area under the curve:", auc)

        # Accumulate the confusion matrix across folds.
        cm += confusion_matrix(y_eval.argmax(axis=1), y_pred.argmax(axis=1),
                               labels=class_ids)

        # Precision/recall data for this fold (one curve per class).
        recall, precision, average_prec = create_pr(N_CLASSES, y_eval, y_pred)
        recs_k.append(recall)
        precs_k.append(precision)
        avgs_k.append(average_prec)

    # Average accuracy, AUC, confusion matrix and PR curves over the folds.
    total_acc = total_acc / N_SPLITS
    total_auc = total_auc / N_SPLITS
    cm = cm / N_SPLITS
    pr = avg_pr(N_SPLITS, N_CLASSES, recs_k, precs_k, avgs_k)
    print("Average accuracy: ", total_acc)
    print("Average area under the curve: ", total_auc)
    return total_acc, total_auc, cm, pr
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment