Commit 5145eb4a by Pamela Osuna

new simple models

parent c22cd53d
{
"cells": [
{
"cell_type": "code",
"execution_count": 96,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import pandas as pd\n",
"from sklearn.model_selection import train_test_split\n",
"from imblearn.over_sampling import SMOTE\n",
"import collections\n",
"from sklearn.linear_model import LogisticRegression\n",
"import os\n",
"from scipy.interpolate import interp1d as interp"
]
},
{
"cell_type": "code",
"execution_count": 102,
"metadata": {},
"outputs": [],
"source": [
"N_DATA = 201 #from 2 to 1001"
]
},
{
"cell_type": "code",
"execution_count": 103,
"metadata": {},
"outputs": [],
"source": [
"\"\"\"\n",
" Encodes outputs as integers\n",
"\n",
" Parameters\n",
" -----------\n",
" e : 1 evolvable, 0 not evolvable\n",
" r : 1 robust, 0 not robust\n",
"\n",
" Returns\n",
" -----------\n",
" the encoded output\n",
" output meaning:\n",
" 0 : not evolv. and not rob.\n",
" 1 : evol. and not rob.\n",
" 2 : not evol. and rob.\n",
" 3 : evol. and rob.\n",
"\"\"\"\n",
"converter = {(0,0): 0, (1,0): 1, (0,1): 2, (1,1):3}"
]
},
{
"cell_type": "code",
"execution_count": 104,
"metadata": {},
"outputs": [],
"source": [
"def parse_data(n_experiences,folder=\"data\",samples=15,kind='linear'): \n",
" input_ = []\n",
" output_ = []\n",
" N0 = samples\n",
" \n",
" #table of the name of the bionets\n",
" str_ = [\"arabidopsis\", \"cardiac\", \"cd4\", \"mammalian\", \"metabolic\", \"anemia\", \"aurka\", \"b-cell\", \"body-drosophila\", \"bt474\", \"bt474-ErbB\", \"cycle-cdk\", \"fgf-drosophila\", \"gonadal\", \"hcc1954\", \"hcc1954-ErbB\", \"hh-drosophila\", \"l-arabinose-operon\", \"leukemia\", \"neurotransmitter\", \"oxidative-stress\", \"skbr3-long\", \"skbr3-short\", \"spz-drosophila\", \"t-lgl-survival\", \"tol\", \"trichostrongylus\", \"vegf-drosophila\", \"wg-drosophila\", \"yeast-cycle\", \"aspergillus-fumigatus\", \"budding-yeast\", \"gene-cardiac\", \"t-cell-differentiation\", \"lac-operon-bistability\", \"core-cell-cycle\", \"cortical\"]\n",
"\n",
" for s in str_:\n",
" data = pd.read_csv(\n",
" os.path.join(folder,s,s + \"_metrics.csv\"), sep=\",\", header=0)\n",
" \n",
" N = int(data.loc[1, 'N'])\n",
" antifrag = list(np.array(data.loc[:, 'Antifragility']).astype(float))\n",
" \n",
" # 15 or 20 points describing the relationship between the original values of antifragility in the network before perturbations and X/N\n",
" X_tmp = list(np.arange(1,N+1, 1))\n",
" X_N = [X_tmp[i]/N for i in range(N)]\n",
" before = [np.interp(i/N0, X_N, antifrag) for i in range(1, N0+1)]\n",
"\n",
" for i in range(1, N_DATA):\n",
" #read the data for each experience\n",
" n = format(i, '09')\n",
" dataXi_tmp = pd.read_csv(\n",
" os.path.join(folder,s,s + \"_\" + n + \"_metrics.csv\"), \n",
" sep=\",\", header=0)\n",
"\n",
" antifrag_tmp = list(np.array(dataXi_tmp.loc[:, 'Antifragility']).astype(float))\n",
" \n",
" #antifragility of the mutant\n",
" after = [np.interp(i/N0, X_N, antifrag_tmp) for i in range(1, N0+1)]\n",
" \n",
" input_tmp = before + after\n",
"\n",
" ev_tmp = list(\n",
" np.array(dataXi_tmp.loc[:, 'Evolvability']).astype(int))\n",
" \n",
" rob_tmp = list(\n",
" np.array(dataXi_tmp.loc[:, 'Robustness']).astype(int))\n",
" \n",
" output_tmp = converter[(ev_tmp[0], rob_tmp[0])]\n",
"\n",
" input_.append(input_tmp)\n",
" output_.append(output_tmp)\n",
" \n",
"\n",
" input_ = np.array(input_)\n",
" output_ = np.array(output_)\n",
" \n",
" return input_, output_"
]
},
{
"cell_type": "code",
"execution_count": 105,
"metadata": {},
"outputs": [],
"source": [
"input_, ouput_ = parse_data(100)"
]
},
{
"cell_type": "code",
"execution_count": 106,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Test accuracy: 0.7873873873873873\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/opt/anaconda3/envs/PythonCPU/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
" FutureWarning)\n",
"/opt/anaconda3/envs/PythonCPU/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:469: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
" \"this warning.\", FutureWarning)\n"
]
}
],
"source": [
"from tensorflow.keras.utils import to_categorical\n",
"\n",
"#balancing the data\n",
"#sm = SMOTE(random_state=2)\n",
"#input_, output_ = sm.fit_sample(input_, output_.ravel())\n",
"\n",
"#split the training data into one for training and another one for validation\n",
"X_train, X_test, y_train, y_test = train_test_split(input_, output_, test_size=0.3, random_state=0)\n",
"\n",
"\n",
"X_train = np.array(X_train)\n",
"X_test = np.array(X_test)\n",
"\n",
"#defining the model\n",
"\n",
"clf = LogisticRegression(random_state=0).fit(X_train, y_train)\n",
"acc = clf.score(X_test, y_test)\n",
"\n",
"print('Test accuracy:', acc)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
{
"cells": [
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import pandas as pd\n",
"from tensorflow.keras.models import Sequential\n",
"from tensorflow.keras.layers import Dense\n",
"from sklearn.model_selection import train_test_split\n",
"from imblearn.over_sampling import SMOTE\n",
"from tensorflow.keras.utils import to_categorical\n",
"import os\n",
"from scipy.interpolate import interp1d as interp"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [],
"source": [
"N_DATA = 100 "
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [],
"source": [
"\"\"\"\n",
" Encodes outputs as integers\n",
"\n",
" Parameters\n",
" -----------\n",
" e : 1 evolvable, 0 not evolvable\n",
" r : 1 robust, 0 not robust\n",
"\n",
" Returns\n",
" -----------\n",
" the encoded output\n",
" output meaning:\n",
" 0 : not evolv. and not rob.\n",
" 1 : evol. and not rob.\n",
" 2 : not evol. and rob.\n",
" 3 : evol. and rob.\n",
"\"\"\"\n",
"converter = {(0,0): 0, (1,0): 1, (0,1): 2, (1,1):3}"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [],
"source": [
"def parse_data(n_experiences,folder=\"data\",samples=15,kind='linear'): \n",
" input_ = []\n",
" output_ = []\n",
" N0 = samples\n",
" \n",
" #table of the name of the bionets\n",
" str_ = [\"arabidopsis\", \"cardiac\", \"cd4\", \"mammalian\", \"metabolic\", \"anemia\", \"aurka\", \"b-cell\", \"body-drosophila\", \"bt474\", \"bt474-ErbB\", \"cycle-cdk\", \"fgf-drosophila\", \"gonadal\", \"hcc1954\", \"hcc1954-ErbB\", \"hh-drosophila\", \"l-arabinose-operon\", \"leukemia\", \"neurotransmitter\", \"oxidative-stress\", \"skbr3-long\", \"skbr3-short\", \"spz-drosophila\", \"t-lgl-survival\", \"tol\", \"trichostrongylus\", \"vegf-drosophila\", \"wg-drosophila\", \"yeast-cycle\", \"aspergillus-fumigatus\", \"budding-yeast\", \"gene-cardiac\", \"t-cell-differentiation\", \"lac-operon-bistability\", \"core-cell-cycle\", \"cortical\"]\n",
"\n",
" for s in str_:\n",
" data = pd.read_csv(\n",
" os.path.join(folder,s,s + \"_metrics.csv\"), sep=\",\", header=0)\n",
" \n",
" N = int(data.loc[1, 'N'])\n",
" antifrag = list(np.array(data.loc[:, 'Antifragility']).astype(float))\n",
" \n",
" # 15 or 20 points describing the relationship between the original values of antifragility in the network before perturbations and X/N\n",
" X_tmp = list(np.arange(1,N+1, 1))\n",
" X_N = [X_tmp[i]/N for i in range(N)]\n",
" before = [np.interp(i/N0, X_N, antifrag) for i in range(1, N0+1)]\n",
"\n",
" for i in range(1, N_DATA):\n",
" #read the data for each experience\n",
" n = format(i, '09')\n",
" dataXi_tmp = pd.read_csv(\n",
" os.path.join(folder,s,s + \"_\" + n + \"_metrics.csv\"), \n",
" sep=\",\", header=0)\n",
"\n",
" antifrag_tmp = list(np.array(dataXi_tmp.loc[:, 'Antifragility']).astype(float))\n",
" \n",
" #antifragility of the mutant\n",
" after = [np.interp(i/N0, X_N, antifrag_tmp) for i in range(1, N0+1)]\n",
" \n",
" input_tmp = before + after\n",
"\n",
" ev_tmp = list(\n",
" np.array(dataXi_tmp.loc[:, 'Evolvability']).astype(int))\n",
" \n",
" rob_tmp = list(\n",
" np.array(dataXi_tmp.loc[:, 'Robustness']).astype(int))\n",
" \n",
" output_tmp = converter[(ev_tmp[0], rob_tmp[0])]\n",
"\n",
" input_.append(input_tmp)\n",
" output_.append(output_tmp)\n",
" \n",
"\n",
" input_ = np.array(input_)\n",
" output_ = np.array(output_)\n",
" \n",
" return input_, output_"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [],
"source": [
"input_, output_ = parse_data(100)"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Train on 2564 samples, validate on 1099 samples\n",
"Epoch 1/50\n",
"2564/2564 [==============================] - 2s 839us/sample - loss: 0.9360 - accuracy: 0.7761 - val_loss: 0.6754 - val_accuracy: 0.7780\n",
"Epoch 2/50\n",
"2564/2564 [==============================] - 1s 385us/sample - loss: 0.5802 - accuracy: 0.8081 - val_loss: 0.6172 - val_accuracy: 0.7780\n",
"Epoch 3/50\n",
"2564/2564 [==============================] - 1s 386us/sample - loss: 0.5506 - accuracy: 0.8081 - val_loss: 0.5958 - val_accuracy: 0.7780\n",
"Epoch 4/50\n",
"2564/2564 [==============================] - 1s 454us/sample - loss: 0.5369 - accuracy: 0.8081 - val_loss: 0.5928 - val_accuracy: 0.7780\n",
"Epoch 5/50\n",
"2564/2564 [==============================] - 1s 436us/sample - loss: 0.5265 - accuracy: 0.8081 - val_loss: 0.5752 - val_accuracy: 0.7780\n",
"Epoch 6/50\n",
"2564/2564 [==============================] - 1s 478us/sample - loss: 0.5176 - accuracy: 0.8081 - val_loss: 0.5674 - val_accuracy: 0.7780\n",
"Epoch 7/50\n",
"2564/2564 [==============================] - 1s 436us/sample - loss: 0.5107 - accuracy: 0.8081 - val_loss: 0.5597 - val_accuracy: 0.7780\n",
"Epoch 8/50\n",
"2564/2564 [==============================] - 1s 525us/sample - loss: 0.5063 - accuracy: 0.8081 - val_loss: 0.5564 - val_accuracy: 0.7789\n",
"Epoch 9/50\n",
"2564/2564 [==============================] - 1s 488us/sample - loss: 0.5008 - accuracy: 0.8089 - val_loss: 0.5484 - val_accuracy: 0.7825\n",
"Epoch 10/50\n",
"2564/2564 [==============================] - 1s 460us/sample - loss: 0.4964 - accuracy: 0.8120 - val_loss: 0.5445 - val_accuracy: 0.7843\n",
"Epoch 11/50\n",
"2564/2564 [==============================] - 1s 413us/sample - loss: 0.4917 - accuracy: 0.8140 - val_loss: 0.5425 - val_accuracy: 0.7807\n",
"Epoch 12/50\n",
"2564/2564 [==============================] - 1s 449us/sample - loss: 0.4886 - accuracy: 0.8128 - val_loss: 0.5351 - val_accuracy: 0.7807\n",
"Epoch 13/50\n",
"2564/2564 [==============================] - 1s 409us/sample - loss: 0.4843 - accuracy: 0.8144 - val_loss: 0.5316 - val_accuracy: 0.7816\n",
"Epoch 14/50\n",
"2564/2564 [==============================] - 1s 397us/sample - loss: 0.4825 - accuracy: 0.8151 - val_loss: 0.5261 - val_accuracy: 0.7825\n",
"Epoch 15/50\n",
"2564/2564 [==============================] - 1s 440us/sample - loss: 0.4799 - accuracy: 0.8179 - val_loss: 0.5255 - val_accuracy: 0.7825\n",
"Epoch 16/50\n",
"2564/2564 [==============================] - 1s 409us/sample - loss: 0.4759 - accuracy: 0.8237 - val_loss: 0.5282 - val_accuracy: 0.8044\n",
"Epoch 17/50\n",
"2564/2564 [==============================] - 1s 430us/sample - loss: 0.4755 - accuracy: 0.8268 - val_loss: 0.5157 - val_accuracy: 0.8107\n",
"Epoch 18/50\n",
"2564/2564 [==============================] - 1s 408us/sample - loss: 0.4712 - accuracy: 0.8288 - val_loss: 0.5120 - val_accuracy: 0.8089\n",
"Epoch 19/50\n",
"2564/2564 [==============================] - 1s 392us/sample - loss: 0.4699 - accuracy: 0.8323 - val_loss: 0.5095 - val_accuracy: 0.8089\n",
"Epoch 20/50\n",
"2564/2564 [==============================] - 1s 398us/sample - loss: 0.4663 - accuracy: 0.8319 - val_loss: 0.5105 - val_accuracy: 0.8116\n",
"Epoch 21/50\n",
"2564/2564 [==============================] - 1s 396us/sample - loss: 0.4642 - accuracy: 0.8331 - val_loss: 0.5060 - val_accuracy: 0.8089\n",
"Epoch 22/50\n",
"2564/2564 [==============================] - 1s 428us/sample - loss: 0.4625 - accuracy: 0.8323 - val_loss: 0.5014 - val_accuracy: 0.8089\n",
"Epoch 23/50\n",
"2564/2564 [==============================] - 1s 406us/sample - loss: 0.4605 - accuracy: 0.8319 - val_loss: 0.4979 - val_accuracy: 0.8089\n",
"Epoch 24/50\n",
"2564/2564 [==============================] - 1s 430us/sample - loss: 0.4597 - accuracy: 0.8319 - val_loss: 0.4978 - val_accuracy: 0.8089\n",
"Epoch 25/50\n",
"2564/2564 [==============================] - 1s 410us/sample - loss: 0.4588 - accuracy: 0.8327 - val_loss: 0.4955 - val_accuracy: 0.8089\n",
"Epoch 26/50\n",
"2564/2564 [==============================] - 1s 441us/sample - loss: 0.4574 - accuracy: 0.8327 - val_loss: 0.4922 - val_accuracy: 0.8126\n",
"Epoch 27/50\n",
"2564/2564 [==============================] - 1s 410us/sample - loss: 0.4555 - accuracy: 0.8319 - val_loss: 0.4947 - val_accuracy: 0.8089\n",
"Epoch 28/50\n",
"2564/2564 [==============================] - 1s 399us/sample - loss: 0.4532 - accuracy: 0.8327 - val_loss: 0.4876 - val_accuracy: 0.8126\n",
"Epoch 29/50\n",
"2564/2564 [==============================] - 1s 400us/sample - loss: 0.4535 - accuracy: 0.8315 - val_loss: 0.4853 - val_accuracy: 0.8089\n",
"Epoch 30/50\n",
"2564/2564 [==============================] - 1s 401us/sample - loss: 0.4491 - accuracy: 0.8331 - val_loss: 0.5016 - val_accuracy: 0.8116\n",
"Epoch 31/50\n",
"2564/2564 [==============================] - 1s 503us/sample - loss: 0.4498 - accuracy: 0.8327 - val_loss: 0.4873 - val_accuracy: 0.8089\n",
"Epoch 32/50\n",
"2564/2564 [==============================] - 1s 478us/sample - loss: 0.4474 - accuracy: 0.8331 - val_loss: 0.4935 - val_accuracy: 0.8089\n",
"Epoch 33/50\n",
"2564/2564 [==============================] - 1s 411us/sample - loss: 0.4472 - accuracy: 0.8327 - val_loss: 0.4792 - val_accuracy: 0.8089\n",
"Epoch 34/50\n",
"2564/2564 [==============================] - 1s 404us/sample - loss: 0.4449 - accuracy: 0.8335 - val_loss: 0.4806 - val_accuracy: 0.8089\n",
"Epoch 35/50\n",
"2564/2564 [==============================] - 1s 437us/sample - loss: 0.4437 - accuracy: 0.8342 - val_loss: 0.4740 - val_accuracy: 0.8098\n",
"Epoch 36/50\n",
"2564/2564 [==============================] - 1s 479us/sample - loss: 0.4432 - accuracy: 0.8319 - val_loss: 0.4765 - val_accuracy: 0.8107\n",
"Epoch 37/50\n",
"2564/2564 [==============================] - 1s 505us/sample - loss: 0.4421 - accuracy: 0.8323 - val_loss: 0.4709 - val_accuracy: 0.8098\n",
"Epoch 38/50\n",
"2564/2564 [==============================] - 1s 439us/sample - loss: 0.4402 - accuracy: 0.8327 - val_loss: 0.4692 - val_accuracy: 0.8107\n",
"Epoch 39/50\n",
"2564/2564 [==============================] - 1s 423us/sample - loss: 0.4393 - accuracy: 0.8319 - val_loss: 0.4712 - val_accuracy: 0.8107\n",
"Epoch 40/50\n",
"2564/2564 [==============================] - 1s 391us/sample - loss: 0.4391 - accuracy: 0.8331 - val_loss: 0.4677 - val_accuracy: 0.8098\n",
"Epoch 41/50\n",
"2564/2564 [==============================] - 1s 392us/sample - loss: 0.4374 - accuracy: 0.8327 - val_loss: 0.4650 - val_accuracy: 0.8098\n",
"Epoch 42/50\n",
"2564/2564 [==============================] - 1s 391us/sample - loss: 0.4348 - accuracy: 0.8323 - val_loss: 0.4628 - val_accuracy: 0.8116\n",
"Epoch 43/50\n",
"2564/2564 [==============================] - 1s 388us/sample - loss: 0.4347 - accuracy: 0.8323 - val_loss: 0.4680 - val_accuracy: 0.8089\n",
"Epoch 44/50\n",
"2564/2564 [==============================] - 1s 390us/sample - loss: 0.4359 - accuracy: 0.8327 - val_loss: 0.4604 - val_accuracy: 0.8107\n",
"Epoch 45/50\n",
"2564/2564 [==============================] - 1s 390us/sample - loss: 0.4328 - accuracy: 0.8319 - val_loss: 0.4602 - val_accuracy: 0.8135\n",
"Epoch 46/50\n",
"2564/2564 [==============================] - 1s 399us/sample - loss: 0.4311 - accuracy: 0.8350 - val_loss: 0.4570 - val_accuracy: 0.8098\n",
"Epoch 47/50\n",
"2564/2564 [==============================] - 1s 407us/sample - loss: 0.4313 - accuracy: 0.8323 - val_loss: 0.4584 - val_accuracy: 0.8126\n",
"Epoch 48/50\n",
"2564/2564 [==============================] - 1s 409us/sample - loss: 0.4314 - accuracy: 0.8342 - val_loss: 0.4544 - val_accuracy: 0.8126\n",
"Epoch 49/50\n",
"2564/2564 [==============================] - 1s 412us/sample - loss: 0.4294 - accuracy: 0.8331 - val_loss: 0.4555 - val_accuracy: 0.8098\n",
"Epoch 50/50\n",
"2564/2564 [==============================] - 1s 408us/sample - loss: 0.4271 - accuracy: 0.8339 - val_loss: 0.4615 - val_accuracy: 0.8144\n",
"Test accuracy: 0.8143767\n"
]
}
],
"source": [
"from tensorflow.keras.utils import to_categorical\n",
"#balancing the data\n",
"#sm = SMOTE(random_state=2)\n",
"#input_, output_ = sm.fit_sample(input_, output_.ravel())\n",
"\n",
"#split the training data into one for training and another one for validation\n",
"X_train, X_test, y_train, y_test = train_test_split(input_, output_, test_size=0.3, random_state=0)\n",
"\n",
"#change the labels from categorical to one-hot encoding\n",
"y_train = to_categorical(y_train)\n",
"y_test = to_categorical(y_test)\n",
"\n",
"##\n",
"X_train = np.array(X_train)\n",
"X_test = np.array(X_test)\n",
"\n",
"\n",
"#defining the keras model\n",
"model = Sequential()\n",
"\n",
"num_classes = 4\n",
"#network architecture\n",
"model.add(Dense(15, input_dim=30, activation='relu'))\n",
"model.add(Dense(8, activation='relu'))\n",
"model.add(Dense(num_classes, activation='softmax'))\n",
"\n",
"#compile the keras model\n",
"model.compile(loss = 'categorical_crossentropy', optimizer = 'adam', metrics = ['accuracy'])\n",
"\n",
"#train the model\n",
"model.fit(X_train, y_train, batch_size = 10, epochs = 50, verbose = 1, validation_data = (X_test, y_test))\n",
"\n",
"#calculate accuracy\n",
"_,accuracy = model.evaluate(X_test, y_test, verbose = 0)\n",
"\n",
"print('Test accuracy:', accuracy)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment