Commit 5145eb4a by Pamela Osuna

new simple models

parent c22cd53d
{
"cells": [
{
"cell_type": "code",
"execution_count": 96,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import pandas as pd\n",
"from sklearn.model_selection import train_test_split\n",
"from imblearn.over_sampling import SMOTE\n",
"import collections\n",
"from sklearn.linear_model import LogisticRegression\n",
"import os\n",
"from scipy.interpolate import interp1d as interp"
]
},
{
"cell_type": "code",
"execution_count": 102,
"metadata": {},
"outputs": [],
"source": [
"# Exclusive upper bound of the mutant-file index loop in parse_data:\n",
"# files numbered 1 .. N_DATA-1 are read for every network.\n",
"# Original note: \"from 2 to 1001\" (presumably the range of accepted values - TODO confirm)\n",
"N_DATA = 201"
]
},
{
"cell_type": "code",
"execution_count": 103,
"metadata": {},
"outputs": [],
"source": [
"# Encodes outputs as integers\n",
"#\n",
"# Key (e, r)\n",
"# -----------\n",
"# e : 1 evolvable, 0 not evolvable\n",
"# r : 1 robust, 0 not robust\n",
"#\n",
"# Value\n",
"# -----------\n",
"# the encoded output\n",
"# output meaning:\n",
"#   0 : not evolv. and not rob.\n",
"#   1 : evol. and not rob.\n",
"#   2 : not evol. and rob.\n",
"#   3 : evol. and rob.\n",
"converter = {\n",
"    (0, 0): 0,\n",
"    (1, 0): 1,\n",
"    (0, 1): 2,\n",
"    (1, 1): 3,\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 104,
"metadata": {},
"outputs": [],
"source": [
"def parse_data(n_experiences, folder=\"data\", samples=15, kind='linear'):\n",
"    \"\"\"\n",
"    Builds the model inputs and encoded outputs from the metrics files\n",
"\n",
"    For every network, the antifragility curve of the original network and\n",
"    the curve of each mutant are resampled by linear interpolation\n",
"    (np.interp) at `samples` evenly spaced values of X/N. The two resampled\n",
"    curves are concatenated into one input row per mutant.\n",
"\n",
"    Parameters\n",
"    -----------\n",
"    n_experiences : accepted but not read by this function; the number of\n",
"        mutant files per network is fixed by the global N_DATA\n",
"        (indices 1 .. N_DATA-1)\n",
"    folder : root directory holding one sub-folder per network\n",
"    samples : number of interpolation points taken on each curve\n",
"    kind : accepted but not read by this function; the interpolation is\n",
"        always the linear one done by np.interp\n",
"\n",
"    Returns\n",
"    -----------\n",
"    input_ : array with one row per (network, mutant) pair and\n",
"        2*samples columns (original curve followed by mutant curve)\n",
"    output_ : array of encoded outputs, see `converter`\n",
"    \"\"\"\n",
"    #table of the name of the bionets\n",
"    networks = [\n",
"        \"arabidopsis\", \"cardiac\", \"cd4\", \"mammalian\", \"metabolic\",\n",
"        \"anemia\", \"aurka\", \"b-cell\", \"body-drosophila\", \"bt474\",\n",
"        \"bt474-ErbB\", \"cycle-cdk\", \"fgf-drosophila\", \"gonadal\",\n",
"        \"hcc1954\", \"hcc1954-ErbB\", \"hh-drosophila\", \"l-arabinose-operon\",\n",
"        \"leukemia\", \"neurotransmitter\", \"oxidative-stress\", \"skbr3-long\",\n",
"        \"skbr3-short\", \"spz-drosophila\", \"t-lgl-survival\", \"tol\",\n",
"        \"trichostrongylus\", \"vegf-drosophila\", \"wg-drosophila\",\n",
"        \"yeast-cycle\", \"aspergillus-fumigatus\", \"budding-yeast\",\n",
"        \"gene-cardiac\", \"t-cell-differentiation\", \"lac-operon-bistability\",\n",
"        \"core-cell-cycle\", \"cortical\",\n",
"    ]\n",
"\n",
"    #values of X/N at which every curve is resampled: 1/samples, 2/samples, ..., 1\n",
"    sample_points = np.arange(1, samples + 1) / samples\n",
"\n",
"    input_ = []\n",
"    output_ = []\n",
"\n",
"    for s in networks:\n",
"        data = pd.read_csv(\n",
"            os.path.join(folder, s, s + \"_metrics.csv\"), sep=\",\", header=0)\n",
"\n",
"        #N is taken from the row with index label 1 of the network file\n",
"        N = int(data.loc[1, 'N'])\n",
"        antifrag = np.array(data.loc[:, 'Antifragility']).astype(float)\n",
"\n",
"        #abscissa of the measured curves: X/N for X = 1..N\n",
"        #(np.interp needs as many antifragility values as abscissa points,\n",
"        # i.e. N rows per file - assumed, TODO confirm)\n",
"        X_N = np.arange(1, N + 1) / N\n",
"\n",
"        #original values of antifragility in the network before perturbations\n",
"        before = list(np.interp(sample_points, X_N, antifrag))\n",
"\n",
"        for i in range(1, N_DATA):\n",
"            #read the data for each experience (index zero-padded to 9 digits)\n",
"            n = format(i, '09')\n",
"            mutant = pd.read_csv(\n",
"                os.path.join(folder, s, s + \"_\" + n + \"_metrics.csv\"),\n",
"                sep=\",\", header=0)\n",
"\n",
"            antifrag_mutant = np.array(\n",
"                mutant.loc[:, 'Antifragility']).astype(float)\n",
"\n",
"            #antifragility of the mutant\n",
"            after = list(np.interp(sample_points, X_N, antifrag_mutant))\n",
"\n",
"            #the class is read from the first row of the mutant file\n",
"            ev = int(mutant.loc[:, 'Evolvability'].iloc[0])\n",
"            rob = int(mutant.loc[:, 'Robustness'].iloc[0])\n",
"\n",
"            input_.append(before + after)\n",
"            output_.append(converter[(ev, rob)])\n",
"\n",
"    input_ = np.array(input_)\n",
"    output_ = np.array(output_)\n",
"\n",
"    return input_, output_"
]
},
{
"cell_type": "code",
"execution_count": 105,
"metadata": {},
"outputs": [],
"source": [
"# The labels must be bound to `output_`: that is the name the training cell reads.\n",
"# NOTE(review): parse_data does not read its first argument (the loop bound is N_DATA).\n",
"input_, output_ = parse_data(100)"
]
},
{
"cell_type": "code",
"execution_count": 106,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Test accuracy: 0.7873873873873873\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/opt/anaconda3/envs/PythonCPU/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
" FutureWarning)\n",
"/opt/anaconda3/envs/PythonCPU/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:469: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
" \"this warning.\", FutureWarning)\n"
]
}
],
"source": [
"# NOTE(review): to_categorical is not used anywhere in this notebook\n",
"from tensorflow.keras.utils import to_categorical\n",
"\n",
"#balancing the data (currently disabled)\n",
"#sm = SMOTE(random_state=2)\n",
"#input_, output_ = sm.fit_sample(input_, output_.ravel())\n",
"\n",
"#hold out 30% of the samples as a test set; the fixed seed keeps the split reproducible\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    input_, output_, test_size=0.3, random_state=0)\n",
"\n",
"#defining the model\n",
"#the solver is named explicitly so the fitted model does not change when the\n",
"#library default moves to 'lbfgs' (see the FutureWarning of scikit-learn < 0.22)\n",
"clf = LogisticRegression(random_state=0, solver='liblinear')\n",
"clf.fit(X_train, y_train)\n",
"\n",
"#mean accuracy on the held-out test set\n",
"acc = clf.score(X_test, y_test)\n",
"\n",
"print('Test accuracy:', acc)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment