new simple models

5145eb4a · Pamela Osuna · c22cd53d · 5145eb4a · 5145eb4a
Commit 5145eb4a authored Mar 07, 2020 by Pamela Osuna
Expand all Hide whitespace changes
Inline Side-by-side

Showing with 196 additions and 0 deletions

logistic_regression.ipynb
+196 -0

perceptron_model.ipynb
+0 -0

No files found.
--- a/logistic_regression.ipynb
+++ b/logistic_regression.ipynb
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 96,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "from imblearn.over_sampling import SMOTE\n",
+    "import collections\n",
+    "from sklearn.linear_model import LogisticRegression\n",
+    "import os\n",
+    "from scipy.interpolate import interp1d as interp"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 102,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Exclusive upper bound on the mutant-file index: parse_data loops over\n",
+    "# range(1, N_DATA), so files 1 .. N_DATA - 1 are read for every network.\n",
+    "N_DATA = 201 #from 2 to 1001 (presumably the usable range of values; TODO confirm against the data folder)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 103,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\"\"\"\n",
+    "    Class-label encoding for the (evolvability, robustness) flags\n",
+    "\n",
+    "    Keys\n",
+    "    -----------\n",
+    "    (e, r) where\n",
+    "    e : 1 evolvable, 0 not evolvable\n",
+    "    r : 1 robust, 0 not robust\n",
+    "\n",
+    "    Values\n",
+    "    -----------\n",
+    "    the integer label of the pair, equal to e + 2*r\n",
+    "    (the meaning of each label is given next to its row below)\n",
+    "\"\"\"\n",
+    "converter = {\n",
+    "    (0, 0): 0,  # not evolvable, not robust\n",
+    "    (1, 0): 1,  # evolvable, not robust\n",
+    "    (0, 1): 2,  # not evolvable, robust\n",
+    "    (1, 1): 3,  # evolvable, robust\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 104,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def parse_data(n_experiences,folder=\"data\",samples=15,kind='linear'):\n",
+    "    \"\"\"\n",
+    "    Builds the feature matrix and the label vector from the metrics CSV files\n",
+    "\n",
+    "    For every network name listed below, reads\n",
+    "    <folder>/<name>/<name>_metrics.csv (the original network) and the mutant\n",
+    "    files <folder>/<name>/<name>_<index>_metrics.csv for index in\n",
+    "    range(1, N_DATA), the index being zero-padded to 9 digits.\n",
+    "\n",
+    "    Parameters\n",
+    "    -----------\n",
+    "    n_experiences : accepted but not used; the number of mutant files read per\n",
+    "        network is set by the global N_DATA\n",
+    "        NOTE(review): confirm whether this argument was meant to replace N_DATA\n",
+    "    folder : root directory holding one sub-folder per network\n",
+    "    samples : number of points at which each antifragility curve is resampled\n",
+    "    kind : accepted but not used; the resampling is always done with np.interp\n",
+    "\n",
+    "    Returns\n",
+    "    -----------\n",
+    "    input_ : array with one row per mutant: the `samples` resampled\n",
+    "        antifragility values of the original network followed by the `samples`\n",
+    "        resampled values of the mutant\n",
+    "    output_ : array with one integer label per mutant, looked up in `converter`\n",
+    "        from the first row of the 'Evolvability' and 'Robustness' columns\n",
+    "    \"\"\"\n",
+    "    #table of the name of the bionets\n",
+    "    networks = [\n",
+    "        \"arabidopsis\", \"cardiac\", \"cd4\", \"mammalian\", \"metabolic\", \"anemia\",\n",
+    "        \"aurka\", \"b-cell\", \"body-drosophila\", \"bt474\", \"bt474-ErbB\", \"cycle-cdk\",\n",
+    "        \"fgf-drosophila\", \"gonadal\", \"hcc1954\", \"hcc1954-ErbB\", \"hh-drosophila\",\n",
+    "        \"l-arabinose-operon\", \"leukemia\", \"neurotransmitter\", \"oxidative-stress\",\n",
+    "        \"skbr3-long\", \"skbr3-short\", \"spz-drosophila\", \"t-lgl-survival\", \"tol\",\n",
+    "        \"trichostrongylus\", \"vegf-drosophila\", \"wg-drosophila\", \"yeast-cycle\",\n",
+    "        \"aspergillus-fumigatus\", \"budding-yeast\", \"gene-cardiac\",\n",
+    "        \"t-cell-differentiation\", \"lac-operon-bistability\", \"core-cell-cycle\", \"cortical\",\n",
+    "    ]\n",
+    "\n",
+    "    #X/N positions at which every curve is resampled: 1/samples, 2/samples, ..., 1\n",
+    "    #(the same for all networks, so it is built once)\n",
+    "    targets = np.arange(1, samples + 1) / samples\n",
+    "\n",
+    "    input_ = []\n",
+    "    output_ = []\n",
+    "\n",
+    "    for name in networks:\n",
+    "        net_dir = os.path.join(folder, name)\n",
+    "        original = pd.read_csv(\n",
+    "            os.path.join(net_dir, name + \"_metrics.csv\"), sep=\",\", header=0)\n",
+    "\n",
+    "        #N is taken from the row labelled 1 (presumably every row holds the same value)\n",
+    "        N = int(original.loc[1, 'N'])\n",
+    "        antifrag = np.array(original.loc[:, 'Antifragility']).astype(float)\n",
+    "\n",
+    "        #abscissas X/N of the stored curve; np.interp needs as many antifragility values as abscissas\n",
+    "        X_N = np.arange(1, N + 1) / N\n",
+    "\n",
+    "        #antifragility of the original network before perturbations, resampled on the common grid\n",
+    "        before = np.interp(targets, X_N, antifrag)\n",
+    "\n",
+    "        for index in range(1, N_DATA):\n",
+    "            #read the data for each experience\n",
+    "            mutant = pd.read_csv(\n",
+    "                os.path.join(net_dir, name + \"_\" + format(index, '09') + \"_metrics.csv\"),\n",
+    "                sep=\",\", header=0)\n",
+    "\n",
+    "            #antifragility of the mutant, resampled on the same grid\n",
+    "            antifrag_mutant = np.array(mutant.loc[:, 'Antifragility']).astype(float)\n",
+    "            after = np.interp(targets, X_N, antifrag_mutant)\n",
+    "\n",
+    "            #only the first row of each flag column is used to build the label\n",
+    "            evolv = np.array(mutant.loc[:, 'Evolvability']).astype(int)\n",
+    "            robust = np.array(mutant.loc[:, 'Robustness']).astype(int)\n",
+    "\n",
+    "            input_.append(np.concatenate([before, after]))\n",
+    "            output_.append(converter[(evolv[0], robust[0])])\n",
+    "\n",
+    "    return np.array(input_), np.array(output_)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 105,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#NOTE: parse_data does not use its first argument; the number of mutant files read per network comes from N_DATA\n",
+    "input_, output_ = parse_data(100)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 106,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Test accuracy: 0.7873873873873873\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/opt/anaconda3/envs/PythonCPU/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
+      "  FutureWarning)\n",
+      "/opt/anaconda3/envs/PythonCPU/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:469: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
+      "  \"this warning.\", FutureWarning)\n"
+     ]
+    }
+   ],
+   "source": [
+    "#balancing the data (currently disabled)\n",
+    "#sm = SMOTE(random_state=2)\n",
+    "#input_, output_ = sm.fit_sample(input_, output_.ravel())\n",
+    "\n",
+    "#hold out 30% of the samples to measure the accuracy on data not used for fitting;\n",
+    "#random_state is fixed so the split is the same on every run\n",
+    "X_train, X_test, y_train, y_test = train_test_split(input_, output_, test_size=0.3, random_state=0)\n",
+    "\n",
+    "#defining the model\n",
+    "clf = LogisticRegression(random_state=0).fit(X_train, y_train)\n",
+    "\n",
+    "#mean accuracy on the held-out samples\n",
+    "acc = clf.score(X_test, y_test)\n",
+    "\n",
+    "print('Test accuracy:', acc)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
--- a/perceptron_model.ipynb
+++ b/perceptron_model.ipynb