import numpy as np
import pandas as pd
import output_convert as oc
import os
from scipy.interpolate import interp1d as interp

def interpolate(y,K,kind='linear'):
    """
    Resamples the vector y onto K evenly spaced points.

    The N values of y are placed at the abscissas 1/N, 2/N, ..., 1 and the
    resulting interpolant is evaluated at 1/K, 2/K, ..., 1. Evaluation
    points lying outside [1/N, 1] (e.g. 1/K when K > N) are extrapolated.

    Parameters
    ----------
    y : list of real numbers
        dependent variable values.
    K : integer
        number of samples to interpolate.
    kind : optional
        interpolation kind, forwarded unchanged to
        scipy.interpolate.interp1d (default 'linear').

    Returns
    -------
    numpy.ndarray
        the K interpolated values.

    """

    N = len(y)
    # Both grids are built from integer ranges. np.arange with a fractional
    # step (np.arange(1/N, 1+1/N, 1/N)) can produce one extra point through
    # floating-point rounding (e.g. N = 49 yields 50 points), which makes
    # interp1d reject x and y as unequal in length and, on the output grid,
    # returns K+1 samples instead of K.
    x = np.arange(1, N + 1) / N
    f = interp(x, y, kind=kind, fill_value="extrapolate")
    xintp = np.arange(1, K + 1) / K
    return f(xintp)
    

def parse_data(n_experiences,folder="data",samples=30,kind='linear'):
    """
    Builds the input/output arrays from the metrics CSV files of each bionet.

    For every network name in the built-in table, the file
    <folder>/<name>/<name>_metrics.csv is read as the unperturbed reference,
    followed by the files <folder>/<name>/<name>_<NNNNNNNNN>_metrics.csv for
    NNNNNNNNN = 000000001 ... n_experiences (zero padded to 9 digits).

    Parameters
    ----------
    n_experiences : integer
        number of experience files to read per network.
    folder : str, optional
        root directory holding one sub-directory per network.
    samples : integer, optional
        number of points each antifragility curve is resampled to
        (passed to interpolate).
    kind : optional
        interpolation kind passed to interpolate.

    Returns
    -------
    input_ : numpy.ndarray
        one entry per (network, experience): the resampled 'Antifragility'
        curve of the experience minus the resampled reference curve.
    output_ : numpy.ndarray
        concatenation of the values returned by oc.output_convert for the
        first 'Evolvability' and 'Robustness' entry of each experience.

    """
    # names of the bionets (each one is a sub-directory of `folder`)
    bionets = (
        'anemia',
        'cd4',
        'lac-operon',
        'spz-drosophila',
        'arabidopsis',
        'core-cell-cycle',
        'lac-operon-bistability',
        't-cell-differentiation',
        'aspergillus-fumigatus',
        'cortical',
        'l-arabinose-operon',
        't-lgl-survival',
        'aurka',
        'cycle-cdk',
        'leukemia',
        'tol',
        'b-cell',
        'fgf-drosophila',
        'mammalian',
        'trichostrongylus',
        'body-drosophila',
        'gene-cardiac',
        'metabolic',
        'vegf-drosophila',
        'bt474',
        'gonadal',
        'neurotransmitter',
        'wg-drosophila',
        'bt474-ErbB',
        'hcc1954',
        'oxidative-stress',
        'yeast-cycle',
        'budding-yeast',
        'hcc1954-ErbB',
        'skbr3-long',
        'cardiac',
        'hh-drosophila',
        'skbr3-short',
    )

    def _antifragility_curve(table):
        # resample the 'Antifragility' column to `samples` points
        values = np.array(table.loc[:, 'Antifragility']).astype(float)
        return interpolate(values, samples, kind=kind)

    rows = []
    labels = []

    for name in bionets:
        # antifragility curve of the original network, before perturbations
        reference_path = os.path.join(folder, name, name + "_metrics.csv")
        reference_curve = _antifragility_curve(
            pd.read_csv(reference_path, sep=",", header=0))

        for experience in range(1, n_experiences + 1):
            # metrics file of this experience (mutant)
            tag = format(experience, '09')
            mutant_path = os.path.join(
                folder, name, name + "_" + tag + "_metrics.csv")
            mutant = pd.read_csv(mutant_path, sep=",", header=0)

            # difference between the mutant and the reference curves
            delta = _antifragility_curve(mutant) - reference_curve

            evolvability = np.array(
                mutant.loc[:, 'Evolvability']).astype(int)
            robustness = np.array(
                mutant.loc[:, 'Robustness']).astype(int)

            # both columns are vectors but (per the original author's note)
            # all their entries are the same, so only the first one is used
            label = oc.output_convert(evolvability[0], robustness[0])

            rows.append(delta)
            labels.extend(label)

    return np.array(rows), np.array(labels)
