programa de estadísticas y lista de requerimientos

c075bb23 · Jose Luis Gordillo Ruiz · a1b9758a · c075bb23 · c075bb23
Commit c075bb23 authored Dec 05, 2024 by Jose Luis Gordillo Ruiz
Hide whitespace changes
Inline Side-by-side

Showing with 399 additions and 0 deletions

estadisticas.py
+395 -0

requeriments.txt
+4 -0

No files found.
--- a/estadisticas.py
+++ b/estadisticas.py
+import pandas as pd
+from dash import Dash, Input, Output, dcc, html
+import plotly.graph_objects as go
+import plotly.express as px
+import requests
+import numpy as np
+from plotly.subplots import make_subplots
+import socket
+from sklearn.cluster import KMeans
+from scipy import stats
+from ast import literal_eval
+
+def datavis(datos):
+    medidas = ["imc", "age", "glucose", "hba1c", "ct", "hdl"]    
+    fig = make_subplots(rows=2, cols=3, shared_xaxes=True, vertical_spacing=0.1,
+                        subplot_titles=medidas)
+    lcol = 1
+    lrow = 1
+    for medida in medidas:
+        fig.add_trace(
+            go.Scatter(y=datos[medida], name=medida,mode="markers"),
+            row=lrow, col=lcol
+        )
+        lrow = lrow + (lcol // 3)
+        lcol = lcol + 1 if lcol < 3 else 1
+    fig.update_layout(showlegend = False)
+    return fig
+
+def popdist(datos):
+    medidas = ["imc", "age", "glucose", "hba1c", "ct", "hdl"]
+    # removemos outliers
+    fig = make_subplots(rows=2, cols=3, shared_yaxes=True, subplot_titles=medidas)
+    lcol = 1
+    lrow = 1
+
+    for medida in medidas:
+        datosfil = datos[np.abs(stats.zscore(datos[medida], nan_policy='omit')) < 3]
+        fig.add_trace(
+            go.Histogram(x=datosfil[medida], name=medida),
+            row=lrow, col=lcol
+        )
+        lrow = lrow + (lcol // 3)
+        lcol = lcol + 1 if lcol < 3 else 1
+
+    fig.update_layout(
+        bargap=0.05, # gap between bars of adjacent location coordinates
+        bargroupgap=0.05, # gap between bars of the same location coordinates
+        showlegend = False
+    )
+    return fig
+
+def correlaciones(datos, controlado):
+# filtramos solo a los que tienen datos del controlado
+    datos_fil = datos.loc[datos[controlado].notnull()]
+    datos_sorted = datos_fil.sort_values(by=controlado, ascending=True)
+    datos_sorted.reset_index(inplace=True)
+    metricas = ["imc", "age", "glucose", "hba1c"]
+    figura = make_subplots(rows=2, cols=2, shared_xaxes=True, vertical_spacing=0.1, subplot_titles=metricas)
+    lrow = 1
+    lcol = 1
+    for metrica in metricas:
+        figura.add_trace(
+            go.Scatter(x=datos_sorted.index, y=datos_sorted[metrica], name=metrica, mode='markers'),
+            row=lrow, col=lcol
+        )
+        figura.add_trace(
+            go.Scatter(x=datos_sorted.index, y=datos_sorted[metrica].rolling(5).mean(), name="PM(5)"),
+            row=lrow, col=lcol
+        )
+        corr = datos_sorted[[metrica]].corrwith(datos_sorted[controlado]).values[0]
+        figura.add_annotation(row=lrow, col=lcol, text="R = " + "{:.2f}".format(corr))
+        lcol = lcol + 1 if lcol < 2 else 1
+        lrow = lrow + (lcol % 2)
+    figura.update_layout(yaxis1= dict(range=[15,45]))    
+    figura.update_layout(yaxis3= dict(range=[80,120]))
+    figura.update_layout(yaxis4= dict(range=[4,7]))
+    figura.update_layout(showlegend=False)
+
+    return figura
+    
+def controlado_ppgr(datos):
+    datos_fil = datos.loc[datos["Glucosa"].notnull()]
+    controlados = ["Glucosa", "Gelatina", "Pan"]
+    figura = make_subplots(rows=2, cols=3, subplot_titles=controlados)
+    lcol = 1
+    for controlado in controlados:
+        figura.add_trace(
+            go.Scatter(y=datos[controlado], name=controlado, mode="markers"),
+            row = 1, col = lcol
+        )
+        figura.add_trace(
+            go.Histogram(x=datos[controlado], name=controlado, histnorm = "probability"),
+            row = 2, col = lcol
+        )
+        lcol = lcol + 1
+    figura.update_layout(
+        showlegend = False,
+        bargap=0.05, # gap between bars of adjacent location coordinates
+        bargroupgap=0.05 # gap between bars of the same location coordinates
+    )
+    return figura
+
+def triareas(pacientes):
+    figura = make_subplots(rows=1,cols=1)
+    for paciente in pacientes:
+        
+        url_datos = url + "patients/" + str(paciente) + "/visit-data/1"
+        datos = requests.get(url_datos).json()["tolerance_curve_measure"][0]
+        valores = list(datos.values())
+        alimento = valores.pop(0)
+        iauc = valores.pop(-1)
+        figura.add_trace(
+            go.Scatter(x=[0,15,30,45,60,90,120], y= valores, name=str(paciente)+":"+str(iauc)),
+            row = 1, col=1
+        )
+    return figura
+
+def comidas_datos(datos):
+    columnas = ["hc_total", "kcal_total", "fiber_total", "protein_total", "pre-AUC", "post-AUC", "iAUC", "N-alimentos"]
+    datos["N-alimentos"] = datos["foods"].apply(lambda x: len(x))
+
+    figura = make_subplots(cols=4, rows=2, shared_xaxes=True, vertical_spacing=0.1,
+                           subplot_titles=columnas)
+    lrow = 1
+    lcol = 1
+    for columna in columnas:
+        figura.add_trace(
+            go.Scatter(y=datos[columna], name=columna, mode="markers"),
+            row=lrow, col=lcol
+        )
+        lrow = lrow + (lcol // 4)
+        lcol = lcol + 1 if lcol < 4 else 1
+    figura.update_layout(showlegend=False)
+    return figura
+
+def comidas_describe(datos):
+    columnas = ["hc_total", "kcal_total", "fiber_total", "protein_total", "pre-AUC", "post-AUC", "iAUC", "N-alimentos"]
+    datos["N-alimentos"] = datos["foods"].apply(lambda x: len(x))
+    print(datos.columns)
+    figura = make_subplots(cols=4, rows=2, shared_yaxes=True,
+                           subplot_titles=columnas)
+    lrow = 1
+    lcol = 1
+    for columna in columnas:
+        datosfil = datos[np.abs(stats.zscore(datos[columna], nan_policy='omit')) < 3]
+        figura.add_trace(
+            go.Histogram(x=datosfil[columna], name=columna),
+            row=lrow, col=lcol
+        )
+        lrow = lrow + (lcol // 4)
+        lcol = lcol + 1 if lcol < 4 else 1
+
+
+    figura.update_layout(
+        showlegend = False,
+        bargap=0.05, # gap between bars of adjacent location coordinates
+        bargroupgap=0.05 # gap between bars of the same location coordinates
+    )
+    return figura
+
+        
+def heatmap(datos):
+
+    datosfil = datos.replace(0,np.nan)
+    datosfil.dropna(inplace=True, subset=["Gelatina", "Pan", "Glucosa"])
+    ranks = datosfil[["Pan", "Gelatina", "Glucosa"]].rank()
+    model = KMeans(n_clusters=3)
+    model.fit(ranks)
+    all_predictions = model.predict(ranks)
+    datosfil["grupo"] = all_predictions
+
+    datosfil.sort_values(by="grupo", inplace=True)
+    fig = go.Figure(
+        data = go.Heatmap(
+            z=[datosfil["Gelatina"], datosfil["Pan"], datosfil["Glucosa"]]
+        )
+    )
+    return fig
+
+def comidas_paciente_datos(datos):
+    columnas = ["hc_total", "kcal_total", "fiber_total", "protein_total", "lipids_total"]
+    figura = make_subplots(cols=5, rows=4, subplot_titles=("hc","kcal","fiber", "protein", "lipids"))
+    lcol = 1
+    for columna in columnas:
+        datosfil = datos[np.abs(stats.zscore(datos[columna], nan_policy='omit')) < 3]        
+        figura.add_trace(
+            go.Scatter(y=datosfil.groupby("patient")[columna].sum(), name="total_" + columna, mode= "markers"),
+            row=1, col=lcol
+        )
+        figura.add_trace(
+            go.Histogram(x=datosfil.groupby("patient")[columna].sum(), name="total_" + columna),
+            row=2, col=lcol
+        )
+
+        figura.add_trace(
+            go.Scatter(y=datosfil.groupby("patient")[columna].mean(), name="promedio_" + columna, mode= "markers"),
+            row=3, col=lcol
+        )
+        figura.add_trace(
+            go.Histogram(x=datosfil.groupby("patient")[columna].mean(), name="promedio_" + columna),
+            row=4, col=lcol
+        )
+        lcol = lcol + 1
+    figura.update_layout(showlegend = False)
+    return figura    
+
+def comidas_auc_paciente(datos):
+    columnas = ["pre-AUC", "post-AUC", "iAUC"]
+    figura = make_subplots(cols = 4, rows = 2, subplot_titles=("pre-AUC","post-AUC", "iAUC", "N-Comidas"))
+    lcol = 1
+    for columna in columnas:
+        datosfil = datos[np.abs(stats.zscore(datos[columna], nan_policy='omit')) < 3]        
+        figura.add_trace(
+            go.Scatter(y=datosfil.groupby("patient")[columna].mean(), name="promedio_" + columna, mode= "markers"),
+            row=1, col=lcol
+        )
+        figura.add_trace(
+            go.Histogram(x=datosfil.groupby("patient")[columna].mean(), name="Distr. " + columna),
+            row=2, col=lcol
+        )
+        lcol = lcol + 1
+    figura.add_trace(
+        go.Scatter(y=datosfil.groupby("patient")["iAUC"].count(), name="N-comidas", mode= "markers"),
+        row = 1, col = 4
+    )
+    figura.add_trace(
+        go.Histogram(x=datosfil.groupby("patient")["iAUC"].count(), name="Distr. N-comidas"),
+        row = 2, col = 4
+    )
+    figura.update_layout(
+        showlegend = False,
+        bargap=0.05, # gap between bars of adjacent location coordinates
+        bargroupgap=0.05 # gap between bars of the same location coordinates
+    )
+
+    return figura        
+        
+# some global parameters
+# Base URL of the API; note the trailing slash, which the code that appends
+# endpoint paths to it has to take into account.
+url = 'https://nutricion.c3.unam.mx/nd/'
+
+
+def get_visit_data(visitnumber):
+    url_visitas = url + "/visits/" + str(visitnumber) + "/patients-data/"
+#    url_visitas = "https://nutricion.c3.unam.mx/nd/visits/1/patients-data/"
+    try:
+        yaisons = requests.get(url_visitas).json()["patient_data"]
+    except Exception as e:
+        print("error al traer datos desde ", url_visitas)
+        print(e)
+        exit()
+        
+    df = pd.json_normalize(yaisons)
+    df.drop(["tolerance_curve_measure","name","patient_visit_id", "visit_date"], inplace=True, axis=1)
+    lista = [ { cf["controlled_food"]: cf["increment_auc"] for cf in yaison["tolerance_curve_measure"] } for yaison in yaisons ] 
+    df.rename(columns= lambda x: x.split(".")[1] if "." in x else x, inplace=True)
+    #df.drop(["patient_visit_id"], inplace=True, axis=0) #esta etiqueta aparece dos veces: dentro del primer nivel de json y dentro del json de sample
+    return pd.concat([df, pd.DataFrame(lista)], axis=1)
+
+# data sources
+
+# online query to the API for the visits, which provide the visit and
+# microbiota information (visits 1 to 4, one DataFrame per visit)
+visitas_datos = [ get_visit_data(visita) for visita in range(1, 5)]
+# meals are read from a local CSV; only the columns listed here are loaded
+comidas = pd.read_csv("todos-foods.csv", usecols=["hc_total", "kcal_total", "fiber_total",\
+                                                  "protein_total", "lipids_total","fecha_hora", "patient", "visita",\
+                                                  "glucosa_basal_area", "glucosa_estimulo_area",\
+                                                  "glucosa_area_efectiva","glucosa_area_relativa","foods"])
+
+# "foods" is stored as the text of a Python literal; parse it back into objects
+comidas["foods"] = comidas["foods"].apply(literal_eval)
+# keep only the rows whose "visita" value is "e1"
+comidas = comidas[comidas["visita"] == "e1"]
+# rename the glucose area columns to the AUC names used by the plotting functions
+comidas.rename(columns={"glucosa_basal_area":"pre-AUC", "glucosa_estimulo_area":"post-AUC", "glucosa_area_efectiva":"iAUC"}, inplace=True)
+print(comidas.columns)
+# the glucometer and meal information is more complex. for the moment, it is
+# being assembled offline.
+
+data_o = (
+    pd.read_csv("inventario.csv")
+)
+
+# patient ids become strings with a "_p" suffix
+data_o["paciente"] = data_o["paciente"].astype("str") + "_p"
+# constant column: every patient gets 0 in "Secuenciaciones"
+data_o["Secuenciaciones"] = 0
+
+
+Total_pacientes = len(data_o.index)
+# "incluido" is used directly as a row mask
+data = data_o.loc[data_o["incluido"]]
+Total_incluidos = len(data)
+# distribution plots by indicator.
+
+# rows of the first visit whose "include_in_analysis" flag is set
+visitas_incluidos = visitas_datos[0].loc[visitas_datos[0]["include_in_analysis"]]
+
+
+    
+
+# static plot of visits/sequencings
+# NOTE(review): neither `scat` nor `vis_sec` is referenced again in the
+# visible part of this file -- confirm whether they are still needed.
+scat = px.scatter(data, x="NDias_glucosa", y="Comidas_Aisladas",size="AUCs_glucosa")
+# figure description as a plain dict: two percent-normalized histograms
+vis_sec = {
+        "data": [
+            {
+                #"x": temp["paciente"],
+                "x": data["Nvisitas"].astype("str") + "_visitas",
+                "type": "histogram",
+                "name": "Visitas",
+                "histnorm": "percent",                
+                #"orientation": "v",
+            },
+            { "x": data["Secuenciaciones"], "type": "histogram", "name": "Secuenciaciones", "histnorm": "percent"},
+        ],
+        "layout": {
+            #"title": {"text": title + " " + etapalab + str(etapaDet)},
+            "xaxis": { "title": {"text": "Visitas por paciente"}},
+            "yaxis": { "title": {"text": "% de pacientes"}},            
+        }
+    }
+
+
+# extra stylesheet passed to Dash: the Lato font (weights 400 and 700)
+# served by Google Fonts
+external_stylesheets = [
+    {
+        "href": (
+            "https://fonts.googleapis.com/css2?"
+            "family=Lato:wght@400;700&display=swap"
+        ),
+        "rel": "stylesheet",
+    },
+]
+# the Dash application object; its layout is assigned further below
+app = Dash(__name__, external_stylesheets=external_stylesheets)
+app.title = "NutrIndMex. Análisis de datos."
+
+app.layout = html.Div(children=[
+    #encabezado
+    html.Div(children=[
+        html.P(children="🥑", className="header-emoji"),
+        html.H1(children="NutrIndMex", className="header-title"),
+        html.H2(children="Análisis de Datos", className="header-title"),
+
+    ],
+    className="header",),
+    # descripción por diferentes indices
+    html.Div(children=[
+        html.Div(children= [ html.H1("Características de la población", className= "header-title2"),], ),
+        dcc.Graph(figure=datavis(visitas_incluidos))
+    ]),
+
+    # descripción por diferentes indices
+    html.Div(children=[
+        html.Div(children= [ html.H1("Distribuciones de la población", className= "header-title2"),], ),
+        dcc.Graph(figure=popdist(visitas_incluidos))
+    ]),
+    # ppgr a controlados
+    html.Div(children= [
+        html.Div(children= [html.H1("iAUC/Controlados ", className = "header-title2")]),
+        dcc.Graph(figure=controlado_ppgr(visitas_incluidos))
+    ]),
+
+    # correlaciones
+    html.Div(children= [
+        html.Div(children= [html.H1("Relación de iAUC/Controlados con diversos índices", className = "header-title2")]),        dcc.Graph(figure=correlaciones(visitas_incluidos, "Glucosa"))
+    ]),
+    # areas de tres pacientes
+    html.Div(children= [
+        html.Div(children= [html.H1("Ejemplos de iAUC de Glucosa", className = "header-title2")]),
+        dcc.Graph(figure=triareas([85,148,56]))
+    ]),
+    # heatmap
+    html.Div(children= [
+        html.Div(children= [html.H1("Mapa de calor de iAUC/alimentos controlados", className = "header-title2")]),
+        dcc.Graph(figure=heatmap(visitas_incluidos))
+    ,]),
+    # datos de comidas
+    html.Div(children= [
+        html.Div(children= [html.H1("Macronutrientes / AUCs de comidas", className = "header-title2")]),
+        dcc.Graph(figure=comidas_datos(comidas))
+    ,]),
+    # descripcion de comidas
+    html.Div(children= [
+        html.Div(children= [html.H1("Distribución de Macronutrientes de comidas", className = "header-title2")]),
+        dcc.Graph(figure=comidas_describe(comidas))
+    ,]),
+    # descripcion de comidas por paciente
+    html.Div(children= [
+        html.Div(children= [html.H1("Macronutrientes de comidas por paciente", className = "header-title2")]),
+        dcc.Graph(figure=comidas_paciente_datos(comidas))
+    ,]),
+    # descripcion de aucs comidas por paciente
+    html.Div(children= [
+        html.Div(children= [html.H1("AUCs de comidas por paciente", className = "header-title2")]),
+        dcc.Graph(figure=comidas_auc_paciente(comidas))
+    ,]),
+    
+    
+]) # cierre del layout
+
+
+
+if __name__ == "__main__":
+    app.run_server(debug=True, port=8051, host='0.0.0.0')
--- a/requeriments.txt
+++ b/requeriments.txt
+pandas
+plotly
+dash
+scikit-learn