programa de estadísticas y lista de requerimientos

parent a1b9758a
import pandas as pd
from dash import Dash, Input, Output, dcc, html
import plotly.graph_objects as go
import plotly.express as px
import requests
import numpy as np
from plotly.subplots import make_subplots
import socket
from sklearn.cluster import KMeans
from scipy import stats
from ast import literal_eval
def datavis(datos):
    """Build a 2x3 grid of marker scatter plots, one per population measure.

    Args:
        datos: table indexable by column name (presumably a pandas
            DataFrame) providing the columns "imc", "age", "glucose",
            "hba1c", "ct" and "hdl".

    Returns:
        The plotly figure with one subplot per measure and the legend hidden.
    """
    nombres = ["imc", "age", "glucose", "hba1c", "ct", "hdl"]
    n_columnas = 3
    lienzo = make_subplots(
        rows=2,
        cols=n_columnas,
        shared_xaxes=True,
        vertical_spacing=0.1,
        subplot_titles=nombres,
    )
    for posicion, nombre in enumerate(nombres):
        # Fill the grid left to right, top to bottom (plotly cells are 1-based).
        fila, columna = divmod(posicion, n_columnas)
        puntos = go.Scatter(y=datos[nombre], name=nombre, mode="markers")
        lienzo.add_trace(puntos, row=fila + 1, col=columna + 1)
    lienzo.update_layout(showlegend=False)
    return lienzo
def popdist(datos):
    """Build a 2x3 grid of histograms, one per population measure.

    For every measure, only the rows whose absolute z-score for that
    measure is below 3 are plotted; the filter is recomputed independently
    for each measure.

    Args:
        datos: table indexable by column name (presumably a pandas
            DataFrame) providing the columns "imc", "age", "glucose",
            "hba1c", "ct" and "hdl".

    Returns:
        The plotly figure with one histogram per measure and the legend hidden.
    """
    nombres = ["imc", "age", "glucose", "hba1c", "ct", "hdl"]
    n_columnas = 3
    lienzo = make_subplots(
        rows=2, cols=n_columnas, shared_yaxes=True, subplot_titles=nombres
    )
    for posicion, nombre in enumerate(nombres):
        # Outlier removal: keep rows within three standard deviations.
        puntajes = stats.zscore(datos[nombre], nan_policy='omit')
        dentro = datos[np.abs(puntajes) < 3]
        fila, columna = divmod(posicion, n_columnas)
        lienzo.add_trace(
            go.Histogram(x=dentro[nombre], name=nombre),
            row=fila + 1,
            col=columna + 1,
        )
    lienzo.update_layout(
        showlegend=False,
        bargap=0.05,       # gap between bars of adjacent location coordinates
        bargroupgap=0.05,  # gap between bars of the same location coordinates
    )
    return lienzo
def correlaciones(datos, controlado):
    """Plot four patient indices against the rank order of a controlled-food value.

    Rows without a value in ``controlado`` are discarded and the rest are
    sorted by it in ascending order. Each of the 2x2 subplots shows one
    index as markers over the sorted position, a 5-point rolling mean of
    that index, and an annotation with its correlation against
    ``controlado``.

    Args:
        datos: pandas DataFrame with the columns "imc", "age", "glucose",
            "hba1c" and the column named by ``controlado``.
        controlado: name of the column used for filtering, sorting and
            correlating.

    Returns:
        The plotly figure, legend hidden, with fixed y ranges on the
        first, third and fourth subplots.
    """
    metricas = ["imc", "age", "glucose", "hba1c"]
    # Keep only the rows that have data for the controlled column, ordered by it.
    ordenados = (
        datos.loc[datos[controlado].notnull()]
        .sort_values(by=controlado, ascending=True)
        .reset_index()
    )
    figura = make_subplots(
        rows=2,
        cols=2,
        shared_xaxes=True,
        vertical_spacing=0.1,
        subplot_titles=metricas,
    )
    posiciones = ordenados.index
    referencia = ordenados[controlado]
    for numero, metrica in enumerate(metricas):
        fila, columna = divmod(numero, 2)
        celda = dict(row=fila + 1, col=columna + 1)
        serie = ordenados[metrica]
        figura.add_trace(
            go.Scatter(x=posiciones, y=serie, name=metrica, mode='markers'),
            **celda,
        )
        figura.add_trace(
            go.Scatter(x=posiciones, y=serie.rolling(5).mean(), name="PM(5)"),
            **celda,
        )
        corr = ordenados[[metrica]].corrwith(referencia).values[0]
        figura.add_annotation(text=f"R = {corr:.2f}", **celda)
    figura.update_layout(
        yaxis1=dict(range=[15, 45]),
        yaxis3=dict(range=[80, 120]),
        yaxis4=dict(range=[4, 7]),
        showlegend=False,
    )
    return figura
def controlado_ppgr(datos):
    """Plot the values and the distribution of each controlled-food column.

    The figure has one column of subplots per controlled food ("Glucosa",
    "Gelatina", "Pan"): the top row holds a marker scatter of the values,
    the bottom row a histogram normalised to probability.

    Args:
        datos: table indexable by column name (presumably a pandas
            DataFrame) providing the columns "Glucosa", "Gelatina" and "Pan".

    Returns:
        The plotly figure with the legend hidden.
    """
    controlados = ["Glucosa", "Gelatina", "Pan"]
    figura = make_subplots(rows=2, cols=3, subplot_titles=controlados)
    # The previous version also built a "Glucosa is not null" subset that was
    # never read; every trace has always been drawn from the full table.
    for lcol, controlado in enumerate(controlados, start=1):
        figura.add_trace(
            go.Scatter(y=datos[controlado], name=controlado, mode="markers"),
            row=1, col=lcol,
        )
        figura.add_trace(
            go.Histogram(x=datos[controlado], name=controlado, histnorm="probability"),
            row=2, col=lcol,
        )
    figura.update_layout(
        showlegend=False,
        bargap=0.05,      # gap between bars of adjacent location coordinates
        bargroupgap=0.05  # gap between bars of the same location coordinates
    )
    return figura
def triareas(pacientes):
    """Overlay the first tolerance curve of visit 1 for each given patient.

    For every patient the endpoint ``patients/<id>/visit-data/1`` under the
    module-level ``url`` is queried and the first entry of
    ``tolerance_curve_measure`` is plotted as one line. The first field of
    that entry is skipped and the last one is shown in the trace name
    (presumably the controlled-food label and the incremental AUC — confirm
    against the API schema). The remaining fields are drawn, in the order
    the API returns them, over the x values 0, 15, 30, 45, 60, 90, 120.

    Args:
        pacientes: iterable of patient identifiers.

    Returns:
        A single-subplot plotly figure with one trace per patient.

    Raises:
        requests.RequestException: on connection problems, timeouts or a
            non-success HTTP status.
    """
    tiempos = [0, 15, 30, 45, 60, 90, 120]
    figura = make_subplots(rows=1, cols=1)
    for paciente in pacientes:
        url_datos = url + "patients/" + str(paciente) + "/visit-data/1"
        # Bound the wait and fail on HTTP errors instead of trying to parse
        # an error page as if it were patient data.
        respuesta = requests.get(url_datos, timeout=60)
        respuesta.raise_for_status()
        datos = respuesta.json()["tolerance_curve_measure"][0]
        valores = list(datos.values())
        valores.pop(0)  # leading field is not part of the curve
        iauc = valores.pop(-1)
        figura.add_trace(
            go.Scatter(x=tiempos, y=valores, name=str(paciente) + ":" + str(iauc)),
            row=1, col=1,
        )
    return figura
def comidas_datos(datos):
    """Build a 2x4 grid of marker scatter plots describing the meals table.

    Plots the macronutrient totals, the three AUC columns and the number of
    foods per meal ("N-alimentos", the length of each entry in "foods").

    Args:
        datos: pandas DataFrame with the columns "hc_total", "kcal_total",
            "fiber_total", "protein_total", "pre-AUC", "post-AUC", "iAUC"
            and "foods". It is not modified.

    Returns:
        The plotly figure with one subplot per column and the legend hidden.
    """
    columnas = ["hc_total", "kcal_total", "fiber_total", "protein_total",
                "pre-AUC", "post-AUC", "iAUC", "N-alimentos"]
    n_columnas = 4
    # Derive the food count on a copy: the caller's frame (a filtered slice
    # of the CSV) must not gain a column as a side effect of plotting.
    datos = datos.assign(**{"N-alimentos": datos["foods"].apply(len)})
    figura = make_subplots(cols=n_columnas, rows=2, shared_xaxes=True,
                           vertical_spacing=0.1, subplot_titles=columnas)
    for posicion, columna in enumerate(columnas):
        fila, col = divmod(posicion, n_columnas)
        figura.add_trace(
            go.Scatter(y=datos[columna], name=columna, mode="markers"),
            row=fila + 1, col=col + 1,
        )
    figura.update_layout(showlegend=False)
    return figura
def comidas_describe(datos):
    """Build a 2x4 grid of histograms describing the meals table.

    Covers the macronutrient totals, the three AUC columns and the number
    of foods per meal ("N-alimentos", the length of each entry in "foods").
    For every column only the rows whose absolute z-score for that column
    is below 3 are plotted.

    Args:
        datos: pandas DataFrame with the columns "hc_total", "kcal_total",
            "fiber_total", "protein_total", "pre-AUC", "post-AUC", "iAUC"
            and "foods". It is not modified.

    Returns:
        The plotly figure with one histogram per column and the legend hidden.
    """
    columnas = ["hc_total", "kcal_total", "fiber_total", "protein_total",
                "pre-AUC", "post-AUC", "iAUC", "N-alimentos"]
    n_columnas = 4
    # Derive the food count on a copy: the caller's frame (a filtered slice
    # of the CSV) must not gain a column as a side effect of plotting.
    datos = datos.assign(**{"N-alimentos": datos["foods"].apply(len)})
    figura = make_subplots(cols=n_columnas, rows=2, shared_yaxes=True,
                           subplot_titles=columnas)
    for posicion, columna in enumerate(columnas):
        # Outlier removal: keep rows within three standard deviations.
        datosfil = datos[np.abs(stats.zscore(datos[columna], nan_policy='omit')) < 3]
        fila, col = divmod(posicion, n_columnas)
        figura.add_trace(
            go.Histogram(x=datosfil[columna], name=columna),
            row=fila + 1, col=col + 1,
        )
    figura.update_layout(
        showlegend=False,
        bargap=0.05,      # gap between bars of adjacent location coordinates
        bargroupgap=0.05  # gap between bars of the same location coordinates
    )
    return figura
def heatmap(datos):
    """Draw a heatmap of the controlled-food values with patients grouped by cluster.

    Zeros are treated as missing, rows lacking any of the three values are
    dropped, and the remaining rows are clustered with KMeans on the
    per-column ranks. Rows are then ordered by cluster label so that
    similar response profiles sit next to each other along the x axis.

    Args:
        datos: pandas DataFrame with the columns "Gelatina", "Pan" and
            "Glucosa". It is not modified.

    Returns:
        A plotly figure with one heatmap row per controlled food; an empty
        figure when no row has all three values.
    """
    alimentos = ["Gelatina", "Pan", "Glucosa"]
    datosfil = datos[alimentos].replace(0, np.nan).dropna()
    if datosfil.empty:
        # Nothing to cluster; KMeans would raise on an empty sample.
        return go.Figure()
    ranks = datosfil[["Pan", "Gelatina", "Glucosa"]].rank()
    # A fixed seed keeps the grouping (and therefore the column order of the
    # heatmap) identical between runs; the cluster count is capped by the
    # number of available rows so tiny samples do not raise.
    model = KMeans(n_clusters=min(3, len(datosfil)), random_state=0)
    model.fit(ranks)
    datosfil = datosfil.assign(grupo=model.predict(ranks)).sort_values(by="grupo")
    fig = go.Figure(
        data=go.Heatmap(
            z=[datosfil[alimento] for alimento in alimentos],
            y=alimentos,  # label each row with the food it represents
        )
    )
    return fig
def comidas_paciente_datos(datos):
    """Summarise the macronutrients of the meals per patient in a 4x5 grid.

    Each grid column corresponds to one macronutrient total. For it, the
    rows whose absolute z-score is 3 or more are discarded and the rest are
    grouped by "patient". From top to bottom the rows show: per-patient
    sum (scatter), distribution of the sums (histogram), per-patient mean
    (scatter) and distribution of the means (histogram).

    Args:
        datos: pandas DataFrame with the columns "hc_total", "kcal_total",
            "fiber_total", "protein_total", "lipids_total" and "patient".

    Returns:
        The plotly figure with the legend hidden.
    """
    columnas = ["hc_total", "kcal_total", "fiber_total", "protein_total", "lipids_total"]
    figura = make_subplots(
        cols=5, rows=4, subplot_titles=("hc", "kcal", "fiber", "protein", "lipids")
    )
    for posicion, columna in enumerate(columnas, start=1):
        puntajes = stats.zscore(datos[columna], nan_policy='omit')
        sin_atipicos = datos[np.abs(puntajes) < 3]
        por_paciente = sin_atipicos.groupby("patient")[columna]
        totales = por_paciente.sum()
        promedios = por_paciente.mean()
        # One trace per grid row, listed top to bottom.
        trazas = (
            go.Scatter(y=totales, name="total_" + columna, mode="markers"),
            go.Histogram(x=totales, name="total_" + columna),
            go.Scatter(y=promedios, name="promedio_" + columna, mode="markers"),
            go.Histogram(x=promedios, name="promedio_" + columna),
        )
        for fila, traza in enumerate(trazas, start=1):
            figura.add_trace(traza, row=fila, col=posicion)
    figura.update_layout(showlegend=False)
    return figura
def comidas_auc_paciente(datos):
    """Summarise the meal AUCs per patient in a 2x4 grid.

    The first three grid columns cover "pre-AUC", "post-AUC" and "iAUC":
    rows whose absolute z-score for that column is 3 or more are discarded,
    then the per-patient mean is drawn as a scatter (top) and as a
    histogram (bottom). The fourth column shows the number of "iAUC" values
    per patient, counted on the frame filtered by the "iAUC" z-score.

    Args:
        datos: pandas DataFrame with the columns "pre-AUC", "post-AUC",
            "iAUC" and "patient".

    Returns:
        The plotly figure with the legend hidden.
    """
    def sin_atipicos(nombre):
        # Rows within three standard deviations for the given column.
        return datos[np.abs(stats.zscore(datos[nombre], nan_policy='omit')) < 3]

    figura = make_subplots(
        cols=4, rows=2, subplot_titles=("pre-AUC", "post-AUC", "iAUC", "N-Comidas")
    )
    for posicion, columna in enumerate(["pre-AUC", "post-AUC", "iAUC"], start=1):
        promedios = sin_atipicos(columna).groupby("patient")[columna].mean()
        figura.add_trace(
            go.Scatter(y=promedios, name="promedio_" + columna, mode="markers"),
            row=1, col=posicion,
        )
        figura.add_trace(
            go.Histogram(x=promedios, name="Distr. " + columna),
            row=2, col=posicion,
        )
    # Meal counts use the iAUC-filtered rows, i.e. the frame left over from
    # the last pass of the loop above.
    conteos = sin_atipicos("iAUC").groupby("patient")["iAUC"].count()
    figura.add_trace(
        go.Scatter(y=conteos, name="N-comidas", mode="markers"),
        row=1, col=4,
    )
    figura.add_trace(
        go.Histogram(x=conteos, name="Distr. N-comidas"),
        row=2, col=4,
    )
    figura.update_layout(
        showlegend=False,
        bargap=0.05,      # gap between bars of adjacent location coordinates
        bargroupgap=0.05  # gap between bars of the same location coordinates
    )
    return figura
# Global parameters.
# Base address of the API; the data-fetching functions in this file append
# their endpoint paths to it.
url = 'https://nutricion.c3.unam.mx/nd/'
def get_visit_data(visitnumber):
    """Download the patient data of one visit and return it as a flat table.

    The ``patient_data`` list of the ``visits/<n>/patients-data/`` endpoint
    is flattened with ``pandas.json_normalize``. Nested column names of the
    form ``a.b`` are shortened to their second component, and one column
    per controlled food holding its ``increment_auc`` is appended.

    Args:
        visitnumber: visit number to query.

    Returns:
        pandas DataFrame with one row per patient.

    Raises:
        SystemExit: with status 1 when the data cannot be downloaded or
            decoded; the error is printed first.
    """
    # The base URL already ends in "/"; strip it so the path does not end up
    # containing a double slash ("/nd//visits/...").
    url_visitas = url.rstrip("/") + "/visits/" + str(visitnumber) + "/patients-data/"
    try:
        respuesta = requests.get(url_visitas, timeout=60)
        respuesta.raise_for_status()
        yaisons = respuesta.json()["patient_data"]
    except Exception as e:
        print("error al traer datos desde ", url_visitas)
        print(e)
        # Exit with a failure status; a bare exit() would report success.
        raise SystemExit(1) from e
    df = pd.json_normalize(yaisons)
    df.drop(["tolerance_curve_measure", "name", "patient_visit_id", "visit_date"],
            inplace=True, axis=1)
    # One dict per patient: controlled food -> incremental AUC.
    lista = [
        {cf["controlled_food"]: cf["increment_auc"] for cf in yaison["tolerance_curve_measure"]}
        for yaison in yaisons
    ]
    # NOTE(review): an earlier comment says "patient_visit_id" appears both at
    # the first JSON level and inside the nested sample object; after this
    # rename the nested one would carry the bare name — confirm.
    df.rename(columns=lambda x: x.split(".")[1] if "." in x else x, inplace=True)
    return pd.concat([df, pd.DataFrame(lista)], axis=1)
# Data sources.
# Visit and microbiota information is queried online from the API,
# one table per visit (visits 1 to 4).
visitas_datos = list(map(get_visit_data, range(1, 5)))

# Meals come from a local CSV; only the columns used by the plots are read.
comidas = pd.read_csv(
    "todos-foods.csv",
    usecols=[
        "hc_total", "kcal_total", "fiber_total",
        "protein_total", "lipids_total", "fecha_hora", "patient", "visita",
        "glucosa_basal_area", "glucosa_estimulo_area",
        "glucosa_area_efectiva", "glucosa_area_relativa", "foods",
    ],
)
# "foods" is stored as the text of a Python literal; parse it back.
comidas["foods"] = comidas["foods"].apply(literal_eval)
# Keep the rows labelled "e1" and give the area columns their AUC names.
comidas = comidas.loc[comidas["visita"] == "e1"].rename(
    columns={
        "glucosa_basal_area": "pre-AUC",
        "glucosa_estimulo_area": "post-AUC",
        "glucosa_area_efectiva": "iAUC",
    }
)
print(comidas.columns)
# Glucometer and meal information is more complex; for the moment it is
# being assembled offline and read from the inventory file.
data_o = pd.read_csv("inventario.csv").assign(
    paciente=lambda tabla: tabla["paciente"].astype("str") + "_p",
    Secuenciaciones=0,
)
Total_pacientes = len(data_o.index)
data = data_o.loc[lambda tabla: tabla["incluido"]]
Total_incluidos = len(data)
# Distribution plots per indicator use the first visit, restricted to the
# patients flagged for inclusion in the analysis.
visitas_incluidos = visitas_datos[0].loc[lambda visita: visita["include_in_analysis"]]
# Static chart of visits / sequencings.
scat = px.scatter(
    data,
    x="NDias_glucosa",
    y="Comidas_Aisladas",
    size="AUCs_glucosa",
)
vis_sec = dict(
    data=[
        dict(
            x=data["Nvisitas"].astype("str") + "_visitas",
            type="histogram",
            name="Visitas",
            histnorm="percent",
        ),
        dict(
            x=data["Secuenciaciones"],
            type="histogram",
            name="Secuenciaciones",
            histnorm="percent",
        ),
    ],
    layout=dict(
        xaxis=dict(title=dict(text="Visitas por paciente")),
        yaxis=dict(title=dict(text="% de pacientes")),
    ),
)
# External stylesheet providing the Lato font in weights 400 and 700.
external_stylesheets = [
    dict(
        href="https://fonts.googleapis.com/css2?family=Lato:wght@400;700&display=swap",
        rel="stylesheet",
    ),
]
# The Dash application object and the title assigned to it.
app = Dash(__name__, external_stylesheets=external_stylesheets)
app.title = "NutrIndMex. Análisis de datos."
def _seccion(titulo, figura):
    """Return the Div used by every page section: a heading followed by its graph."""
    encabezado = html.Div(children=[html.H1(titulo, className="header-title2")])
    return html.Div(children=[encabezado, dcc.Graph(figure=figura)])


# Page layout: the header, then one section per figure. The figures are
# built here, in the listed order, when the module is loaded.
app.layout = html.Div(children=[
    # header
    html.Div(
        children=[
            html.P(children="🥑", className="header-emoji"),
            html.H1(children="NutrIndMex", className="header-title"),
            html.H2(children="Análisis de Datos", className="header-title"),
        ],
        className="header",
    ),
    # population description by index
    _seccion("Características de la población", datavis(visitas_incluidos)),
    # population distributions by index
    _seccion("Distribuciones de la población", popdist(visitas_incluidos)),
    # response to the controlled foods
    _seccion("iAUC/Controlados ", controlado_ppgr(visitas_incluidos)),
    # correlations
    _seccion(
        "Relación de iAUC/Controlados con diversos índices",
        correlaciones(visitas_incluidos, "Glucosa"),
    ),
    # curves of three patients
    _seccion("Ejemplos de iAUC de Glucosa", triareas([85, 148, 56])),
    # heatmap
    _seccion("Mapa de calor de iAUC/alimentos controlados", heatmap(visitas_incluidos)),
    # meal data
    _seccion("Macronutrientes / AUCs de comidas", comidas_datos(comidas)),
    # meal distributions
    _seccion("Distribución de Macronutrientes de comidas", comidas_describe(comidas)),
    # meal macronutrients per patient
    _seccion("Macronutrientes de comidas por paciente", comidas_paciente_datos(comidas)),
    # meal AUCs per patient
    _seccion("AUCs de comidas por paciente", comidas_auc_paciente(comidas)),
])  # end of the layout
if __name__ == "__main__":
    # `Dash.run_server` is the legacy launcher and is no longer available in
    # current Dash releases; use `Dash.run` when the installed version has it
    # and fall back to `run_server` on older installations.
    # NOTE(review): debug=True combined with host='0.0.0.0' exposes the debug
    # tooling on every network interface — confirm this is development-only.
    lanzar = app.run if hasattr(app, "run") else app.run_server
    lanzar(debug=True, port=8051, host='0.0.0.0')
pandas
plotly
dash
scikit-learn
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment