import pandas
import re
import numpy as np
# Load the exoskeleton metabolic dataset from a pickle file; the rest of the
# script treats the result as a pandas DataFrame.
# NOTE(review): unpickling can execute arbitrary code, so only load this file
# if it comes from a trusted source.
metabolic_df = pandas.read_pickle("data_exoskeleton_metabolic/first_data_for_Toby.pkl")
# Bare expression: displays the column names in an interactive session only;
# it produces no output when the file is run as a script.
metabolic_df.columns
# The primary way to stratify this data is probably by impairment level, which
# is stored as one binary indicator column per impairment type. Cerebral Palsy
# (CP) has 4 categories (CP_4 may never be used); there are also unimpaired,
# elderly, and stroke participants. That gives 6 distinct groups in practice
# (CP_1-CP_3 plus the other three) unless all the CP levels are pooled
# together. Note that the loop below does pool them: every CP_* column is
# labelled simply "CP".
# Indicator columns to fold into a single categorical "group" column:
# the four CP levels followed by the three non-CP cohorts.
group_list = [f"CP_{level}" for level in range(1, 5)] + ["stroke", "elderly", "unimpaired"]

# Start with an empty label everywhere; rows matching no indicator keep "".
metabolic_df["group"] = ""

for indicator in group_list:
    # Keep only the text before the first underscore, so CP_1..CP_4 all
    # collapse to the shared label "CP"; other names are unchanged.
    label = indicator.split("_", 1)[0]

    # Rows flagged by this indicator take its label; all others keep whatever
    # label they already had (a later indicator overrides an earlier one).
    flagged = metabolic_df[indicator] == 1
    metabolic_df["group"] = np.where(flagged, label, metabolic_df["group"])

    # Sanity check: cross-tabulate the running labels against this indicator.
    print(pandas.crosstab(metabolic_df["group"], metabolic_df[indicator]))
    print("")
# The lines below are bare expressions meant for interactive inspection; they
# display their result in a REPL/notebook and have no effect in a script.

# Number of rows assigned to each group label.
metabolic_df.group.value_counts()
# Distribution of values in the "stroke" indicator column.
metabolic_df.stroke.value_counts()
# Positional column layout, per the original author's notes.
# NOTE(review): the positions are hard-coded and assume a fixed column order
# in the pickle - confirm against metabolic_df.columns.
metabolic_df.iloc[:,1:25]  # features (column positions 1 through 24)
metabolic_df.iloc[:,25]  # label
metabolic_df.iloc[:,26]  # width of the 95% CI
metabolic_df.iloc[:,27]  # type of filtering
metabolic_df.iloc[:,28]  # label in a different domain