# Table S3-4

In [1]:
import scipy.stats
import numpy as np
from statsmodels.stats.multicomp import pairwise_tukeyhsd
from statsmodels.sandbox.stats.multicomp import get_tukey_pvalue
import joblib
import pandas as pd

In [2]:
# Load the precomputed decoding results that the cells below index as
# decoding_results[task][decoder][f"{modality}_{method}_{window}"][num_neurons].
# NOTE(review): `data` is not a named parameter of pd.read_hdf (the dataset
# selector is `key`); presumably `key=...` was intended and the load only works
# because the file holds a single dataset — confirm before changing.
decoding_results = pd.read_hdf("../data/Figure5Revision.h5", data="key")

In [3]:
def get_data2(decoding_results, task, decoders, methods, window, modality, num_neurons):
    """Gather results for several decoder/method pairs at one neuron count.

    ``decoders`` and ``methods`` are paired element-wise with ``zip`` (extra
    entries in the longer one are silently ignored). For each pair the values
    stored at
    ``decoding_results[task][decoder][f"{modality}_{method}_{window}"][num_neurons]``
    are collected. Methods whose name contains ``"joint"`` are stored as 2-D
    data with one column per modality (column 0 for ``"ca"``, column 1 for
    ``"np"``); only the column matching ``modality`` is kept.

    Args:
        decoding_results: Nested mapping indexed as described above.
        task: First-level key, e.g. ``"frame_id"``.
        decoders: Sequence of decoder names (second-level keys).
        methods: Sequence of method names, same order as ``decoders``.
        window: Window label used to build the result key.
        modality: ``"ca"`` or ``"np"``; also the prefix of the result key.
        num_neurons: Key selecting the neuron count within each result.

    Returns:
        A tuple ``(values, labels)`` of concatenated arrays, where ``labels``
        repeats ``f"{modality}_{method}_{window}_{decoder}"`` once per value
        of the corresponding pair.

    Raises:
        ValueError: If a joint method is requested with a modality other than
            ``"ca"`` or ``"np"`` (previously an ``UnboundLocalError``), or if
            no pairs are given (``np.concatenate`` of an empty list).
    """
    # Column of a joint-model result that belongs to each modality.
    joint_column = {"ca": 0, "np": 1}

    accs = []
    keys = []
    for decoder, method in zip(decoders, methods):
        key = f"{modality}_{method}_{window}"
        if "joint" in method:
            # Only joint results need the modality column, so the modality is
            # validated here rather than up front; non-joint lookups keep
            # working exactly as before.
            if modality not in joint_column:
                raise ValueError(
                    f"modality must be 'ca' or 'np' for joint method "
                    f"{method!r}, got {modality!r}"
                )
            column = joint_column[modality]
            acc = np.array(decoding_results[task][decoder][key][num_neurons])[:, column]
        else:
            acc = np.array(decoding_results[task][decoder][key][num_neurons])
        accs.append(acc)
        # One label per value so the labels line up with the concatenated data.
        keys.append([f"{key}_{decoder}"] * len(acc))
    return np.concatenate(accs), np.concatenate(keys)


def concat_neurons(decoding_results, task, decoder, method, window, modality):
    """Concatenate one decoder/method's results across every neuron count.

    Reads ``decoding_results[task][decoder][f"{modality}_{method}_{window}"]``
    and joins the values stored under each of its keys, in key order. Methods
    whose name contains ``"joint"`` are stored as 2-D data with one column per
    modality (column 0 for ``"ca"``, column 1 for ``"np"``); only the column
    matching ``modality`` is kept.

    Args:
        decoding_results: Nested mapping indexed as described above.
        task: First-level key, e.g. ``"frame_id"``.
        decoder: Decoder name (second-level key).
        method: Method name used to build the result key.
        window: Window label used to build the result key.
        modality: ``"ca"`` or ``"np"``; also the prefix of the result key.

    Returns:
        A single array with the values of all neuron counts concatenated.

    Raises:
        ValueError: If a joint method is requested with a modality other than
            ``"ca"`` or ``"np"`` (previously an ``UnboundLocalError``), or if
            the selected result has no entries (``np.concatenate`` of an
            empty list).
    """
    # Column of a joint-model result that belongs to each modality.
    joint_column = {"ca": 0, "np": 1}

    is_joint = "joint" in method
    # Only joint results need the modality column; validate once, up front,
    # instead of failing on an unbound local inside the loop.
    if is_joint and modality not in joint_column:
        raise ValueError(
            f"modality must be 'ca' or 'np' for joint method "
            f"{method!r}, got {modality!r}"
        )

    key = f"{modality}_{method}_{window}"
    per_neuron_count = decoding_results[task][decoder][key]
    accs = []
    # Iterate .keys() explicitly: the container may not be a plain dict, and
    # bare iteration is not guaranteed to yield keys for every mapping-like type.
    for n in per_neuron_count.keys():
        values = np.array(per_neuron_count[n])
        if is_joint:
            values = values[:, joint_column[modality]]
        accs.append(values)
    return np.concatenate(accs)

### ANOVA for CEBRA, CEBRA-joint, baseline 330 ms (10 frame window):

In [4]:
# One-way ANOVA on the "np" modality with the "330" window. Each
# (decoder, method) pair contributes one group, built by concat_neurons
# from the results of all neuron counts.
anova_groups_330 = [
    concat_neurons(decoding_results, "frame_id", decoder, method, "330", "np")
    for decoder, method in (
        ("knn", "cebra"),
        ("knn", "cebra_joint"),
        ("knn", "baseline"),
        ("bayes", "baseline"),
    )
]
np_total_stats = scipy.stats.f_oneway(*anova_groups_330)

print(f"NP total stats \n {np_total_stats}")

NP total stats 
 F_onewayResult(statistic=1.2871516854445968, pvalue=0.27999880116247905)


### ANOVA for CEBRA, CEBRA-joint, baseline 33 ms (1 frame window):

In [5]:
# One-way ANOVA on the "np" modality with the "33" window. Each
# (decoder, method) pair contributes one group, built by concat_neurons
# from the results of all neuron counts.
anova_groups_33 = [
    concat_neurons(decoding_results, "frame_id", decoder, method, "33", "np")
    for decoder, method in (
        ("knn", "cebra"),
        ("knn", "cebra_joint"),
        ("knn", "baseline"),
        ("bayes", "baseline"),
    )
]
np_total_stats = scipy.stats.f_oneway(*anova_groups_33)

print(f"NP total stats \n {np_total_stats}")

NP total stats 
 F_onewayResult(statistic=5.881619459851054, pvalue=0.0007278034882167102)


### Pairwise Tukey HSD comparisons (post hoc to the ANOVA) for CEBRA, CEBRA-joint, and the baselines at each number of neurons

In [6]:
num_neurons = [10, 30, 50, 100, 200, 400, 600, 800, 900, 1000]

# Paired element-wise by get_data2:
# (knn, cebra), (knn, cebra_joint), (knn, baseline), (bayes, baseline).
compared_decoders = ["knn", "knn", "knn", "bayes"]
compared_methods = ["cebra", "cebra_joint", "baseline", "baseline"]

for n_neurons in num_neurons:
    print(f"For {n_neurons} neurons from np recording (330ms):")

    np_data, np_keys = get_data2(
        decoding_results,
        task="frame_id",
        decoders=compared_decoders,
        methods=compared_methods,
        window="330",
        modality="np",
        num_neurons=n_neurons,
    )

    # Tukey HSD over the four groups; np_keys carries one group label per value.
    stats = pairwise_tukeyhsd(np_data.flatten(), np_keys)
    print(stats)

For 10 neurons from np recording (330ms):
                Multiple Comparison of Means - Tukey HSD, FWER=0.05                 
        group1                group2         meandiff p-adj   lower    upper  reject
------------------------------------------------------------------------------------
np_baseline_330_bayes    np_baseline_330_knn     -5.7 0.5269 -17.4894  6.0894  False
np_baseline_330_bayes       np_cebra_330_knn  -4.1111 0.7529 -15.9005  7.6782  False
np_baseline_330_bayes np_cebra_joint_330_knn  -8.0833 0.2429 -19.8727   3.706  False
  np_baseline_330_knn       np_cebra_330_knn   1.5889 0.9798 -10.2005 13.3782  False
  np_baseline_330_knn np_cebra_joint_330_knn  -2.3833 0.9371 -14.1727   9.406  False
     np_cebra_330_knn np_cebra_joint_330_knn  -3.9722 0.7713 -15.7616  7.8171  False
------------------------------------------------------------------------------------
For 30 neurons from np recording (330ms):
                Multiple Comparison of Means - Tukey HSD, FWER=0.0

In [7]:
num_neurons = [10, 30, 50, 100, 200, 400, 600, 800, 900, 1000]

# Paired element-wise by get_data2:
# (knn, cebra), (knn, cebra_joint), (knn, baseline), (bayes, baseline).
compared_decoders = ["knn", "knn", "knn", "bayes"]
compared_methods = ["cebra", "cebra_joint", "baseline", "baseline"]

for n_neurons in num_neurons:
    print(f"For {n_neurons} neurons from np recording (33ms):")

    np_data, np_keys = get_data2(
        decoding_results,
        task="frame_id",
        decoders=compared_decoders,
        methods=compared_methods,
        window="33",
        modality="np",
        num_neurons=n_neurons,
    )

    # Tukey HSD over the four groups; np_keys carries one group label per value.
    stats = pairwise_tukeyhsd(np_data.flatten(), np_keys)
    print(stats)

For 10 neurons from np recording (33ms):
              Multiple Comparison of Means - Tukey HSD, FWER=0.05               
       group1                group2        meandiff p-adj   lower  upper  reject
--------------------------------------------------------------------------------
np_baseline_33_bayes    np_baseline_33_knn  -1.7333 0.6751 -6.1089 2.6423  False
np_baseline_33_bayes       np_cebra_33_knn   0.3556 0.9954   -4.02 4.7312  False
np_baseline_33_bayes np_cebra_joint_33_knn   0.6667 0.9714 -3.7089 5.0423  False
  np_baseline_33_knn       np_cebra_33_knn   2.0889 0.5371 -2.2867 6.4645  False
  np_baseline_33_knn np_cebra_joint_33_knn      2.4 0.4225 -1.9756 6.7756  False
     np_cebra_33_knn np_cebra_joint_33_knn   0.3111 0.9969 -4.0645 4.6867  False
--------------------------------------------------------------------------------
For 30 neurons from np recording (33ms):
               Multiple Comparison of Means - Tukey HSD, FWER=0.05                
       group1            