# Table S5

In [1]:
import scipy.stats
import numpy as np
from statsmodels.stats.multicomp import pairwise_tukeyhsd
from statsmodels.sandbox.stats.multicomp import get_tukey_pvalue
import joblib
import pandas as pd

In [2]:
# Load the stored decoding results used by every analysis cell below.
# NOTE(review): `data="key"` is not the `key=` parameter of `pd.read_hdf`; it is
# passed along as an extra keyword argument, so no explicit key is selected here.
# This looks like a swapped `key="data"` -- confirm against the keys stored in
# the HDF5 file before changing it.
decoding_results = pd.read_hdf("../data/Figure5Revision.h5", data="key")

In [3]:
def get_data(decoding_results, task, decoders, methods, window, modality, num_neurons):
    """Gather values and matching group labels for several decoder/method pairs.

    For every ``(decoder, method)`` pair the entry
    ``decoding_results[task][decoder][f"{modality}_{method}_{window}"][num_neurons]``
    is read and converted to a NumPy array. Entries of methods whose name
    contains ``"joint"`` are indexed as ``[:, column]``, where the column is
    0 for modality ``"ca"`` and 1 for modality ``"np"``.

    Args:
        decoding_results: Object supporting the chained indexing shown above
            (presumably the decoding accuracies -- confirm against the data file).
        task: First-level key selecting the task.
        decoders: Iterable of decoder names, paired element-wise with ``methods``.
        methods: Iterable of method names, paired element-wise with ``decoders``.
        window: Window label used as the suffix of the lookup key.
        modality: Prefix of the lookup key; must be ``"ca"`` or ``"np"`` when
            any method is a "joint" method.
        num_neurons: Last-level key selecting the entry to read.

    Returns:
        Tuple ``(values, labels)``: the concatenation of all selected arrays,
        and an array holding the label ``f"{key}_{decoder}"`` repeated once per
        element along the first axis of each selected array.

    Raises:
        ValueError: If a "joint" method is requested with a modality other than
            ``"ca"`` or ``"np"`` (previously an ``UnboundLocalError``).
    """
    # Column of a joint result that belongs to the requested modality;
    # None when the modality has no known column.
    joint_column = {"ca": 0, "np": 1}.get(modality)

    accs = []
    keys = []
    for decoder, method in zip(decoders, methods):
        key = f"{modality}_{method}_{window}"
        acc = np.array(decoding_results[task][decoder][key][num_neurons])
        if "joint" in method:
            if joint_column is None:
                raise ValueError(
                    f"Unknown modality {modality!r} for joint method {method!r}; "
                    "expected 'ca' or 'np'."
                )
            acc = acc[:, joint_column]
        accs.append(acc)
        # One label per entry so values and labels stay aligned.
        keys.append([f"{key}_{decoder}"] * len(acc))
    return np.concatenate(accs), np.concatenate(keys)


def concat_neurons(decoding_results, task, decoder, method, window, modality):
    """Concatenate the entries of one decoder/method across all stored keys.

    Reads ``decoding_results[task][decoder][f"{modality}_{method}_{window}"]``
    and iterates over its ``.keys()`` (the neuron counts, judging by the
    function name -- confirm against the data file). Each entry is converted to
    a NumPy array; for methods whose name contains ``"joint"`` it is indexed as
    ``[:, column]``, where the column is 0 for modality ``"ca"`` and 1 for
    modality ``"np"``.

    Args:
        decoding_results: Object supporting the chained indexing shown above.
        task: First-level key selecting the task.
        decoder: Second-level key selecting the decoder.
        method: Method name used inside the lookup key.
        window: Window label used as the suffix of the lookup key.
        modality: Prefix of the lookup key; must be ``"ca"`` or ``"np"`` when
            ``method`` is a "joint" method.

    Returns:
        The concatenation of all selected arrays, in ``.keys()`` order.

    Raises:
        ValueError: If a "joint" method is requested with a modality other than
            ``"ca"`` or ``"np"`` (previously an ``UnboundLocalError``).
    """
    # Column of a joint result that belongs to the requested modality;
    # None when the modality has no known column.
    joint_column = {"ca": 0, "np": 1}.get(modality)
    is_joint = "joint" in method

    key = f"{modality}_{method}_{window}"
    per_key_results = decoding_results[task][decoder][key]

    accs = []
    for n in per_key_results.keys():
        acc = np.array(per_key_results[n])
        if is_joint:
            if joint_column is None:
                raise ValueError(
                    f"Unknown modality {modality!r} for joint method {method!r}; "
                    "expected 'ca' or 'np'."
                )
            acc = acc[:, joint_column]
        accs.append(acc)
    return np.concatenate(accs)

## ANOVA for CEBRA, CEBRA-joint, and the kNN and Bayes baselines at 33 ms (1-frame window)

In [4]:
# One-way ANOVA over four groups, each pooled across all stored keys by
# `concat_neurons`. Every group is a (decoder, method) pair evaluated on the
# "np" modality with the "33" window.
anova_groups = [
    ("knn", "cebra"),
    ("knn", "cebra_joint"),
    ("knn", "baseline"),
    ("bayes", "baseline"),
]

np_total_stats = scipy.stats.f_oneway(
    *(
        concat_neurons(decoding_results, "scene_annotation", decoder, method, "33", "np")
        for decoder, method in anova_groups
    )
)

print(f"NP total stats \n {np_total_stats}")

NP total stats 
 F_onewayResult(statistic=15.733284888195138, pvalue=3.3055419499175817e-09)


## ANOVA post-hoc test (Tukey HSD) for CEBRA, CEBRA-joint, and baselines for each number of neurons

In [5]:
# Pairwise Tukey HSD comparison of four decoder/method groups, run separately
# for each neuron count on the "np" modality with the "33" window.
num_neurons = [10, 30, 50, 100, 200, 400, 600, 800, 900, 1000]

# Paired element-wise: decoder k is evaluated with method k.
tukey_decoders = ["knn", "knn", "knn", "bayes"]
tukey_methods = ["cebra", "cebra_joint", "baseline", "baseline"]

for i in num_neurons:
    print(f"For {i} neurons from np recording (33ms, 1 frame):")

    np_data, np_keys = get_data(
        decoding_results,
        task="scene_annotation",
        decoders=tukey_decoders,
        methods=tukey_methods,
        window="33",
        modality="np",
        num_neurons=i,
    )

    # `np_keys` labels the group each value belongs to.
    stats = pairwise_tukeyhsd(np_data.flatten(), np_keys)
    print(stats)

For 10 neurons from np recording (33ms, 1 frame):
               Multiple Comparison of Means - Tukey HSD, FWER=0.05               
       group1                group2        meandiff p-adj   lower   upper  reject
---------------------------------------------------------------------------------
np_baseline_33_bayes    np_baseline_33_knn  -0.0231 0.7553 -0.0897  0.0435  False
np_baseline_33_bayes       np_cebra_33_knn   0.0113 0.9609 -0.0552  0.0779  False
np_baseline_33_bayes np_cebra_joint_33_knn  -0.0891 0.0072 -0.1557 -0.0225   True
  np_baseline_33_knn       np_cebra_33_knn   0.0344 0.4713 -0.0321   0.101  False
  np_baseline_33_knn np_cebra_joint_33_knn   -0.066 0.0524 -0.1326  0.0006  False
     np_cebra_33_knn np_cebra_joint_33_knn  -0.1004 0.0027  -0.167 -0.0339   True
---------------------------------------------------------------------------------
For 30 neurons from np recording (33ms, 1 frame):
              Multiple Comparison of Means - Tukey HSD, FWER=0.05               