Clustering multimodal data with sincei

Clustering multimodal data with sincei

[ ]:
import numpy as np
import pandas as pd

from sincei.multimodalClustering import (
    multiModal_clustering,
    umap_aligned
)

import mudata as md
import scanpy as sc

%matplotlib inline

Load data

[ ]:
# Location of the MuData (.h5mu) file that holds the multiome dataset.
multi_modal_path = 'sincei_output/scCounts_10x_multiome_clustered.h5mu'
# Read the file into memory; the modalities are accessed further down as
# mdata['rna'] and mdata['atac'].
mdata = md.read_h5mu(multi_modal_path)
# Bare expression: the notebook renders the object's summary as cell output.
mdata

Load metadata and add cell-type annotations

[ ]:
# Read the per-cell metadata table; the first CSV column becomes the index.
metadata = pd.read_csv('metadata_cd34_rna.csv', header=0, index_col=0)
# Remove the 'rna_' tag from the index labels.
# NOTE(review): presumably this makes the labels match the cell names in
# `mdata.obs` -- confirm against the input files.
metadata = metadata.set_axis(metadata.index.str.replace('rna_', ''), axis=0)
[ ]:
# Attach the `celltype` column from the metadata table to the global
# per-cell table, matching rows on their index labels.
#
# A left join is used on purpose: `merge` defaults to an inner join, which
# silently drops every cell that has no row in the metadata file. The obs
# table would then no longer have one row per cell of `mdata`. With
# `how='left'` every existing row is kept in its original order, and cells
# without metadata simply get a missing `celltype` value.
mdata.obs = mdata.obs.merge(
    metadata['celltype'],
    how='left',
    left_index=True,
    right_index=True,
)
# Bare expression: display the updated object as the cell output.
mdata

Clustering

[ ]:
# Settings for the joint clustering of the two modalities.
# NOTE(review): the list-valued entries presumably hold one value per
# modality, in the same order as `modalities` -- confirm against the sincei
# documentation.
clustering_settings = {
    'modalities': ['rna', 'atac'],
    'method': ['glmPCA', 'LSA'],
    'modal_weights': [1, 1],
    'nK': 30,
    'nPrinComps': [20, 30],
    'clusterResolution': [0.8, 1.0],
    'binarize': False,
    'glmPCAfamily': 'poisson',
}
# The return value is not captured; the cells below read the results from
# `mdata` itself (e.g. mdata.obs['cluster_multi']).
multiModal_clustering(mdata=mdata, **clustering_settings)
# Bare expression: display the object as the cell output.
mdata
# Optional: persist the clustered object.
# mdata.write_h5mu('sincei_output/10x_multiome_clustered.h5mu')
[ ]:
# Copy selected per-cell annotations from the global obs table into the obs
# table of each modality, so they can be referenced directly on
# mdata['rna'] and mdata['atac'].
for obs_column in ('celltype', 'cluster_multi', 'leiden_muon'):
    for modality in ('rna', 'atac'):
        mdata[modality].obs[obs_column] = mdata.obs[obs_column]
[ ]:
# Draw one UMAP figure per modality, with one panel per annotation laid out
# in a two-column grid.
umap_colors = ['cluster_multi', 'leiden', 'celltype', 'sample']
for modality in ('rna', 'atac'):
    sc.pl.umap(mdata[modality], color=umap_colors, ncols=2)

Align UMAP

[ ]:
# Settings for aligning the UMAPs of the two modalities.
alignment_settings = {
    'modalities': ['rna', 'atac'],
    'nK': 30,
    'distance_metric': "euclidean",
}
# NOTE(review): the result is not captured and the UMAPs are re-plotted
# afterwards, so this presumably updates `mdata` in place -- confirm against
# the sincei documentation.
umap_aligned(mdata, **alignment_settings)
[ ]:
# Re-draw the per-modality UMAP figures after the alignment step, using the
# same annotations and two-column layout as before.
umap_colors = ['cluster_multi', 'leiden', 'celltype', 'sample']
for modality in ('rna', 'atac'):
    sc.pl.umap(mdata[modality], color=umap_colors, ncols=2)