Clustering multimodal data with sincei
[ ]:
import numpy as np
import pandas as pd
from sincei.multimodalClustering import (
multiModal_clustering,
umap_aligned
)
import mudata as md
import scanpy as sc
%matplotlib inline
Load data
[ ]:
# Location of the MuData (.h5mu) file read by this cell.
# NOTE(review): presumably written by an earlier sincei step — confirm the path.
multi_modal_path = 'sincei_output/scCounts_10x_multiome_clustered.h5mu'
# Load the multimodal container; later cells access its 'rna' and 'atac'
# modalities and its global `.obs` table.
mdata = md.read_h5mu(multi_modal_path)
# Bare expression so that the notebook displays the loaded object.
mdata
Load metadata and add celltype
[ ]:
# Read the per-cell annotation table: the first row is the header and the
# first column becomes the index (the cell identifiers).
metadata = pd.read_csv('metadata_cd34_rna.csv', header=0, index_col=0)
# Strip the 'rna_' tag from the identifiers so they can be matched against
# `mdata.obs` in the next cell. `regex=False` makes the pattern a literal
# string on every pandas version (the implicit default changed in pandas 2.0).
metadata.index = metadata.index.str.replace('rna_', '', regex=False)
[ ]:
# Attach the 'celltype' annotation to the global observation table, matching
# rows on the index of both tables.
#  - how='left' keeps every cell already present in `mdata.obs`, in its
#    original order; cells missing from `metadata` get NaN instead of being
#    dropped (the default inner join would shrink the table, so it would no
#    longer match the number of cells in `mdata`).
#  - validate='many_to_one' raises a MergeError if an identifier occurs more
#    than once in `metadata`, which would otherwise duplicate cells.
mdata.obs = mdata.obs.merge(
    metadata['celltype'],
    how='left',
    left_index=True,
    right_index=True,
    validate='many_to_one',
)
# Bare expression so that the notebook displays the updated object.
mdata
Clustering
[ ]:
# Options for the joint clustering, gathered in one place.
# NOTE(review): the list-valued options presumably line up position-by-position
# with `modalities` ('rna' first, 'atac' second) — confirm against the sincei
# documentation.
clustering_options = {
    'modalities': ['rna', 'atac'],
    'method': ['glmPCA', 'LSA'],
    'modal_weights': [1, 1],
    'nK': 30,
    'nPrinComps': [20, 30],
    'clusterResolution': [0.8, 1.0],
    'binarize': False,
    'glmPCAfamily': 'poisson',
}
# The return value is not used; the following cells read their results from
# `mdata` itself (e.g. `mdata.obs['cluster_multi']`).
multiModal_clustering(mdata=mdata, **clustering_options)
# Bare expression so that the notebook displays the object after clustering.
mdata
# Optional: uncomment to save the object to disk.
# mdata.write_h5mu('sincei_output/10x_multiome_clustered.h5mu')
[ ]:
# Copy the annotations held in the global `mdata.obs` table into the `.obs`
# table of each modality, so that the per-modality plots below can colour by
# them. The assignment order matches the original cell: for every column,
# 'rna' first, then 'atac'.
for obs_key in ('celltype', 'cluster_multi', 'leiden_muon'):
    for modality in ('rna', 'atac'):
        mdata[modality].obs[obs_key] = mdata.obs[obs_key]
[ ]:
# Draw the UMAP of each modality, coloured by the same four annotations and
# laid out two panels per row.
umap_colour_keys = ('cluster_multi', 'leiden', 'celltype', 'sample')
for modality in ('rna', 'atac'):
    sc.pl.umap(mdata[modality], color=list(umap_colour_keys), ncols=2)
Align UMAP
[ ]:
# Options for the UMAP alignment across the two modalities.
# NOTE(review): presumably this updates the UMAP coordinates stored in each
# modality so the embeddings are comparable — confirm against the sincei
# documentation. The next cell re-plots both modalities.
alignment_options = dict(
    modalities=['rna', 'atac'],
    nK=30,
    distance_metric="euclidean",
)
# Kept as the last expression of the cell, as in the original.
umap_aligned(mdata, **alignment_options)
[ ]:
# Re-draw the UMAP of each modality after the alignment step, using the same
# four annotations and two panels per row as before.
aligned_colour_keys = ('cluster_multi', 'leiden', 'celltype', 'sample')
for modality in ('rna', 'atac'):
    sc.pl.umap(mdata[modality], color=list(aligned_colour_keys), ncols=2)