Intergrating Tri-OMICS

[137]:

import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
from sklearn.preprocessing import normalize
from mudata import MuData
import os.path as path
import seaborn as sns
import numexpr as ne
import pandas as pd
import scanpy as sc
import mudata as md
import numpy as np
import muon as mu
import anndata
import h5py
import time
import umap
import gc

data introduction and prepocessing

[49]:

adata_adt=sc.read_h5ad("adt_obj.h5ad")
adata_rna=sc.read_h5ad("rna_obj.h5ad")
adata_atac=sc.read_h5ad("atac_obj.h5ad")

[54]:

adata_rna

[54]:

AnnData object with n_obs × n_vars = 13763 × 15360
    obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'stim', 'celltype', 'nCount_atac', 'nFeature_atac', 'nCount_adt', 'nFeature_adt'
    var: 'features', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm'
    uns: 'log1p', 'hvg'

[55]:

adata_atac

[55]:

AnnData object with n_obs × n_vars = 13763 × 68963
    obs: 'orig.ident', 'nCount_atac', 'nFeature_atac', 'nCount_RNA', 'nFeature_RNA', 'stim', 'celltype', 'nCount_adt', 'nFeature_adt'
    var: 'features', 'highly_variable', 'means', 'dispersions', 'dispersions_norm'
    uns: 'log1p', 'hvg'

[56]:

adata_adt

[56]:

AnnData object with n_obs × n_vars = 13763 × 210
    obs: 'orig.ident', 'nCount_adt', 'nFeature_adt', 'nCount_RNA', 'nFeature_RNA', 'stim', 'celltype', 'nCount_atac', 'nFeature_atac'
    var: 'features'

[50]:

sc.pp.filter_genes(adata_rna, min_cells = 50)
sc.pp.normalize_total(adata_rna, target_sum=1e4)
sc.pp.log1p(adata_rna)
sc.pp.highly_variable_genes(adata_rna, min_mean=0.02, max_mean=4, min_disp= -1, max_disp = 10)
sc.pl.highly_variable_genes(adata_rna)

[51]:

HVG = adata_rna[:, adata_rna.var['highly_variable'] == True]

[52]:

sc.pp.log1p(adata_atac)
sc.pp.highly_variable_genes(adata_atac, min_mean=0.02, max_mean=4, min_disp= -1.5, max_disp = 10)
sc.pl.highly_variable_genes(adata_atac)

[53]:

HVP = adata_atac[:, adata_atac.var['highly_variable'] == True]

[57]:

mdata = mu.MuData({'rna': HVG, 'atac': HVP, 'adt':adata_adt})

/home/data/ssy092/miniconda3/envs/maxine/lib/python3.11/site-packages/mudata/_core/mudata.py:1531: FutureWarning: From 0.4 .update() will not pull obs/var columns from individual modalities by default anymore. Set mudata.set_options(pull_on_update=False) to adopt the new behaviour, which will become the default. Use new pull_obs/pull_var and push_obs/push_var methods for more flexibility.
  self._update_attr("var", axis=0, join_common=join_common)
/home/data/ssy092/miniconda3/envs/maxine/lib/python3.11/site-packages/mudata/_core/mudata.py:931: UserWarning: Cannot join columns with the same name because var_names are intersecting.
  warnings.warn(
/home/data/ssy092/miniconda3/envs/maxine/lib/python3.11/site-packages/mudata/_core/mudata.py:1429: FutureWarning: From 0.4 .update() will not pull obs/var columns from individual modalities by default anymore. Set mudata.set_options(pull_on_update=False) to adopt the new behaviour, which will become the default. Use new pull_obs/pull_var and push_obs/push_var methods for more flexibility.
  self._update_attr("obs", axis=1, join_common=join_common)

[59]:

 mdata.obs['celltype'] = adata_rna.obs['celltype']

[60]:

 mdata

[60]:

MuData object with n_obs × n_vars = 13763 × 65654
  obs:      'celltype'
  3 modalities
    rna:    13763 x 13002
      obs:  'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'stim', 'celltype', 'nCount_atac', 'nFeature_atac', 'nCount_adt', 'nFeature_adt'
      var:  'features', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm'
      uns:  'log1p', 'hvg'
    atac:   13763 x 52442
      obs:  'orig.ident', 'nCount_atac', 'nFeature_atac', 'nCount_RNA', 'nFeature_RNA', 'stim', 'celltype', 'nCount_adt', 'nFeature_adt'
      var:  'features', 'highly_variable', 'means', 'dispersions', 'dispersions_norm'
      uns:  'log1p', 'hvg'
    adt:    13763 x 210
      obs:  'orig.ident', 'nCount_adt', 'nFeature_adt', 'nCount_RNA', 'nFeature_RNA', 'stim', 'celltype', 'nCount_atac', 'nFeature_atac'
      var:  'features'

running TriTan

[ ]:

import TriTan

[115]:

n_component= {'rna': [20,50], 'atac': [20,50], 'adt': [20,50]}

[116]:

tritan = TriTan_multi.TriTan(n_modalities = 3, n_component=n_component,complexity = 10)

[117]:

tritan.fit(mdata)

/home/data1/ssy092/maxine_neurip/TriTan_multi.py:255: RuntimeWarning: invalid value encountered in divide
  # bound the values to -1 to 1 in the event of precision issues
/home/data1/ssy092/maxine_neurip/TriTan_multi.py:255: RuntimeWarning: invalid value encountered in divide
  # bound the values to -1 to 1 in the event of precision issues

[119]:

sc.pl.embedding(mdata, color ='celltype', basis='tritan_umap')

../_images/notebooks_PBMC-DOGMA_19_0.png

downstream analysis

[120]:

S_gene = tritan.S['rna']
S_atac = tritan.S['atac']
S_adt = tritan.S['adt']
S_gene = normalize(S_gene, axis=0, norm='max')
S_atac = normalize(S_atac, axis=0, norm='max')
S_adt = normalize(S_adt, axis=0, norm='max')

[121]:

cg = sns.clustermap(S_gene,row_cluster=False,col_cluster=True,figsize=(8, 4), metric='correlation',cbar_pos=(0.14, .2, .02, .4),cmap='viridis')
cg.ax_row_dendrogram.set_visible(False)
cg.ax_col_dendrogram.set_visible(False)

../_images/notebooks_PBMC-DOGMA_22_0.png

[122]:

cg = sns.clustermap(S_atac,row_cluster=False,col_cluster=True,figsize=(8, 4), metric='correlation',cbar_pos=(0.14, .2, .02, .4),cmap='viridis')
cg.ax_row_dendrogram.set_visible(False)
cg.ax_col_dendrogram.set_visible(False)

../_images/notebooks_PBMC-DOGMA_23_0.png

[123]:

cg = sns.clustermap(S_adt,row_cluster=False,col_cluster=True,figsize=(8, 4), metric='correlation',cbar_pos=(0.14, .2, .02, .4),cmap='viridis')
cg.ax_row_dendrogram.set_visible(False)
cg.ax_col_dendrogram.set_visible(False)

../_images/notebooks_PBMC-DOGMA_24_0.png

[125]:

def np_pearson_cor(x, y):
    xv = x - x.mean(axis=0)
    yv = y - y.mean(axis=0)
    xvss = (xv * xv).sum(axis=0)
    yvss = (yv * yv).sum(axis=0)
    result = np.matmul(xv.transpose(), yv) / np.sqrt(np.outer(xvss, yvss))
    # bound the values to -1 to 1 in the event of precision issues
    return np.maximum(np.minimum(result, 1.0), -1.0)

[126]:

I1 = np_pearson_cor(S_gene,S_atac)
I2 = np_pearson_cor(S_gene,S_adt)
I3 = np_pearson_cor(S_atac,S_adt)

[144]:

sns.set(font_scale=1)
g=sns.clustermap(np.abs(I1),figsize=(5, 5),
    cbar_pos=(0.1, .2, .03, .4), metric='correlation',yticklabels=False,xticklabels=False)
ax = g.ax_heatmap
g.ax_row_dendrogram.set_visible(False)
g.ax_col_dendrogram.set_visible(False)
ax.set_xlabel('gene sets', fontsize=30)
ax.set_ylabel('peak sets', fontsize=30)
plt.title('rna&atac correlation matrix', fontsize=20,x=15,y=1.5)

[144]:

Text(15, 1.5, 'rna&atac correlation matrix')

../_images/notebooks_PBMC-DOGMA_27_1.png

[145]:

sns.set(font_scale=1)
g=sns.clustermap(np.abs(I2),figsize=(5, 5),
    cbar_pos=(0.1, .2, .03, .4), metric='correlation',yticklabels=False,xticklabels=False)
ax = g.ax_heatmap
g.ax_row_dendrogram.set_visible(False)
g.ax_col_dendrogram.set_visible(False)
ax.set_xlabel('protein sets', fontsize=30)
ax.set_ylabel('gene sets', fontsize=30)
plt.title('rna&adt correlation matrix', fontsize=20,x=15,y=1.5)

[145]:

Text(15, 1.5, 'rna&adt correlation matrix')

../_images/notebooks_PBMC-DOGMA_28_1.png

[146]:

sns.set(font_scale=1)
g=sns.clustermap(np.abs(I3),figsize=(5, 5),
    cbar_pos=(0.1, .2, .03, .4), metric='correlation',yticklabels=False,xticklabels=False)
ax = g.ax_heatmap
g.ax_row_dendrogram.set_visible(False)
g.ax_col_dendrogram.set_visible(False)
ax.set_xlabel('protein setspeak sets', fontsize=30)
ax.set_ylabel('', fontsize=30)
plt.title('atac&adt correlation matrix', fontsize=20,x=15,y=1.5)

[146]:

Text(15, 1.5, 'atac&adt correlation matrix')

../_images/notebooks_PBMC-DOGMA_29_1.png

[ ]: