import scanpy as sc
import anndata as ad
import numpy as np
import pandas as pd
import episcanpy.api as epi
sc.settings.set_figure_params(dpi=300, color_map='gist_earth')
adata = sc.read('./data/heart_lv_SM-JF1NY.h5ad')
adata
AnnData object with n_obs × n_vars = 9110 × 305315 obs: 'n_fragment', 'frac_dup', 'frac_mito', 'sample', 'cell_type', 'organ', 'tissue', 'batch' var: 'peaks' uns: 'peaks', 'peaks_new', 'reference_sequences' obsm: 'fragment_paired'
print(adata.obs['cell_type'].value_counts())
cell_type Ventricular Cardiomyocyte 2813 Cardiac Fibroblasts 2230 Endothelial Cell (Myocardial) 1351 Macrophage (General) 1043 Cardiac Pericyte 1 886 Cardiac Pericyte 2 225 Schwann Cell (General) 152 T Lymphocyte 1 (CD8+) 93 Endothelial Cell (General) 1 90 Natural Killer T Cell 63 Endothelial Cell (General) 2 51 Macrophage (General,Alveolar) 25 Fibroblast (General) 18 Pericyte (General) 2 15 Pericyte (General) 1 9 Adipocyte 8 Smooth Muscle (General) 7 Peripheral Nerve Stromal 6 Cardiac Pericyte 4 5 Pericyte (General) 3 4 Cardiac Pericyte 3 4 Lymphatic Endothelial Cell 3 Pericyte (General) 4 3 Vascular Smooth Muscle 2 2 Naive T cell 1 Atrial Cardiomyocyte 1 T lymphocyte 2 (CD4+) 1 Mast Cell 1 Name: count, dtype: int64
threshold = 100
num = adata.obs['cell_type'].value_counts()
idx = np.where(num>threshold)[0]
types_kept = num.index[idx]
adata = adata[adata.obs['cell_type'].isin(types_kept)]
adata
View of AnnData object with n_obs × n_vars = 8700 × 305315 obs: 'n_fragment', 'frac_dup', 'frac_mito', 'sample', 'cell_type', 'organ', 'tissue', 'batch' var: 'peaks' uns: 'peaks', 'peaks_new', 'reference_sequences' obsm: 'fragment_paired'
epi.pp.filter_cells(adata, min_features=1)
epi.pp.filter_features(adata, min_cells=1)
epi.pl.violin(adata, ['nb_features'])
ImplicitModificationWarning:/home/jiangqun/anaconda3/envs/episcanpy/lib/python3.8/site-packages/scanpy/preprocessing/_simple.py:139: Trying to modify attribute `.obs` of view, initializing view as actual. FutureWarning:/home/jiangqun/anaconda3/envs/episcanpy/lib/python3.8/site-packages/scanpy/plotting/_anndata.py:839: The `scale` parameter has been renamed and will be removed in v0.15.0. Pass `density_norm='width'` for the same effect.
nb_feature_selected = 50000
adata = epi.pp.select_var_feature(adata,
nb_features=nb_feature_selected,
show=False,
copy=True)
adata
View of AnnData object with n_obs × n_vars = 8700 × 50043 obs: 'n_fragment', 'frac_dup', 'frac_mito', 'sample', 'cell_type', 'organ', 'tissue', 'batch', 'nb_features' var: 'peaks', 'n_cells', 'prop_shared_cells', 'variability_score' uns: 'peaks', 'peaks_new', 'reference_sequences' obsm: 'fragment_paired'
import scipy
def tfidf2(count_mat):
tf_mat = 1.0 * count_mat / np.tile(np.sum(count_mat,axis=0), (count_mat.shape[0],1))
signac_mat = np.log(1 + np.multiply(1e4*tf_mat, np.tile((1.0 * count_mat.shape[1] / np.sum(count_mat,axis=1)).reshape(-1,1), (1,count_mat.shape[1]))))
return scipy.sparse.csr_matrix(signac_mat)
adata = sc.AnnData(adata.X, obs=adata.obs, var=adata.var, uns=adata.uns, dtype='float32')
adata.X = tfidf2(adata.X)
epi.pp.pca(adata, n_comps=10)
epi.pp.neighbors(adata)
epi.tl.umap(adata)
sc.pl.umap(adata, color=['nb_features', 'cell_type'], wspace=0.4)
TqdmWarning:/home/jiangqun/anaconda3/envs/episcanpy/lib/python3.8/site-packages/tqdm/auto.py:21: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
WARNING: .obsp["connectivities"] have not been computed using umap
UserWarning:/home/jiangqun/anaconda3/envs/episcanpy/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:394: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
epi.tl.rank_features(adata, 'cell_type', omic='ATAC', use_raw=False)
epi.pl.rank_feat_groups(adata)
epi.pl.rank_feat_groups_matrixplot(adata)
WARNING: dendrogram data not found (using key=dendrogram_cell_type). Running `sc.tl.dendrogram` with default parameters. For fine tuning it is recommended to run `sc.tl.dendrogram` independently.
var_list = []
for i in adata.var_names:
var_list.append(i.replace(":", "_").replace("-","_"))
adata.var_names = var_list
adata_gene = epi.tl.geneactivity(adata,
gtf_file='./annotation/gencode.v38.annotation.gtf',
key_added='gene',
upstream=2000,
feature_type='gene',
annotation='HAVANA',
layer_name='geneactivity',
raw=False,
copy=True)
UserWarning:/home/jiangqun/anaconda3/envs/episcanpy/lib/python3.8/site-packages/anndata/_core/anndata.py:1840: Variable names are not unique. To make them unique, call `.var_names_make_unique`.
adata_gene.write("./data/genes/HGCA_lv_SM-JF1NY.h5ad")
sc.pp.normalize_total(adata_gene, target_sum=1e5)
sc.pp.log1p(adata_gene)
sc.pp.highly_variable_genes(adata_gene, mn_top_genes=5000)
sc.tl.pca(adata_gene, svd_solver='arpack')
sc.pp.neighbors(adata_gene)
adata_gene
AnnData object with n_obs × n_vars = 8700 × 18172 obs: 'n_fragment', 'frac_dup', 'frac_mito', 'sample', 'cell_type', 'organ', 'tissue', 'batch', 'nb_features' var: 'gene_id', 'transcript_id', 'gene_type', 'gene_name', 'transcript_type', 'transcript_name', 'protein_id', 'highly_variable', 'means', 'dispersions', 'dispersions_norm' uns: 'peaks', 'peaks_new', 'reference_sequences', 'pca', 'neighbors', 'umap', 'cell_type_colors', 'rank_features_groups', 'dendrogram_cell_type', 'log1p', 'hvg' obsm: 'X_pca', 'X_umap' varm: 'PCs' obsp: 'distances', 'connectivities'
sc.tl.rank_genes_groups(adata_gene, groupby='cell_type', method='wilcoxon')
sc.pl.rank_genes_groups(adata_gene, n_genes=25, sharey=False)
sc.pl.umap(adata_gene, color=['cell_type', 'TBX18'],wspace=0.8)
UserWarning:/home/jiangqun/anaconda3/envs/episcanpy/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:394: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored