This repository has been archived by the owner on Oct 5, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 83a8ac8
Showing
2 changed files
with
208 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,207 @@ | ||
##### DUODENUM CELL ANALYSIS SCRIPT | ||
# Josh Wu | ||
# 7 June, 2019 | ||
# Contains analysis for Yu-Hwai Tsai | ||
# Analysis of duodenum samples | ||
|
||
import sys | ||
sys.path.insert(0,'C:/Users/Josh/Desktop/sca_run/sca_run') | ||
from sca_run import * | ||
|
||
figdir = './figures_091020/' | ||
an_run = sca_run() | ||
|
||
############################################################################# | ||
## Change this to point toward your mount location for our MiStorage share ## | ||
############################################################################# | ||
an_run.storage_mount_point = 'Z:/' | ||
|
||
# # IDs of samples as represented in the metadata table | ||
# sca_dict.update(sample_list = ['2598-31', # 47 | ||
# '2757-2', # 59 | ||
# '2250-1','2250-2', # 127 | ||
# '2292-2','2321-5', # Adult | ||
# '2511-2', # 101 | ||
# '2598-24', # 80 | ||
# '2598-28', #132 | ||
# '2856-1']) # 72 | ||
|
||
# an_run.sample_list = ['150-1','150-2','150-3','150-4','150-6','150-7','150-8'] | ||
# run_save = pickle.load(open(''.join([figdir,'adata_save.p']))) | ||
an_run.sample_list = ['2598-31', # 47 | ||
'2757-2', # 59 | ||
'2250-1','2250-2', # 127 | ||
'2292-2','2321-5', # Adult | ||
'2511-2', # 101 | ||
'2598-24', # 80 | ||
'2598-28', #132 | ||
'2856-1'] # 72 | ||
|
||
## List of interesting genes | ||
an_run.add_gene_list(markers=['EPCAM'], | ||
label='EPCAM_plot') | ||
|
||
an_run.add_gene_list(markers = ['CDX2','OLFM4','LGR5','SOX9','ELF3','VIL1','MUC2','CHGA','DEFA5', | ||
'LYZ','DPP4','MKI67','TPI1','SPDEF','MGAM','PDGFA'],#'IS','NGN3''IAP', | ||
label='gene_list_1') | ||
|
||
an_run.add_gene_list(markers = ['NOTCH1','NOTCH2','NOTCH3', | ||
'NOTCH4','DLL1','DLL3','DLL4','JAG1','JAG2'], | ||
label='notch_components') | ||
|
||
an_run.add_gene_list(markers = ['OLFM4','LGR5','SMOC2','IGFBP4','BMI1','LRIG1','TERT','MSI1','PHLDA1','PROM1', | ||
'TNFRSF19','EPHB2','POFUT1','SOX9','ASCL2'], | ||
label='gene_list_2') | ||
|
||
an_run.add_gene_list(markers = ['LGR5'], | ||
label='LGR5_plot') | ||
|
||
an_run.add_gene_list(markers = ['OLFM4'], | ||
label='OLFM4_plot') | ||
|
||
an_run.add_gene_list(markers = ['CDX2','OLFM4','LGR5','NOTCH1','NOTCH2','NOTCH3', | ||
'NOTCH4','DLL1','DLL3','DLL4','JAG1','JAG2'], | ||
label='basic_list') | ||
|
||
an_run.add_gene_list(markers = ['HES1','HES5','HEY1','HEY2','HEYL','CCND1','CDKN1A', | ||
'CDKN1B','CDKN2A','GATA3','PTCRA','DTX1'],#'MYRC' | ||
label='target_genes') | ||
|
||
an_run.add_gene_list(markers = ['NUMB','YAP1','ADAM10','ADAM17','RBPJ','MAML1', | ||
'MAML2','MAML3'], | ||
label='related_genes') | ||
|
||
an_run.add_gene_list(markers=['HES1'], | ||
label='HES1_plot') | ||
|
||
## Parameters used to filter the data - Mainly used to get rid of bad cells | ||
an_run.set_filter_params(min_cells = 0, # Filter out genes expressed in fewer cells | ||
min_genes = 500, # Filter out cells with fewer genes to remove dead cells | ||
max_genes = 7000, # Filter out cells with more genes to remove most doublets | ||
max_counts = 30000, # Filter out cells with more UMIs to catch a few remaining doublets | ||
max_mito = 0.1) # Filter out cells with high mitochondrial gene content | ||
|
||
## Parameters used for initial clustering analysis | ||
an_run.set_analysis_params(n_neighbors = 30, # Size of the local neighborhood used for manifold approximation | ||
n_pcs = 20, # Number of principle components to use in construction of neighborhood graph | ||
spread = 1, # In combination with min_dist determines how clumped embedded points are | ||
min_dist = 0.4, # Minimum distance between points on the umap graph | ||
resolution = 0.5) # High resolution attempts to increases # of clusters identified | ||
|
||
an_run.set_plot_params(size = 10, | ||
umap_obs = ['louvain','age','tissue','sampleName'], | ||
exp_grouping = ['louvain','age'], | ||
final_quality=True) | ||
|
||
##### Trying out DeepImpute ##### | ||
# adata = an_run.load_data() | ||
# adata = an_run.filter_data(adata) | ||
# counts_df = pd.DataFrame(adata.X.toarray()) | ||
# print(counts_df) | ||
|
||
|
||
# from deepimpute.multinet import MultiNet | ||
# model = MultiNet() | ||
# model.fit(counts_df,NN_lim=10000) | ||
# imputed_data = model.predict(counts_df) | ||
|
||
# adata.X = imputed_data.values | ||
# print(adata.X) | ||
|
||
# pickle.dump(adata,open(''.join([figdir,'imputed_data.p']),"wb"),protocol=4) | ||
|
||
# adata_postFiltered = pickle.load(open(''.join([figdir,'imputed_data.p']),"rb")) | ||
# from scipy.sparse import csr_matrix | ||
# adata_postFiltered.X = csr_matrix(adata_postFiltered.X) | ||
|
||
# adata = an_run.preprocess_data(adata_postFiltered) | ||
# an_run.adata_postFiltered = adata_postFiltered | ||
# adata = an_run.run_analysis(adata) | ||
# adata = an_run.plot_sca(adata,figdir=figdir) | ||
# an_run.adata = adata.copy() | ||
|
||
# pickle.dump(an_run,open(''.join([figdir,'adata_save.p']),"wb"),protocol=4) | ||
# an_run.write_summary(figdir=figdir) | ||
|
||
## Basic pipeline for analysis - will filter data, process, cluster, etc. and output relevant figures | ||
# an_run.pipe_basic(figdir)#load_save='adata_save.p') | ||
|
||
## If you find some interesting clusters that you want to "zoom in" on and recluster, you can use the following code | ||
# Will update | ||
# New analysis parameters for the subset of parameters | ||
analysis_params_ext = dict(n_neighbors = 15, | ||
n_pcs = 11, | ||
spread = 1, | ||
min_dist = 0.4, | ||
resolution = 0.4) | ||
|
||
an_run.size=20 | ||
an_run.pipe_ext(analysis_params_ext, figdir=figdir, label='epithelium_7_12', | ||
extracted=['7','12'],final_quality=True, load_save='adata_save.p') | ||
|
||
# an_run.pipe_ext(analysis_params_ext, figdir=figdir, label='epithelium_8_9_19', | ||
# extracted=['8','9','19'],final_quality=True, load_save='adata_save.p') | ||
|
||
# an_run.pipe_ext(analysis_params_ext, figdir=''.join([figdir,'extracted/epithelium_8_9_19/']), label='LGR5_OLFM4_cells', | ||
# extracted=['0','2','7','8','9','12'],final_quality=True, load_save='adata_save.p') | ||
|
||
# an_run.pipe_ext(analysis_params_ext, figdir=''.join([figdir,'extracted/epithelium_8_9_19/']), label='LGR5_cells', | ||
# extracted=['0','2','7','8','9'],final_quality=True, load_save='adata_save.p') | ||
|
||
# an_run.pipe_ext(analysis_params_ext, figdir=''.join([figdir,'extracted/epithelium_8_9_19/']), label='OLFM4_cells', | ||
# extracted=['0','8','12'],final_quality=True, load_save='adata_save.p') | ||
|
||
# an_run.pipe_ext(analysis_params_ext, figdir=''.join([figdir,'extracted/epithelium_8_9_19/']), label='C0', | ||
# extracted=['0'],final_quality=True, load_save='adata_save.p') | ||
|
||
# an_run.pipe_ext(analysis_params_ext, figdir=''.join([figdir,'extracted/epithelium_8_9_19/']), label='C2', | ||
# extracted=['2'],final_quality=True, load_save='adata_save.p') | ||
|
||
# an_run.pipe_ext(analysis_params_ext, figdir=''.join([figdir,'extracted/epithelium_8_9_19/']), label='C7', | ||
# extracted=['7'],final_quality=True, load_save='adata_save.p') | ||
|
||
# an_run.pipe_ext(analysis_params_ext, figdir=''.join([figdir,'extracted/epithelium_8_9_19/']), label='C8', | ||
# extracted=['8'],final_quality=True, load_save='adata_save.p') | ||
|
||
# an_run.pipe_ext(analysis_params_ext, figdir=''.join([figdir,'extracted/epithelium_8_9_19/']), label='C9', | ||
# extracted=['9'],final_quality=True, load_save='adata_save.p') | ||
|
||
an_run.resolution = 1 | ||
|
||
# an_run.pipe_ext(analysis_params_ext, figdir=''.join([figdir,'extracted/epithelium_8_9_19/']), label='C12', | ||
# extracted=['12'],final_quality=True, load_save='adata_save.p')#, preprocess=False) | ||
|
||
|
||
# run_save = pickle.load(open(''.join([figdir,'extracted/epithelium_8_9_19/adata_save.p']),"rb")) | ||
# adata = run_save.adata.copy() | ||
# adata.obs['OLFM4+'] = ['OLFM4+' if cluster else False for cluster in adata.obs['louvain'].isin(['0','8','12'])] | ||
# # adata.obs['OLFM4+, LGR5+, SOX9+'] = ['OLFM4+, LGR5+, SOX9+' if cluster else False for cluster in adata.obs['louvain'].isin(['0','8'])] | ||
# adata.obs['OLFM4+, LGR5+'] = ['OLFM4+, LGR5+' if cluster else False for cluster in adata.obs['louvain'].isin(['0','2','7','8','9','12'])] | ||
# adata.obs['LGR5+'] = ['LGR5+' if cluster else False for cluster in adata.obs['louvain'].isin(['0','2','7','8','9'])] | ||
# # adata.obs['SOX9+'] = ['SOX9+' for cluster in adata.obs['louvain']] | ||
|
||
# gene_groupings = ['OLFM4+','OLFM4+, LGR5+','LGR5+']#,'SOX9+'] | ||
# notch_components = ['NOTCH1','NOTCH2','NOTCH3','NOTCH4','DLL1','DLL3','DLL4','JAG1','JAG2'] | ||
# for grouping in gene_groupings: | ||
# adata_plot = adata[adata.obs[grouping]==grouping].copy() | ||
# feature_colors = [(210,210,210), (210,210,210), (245,245,200), (100,200,225), (0,45,125)] | ||
# position=[0, 0.019999, 0.02, 0.55, 1] | ||
# my_feature_cmap = an_run.make_cmap(feature_colors,bit=True,position=position) | ||
# sc.pl.dotplot(adata_plot,notch_components,groupby=grouping, save=''.join(['_',grouping,'.pdf']), show=False, | ||
# color_map=my_feature_cmap, use_raw=True, dendrogram=False) | ||
# #, figsize=(4,6))#, dot_max=0.4)#, dendrogram=True) | ||
|
||
|
||
figdir = './figures_103119/LWRN-E/' | ||
|
||
#an_run.sample_list = ['150-5'] | ||
an_run.sample_list = ['3011-6'] | ||
## Parameters used for initial clustering analysis | ||
an_run.set_analysis_params(n_neighbors = 15, # Size of the local neighborhood used for manifold approximation | ||
n_pcs = 11, # Number of principle components to use in construction of neighborhood graph | ||
spread = 1, # In combination with min_dist determines how clumped embedded points are | ||
min_dist = 0.4, # Minimum distance between points on the umap graph | ||
resolution = 0.5) # High resolution attempts to increases # of clusters identified | ||
|
||
## Basic pipeline for analysis - will filter data, process, cluster, etc. and output relevant figures | ||
# an_run.pipe_basic(figdir)#,load_save='adata_save.p') |
Submodule sca_run
added at
e1af54