terra-st 0.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- terra_st-0.0.0/.codecov.yaml +17 -0
- terra_st-0.0.0/.editorconfig +18 -0
- terra_st-0.0.0/.github/ISSUE_TEMPLATE/bug_report.yml +89 -0
- terra_st-0.0.0/.github/ISSUE_TEMPLATE/config.yml +5 -0
- terra_st-0.0.0/.github/ISSUE_TEMPLATE/feature_request.yml +11 -0
- terra_st-0.0.0/.github/workflows/build.yaml +29 -0
- terra_st-0.0.0/.github/workflows/release.yaml +29 -0
- terra_st-0.0.0/.github/workflows/test.yaml +67 -0
- terra_st-0.0.0/.gitignore +33 -0
- terra_st-0.0.0/.pre-commit-config.yaml +42 -0
- terra_st-0.0.0/.readthedocs.yaml +16 -0
- terra_st-0.0.0/CHANGELOG.md +15 -0
- terra_st-0.0.0/LICENSE +28 -0
- terra_st-0.0.0/PKG-INFO +146 -0
- terra_st-0.0.0/README.md +70 -0
- terra_st-0.0.0/configs/job/chl59-8b_1p/chl59-8b_1p.yaml +30 -0
- terra_st-0.0.0/configs/job/hst_corpus_110m/hst_corpus_110m.yaml +30 -0
- terra_st-0.0.0/configs/job/hst_corpus_110m/hst_corpus_110m_1.yaml +30 -0
- terra_st-0.0.0/configs/job/hst_corpus_110m/hst_corpus_110m_2.yaml +30 -0
- terra_st-0.0.0/configs/job/hst_corpus_110m/hst_corpus_110m_5k.yaml +30 -0
- terra_st-0.0.0/configs/job/hst_corpus_80m/hst_corpus_80m_gtbase.yaml +28 -0
- terra_st-0.0.0/configs/job/hst_corpus_80m/hst_corpus_80m_gtlarge.yaml +27 -0
- terra_st-0.0.0/configs/job/hst_corpus_80m/hst_corpus_80m_gtsmall_counts.yaml +28 -0
- terra_st-0.0.0/configs/job/hst_corpus_80m/hst_corpus_80m_gtsmall_lfs.yaml +27 -0
- terra_st-0.0.0/configs/job/hst_corpus_80m/hst_corpus_80m_gttiny_sanger.yaml +28 -0
- terra_st-0.0.0/configs/job/hst_corpus_90m/hst_corpus_90m_gtsmall_sanger.yaml +28 -0
- terra_st-0.0.0/configs/job/hst_corpus_90m/hst_corpus_90m_gttiny_sanger.yaml +28 -0
- terra_st-0.0.0/configs/job/kidney_disease_integration/kidney_disease_integration.yaml +30 -0
- terra_st-0.0.0/configs/job/mmb0-1b_smb1-1b/mmb0-1b_smb1-1b_1p.yaml +30 -0
- terra_st-0.0.0/configs/job/mmb0-1b_smb1-1b/mmb0-1b_smb1-1b_1p_2.yaml +30 -0
- terra_st-0.0.0/configs/job/mmb0-1b_smb1-1b/mmb0-1b_smb1-1b_1p_counts.yaml +30 -0
- terra_st-0.0.0/configs/job/mmb0-1b_smb1-1b/mmb0-1b_smb1-1b_2p.yaml +30 -0
- terra_st-0.0.0/configs/job/mmb0-1b_smb1-1b/mmb0-1b_smb1-1b_2p_coord_aligned.yaml +30 -0
- terra_st-0.0.0/configs/job/mmb0-1b_smb1-1b/mmb0-1b_smb1-1b_2p_coord_aligned_2.yaml +30 -0
- terra_st-0.0.0/configs/job/mmb0-1b_smb1-1b/mmb0-1b_smb1-1b_2p_coord_aligned_3.yaml +30 -0
- terra_st-0.0.0/configs/job/mmb0-1b_smb1-1b/mmb0-1b_smb1-1b_2p_coord_aligned_4.yaml +30 -0
- terra_st-0.0.0/configs/job/mmb0-1b_smb1-1b/mmb0-1b_smb1-1b_2p_coord_aligned_old.yaml +30 -0
- terra_st-0.0.0/configs/job/mmb0-1b_smb1-1b/mmb0-1b_smb1-1b_2p_neigh.yaml +30 -0
- terra_st-0.0.0/configs/job/mmb0-1b_smb1-1b/mmb0-1b_smb1-1b_2p_neigh_2.yaml +30 -0
- terra_st-0.0.0/configs/job/mmb0-1b_smb1-1b/mmb0-1b_smb1-1b_2p_neigh_3.yaml +30 -0
- terra_st-0.0.0/configs/job/mmb0-1b_smb1-1b_xmb2-1b/mmb0-1b_smb1-1b_xmb2-1b_3p.yaml +30 -0
- terra_st-0.0.0/configs/job/mst_corpus_10m/mst_corpus_10m.yaml +30 -0
- terra_st-0.0.0/configs/job/xhk1007-CV1-41b_1p/xhk1007-CV1-41b_1p.yaml +30 -0
- terra_st-0.0.0/configs/job/xhs1000-39b_1p/xhs1000-39b_1p.yaml +30 -0
- terra_st-0.0.0/configs/job/xhs49-1b_1p/xhs49-1b_1p.yaml +30 -0
- terra_st-0.0.0/configs/model/chl59-8b_1p/chl59-8b_1p.yaml +87 -0
- terra_st-0.0.0/configs/model/chl59-8b_1p/chl59-8b_1p_protein_init.yaml +102 -0
- terra_st-0.0.0/configs/model/hst_corpus_110m/hst_corpus_110m copy.yaml +140 -0
- terra_st-0.0.0/configs/model/hst_corpus_110m/hst_corpus_110m.yaml +137 -0
- terra_st-0.0.0/configs/model/hst_corpus_110m/hst_corpus_110m_1.yaml +136 -0
- terra_st-0.0.0/configs/model/hst_corpus_110m/hst_corpus_110m_2.yaml +136 -0
- terra_st-0.0.0/configs/model/hst_corpus_110m/hst_corpus_110m_5k.yaml +96 -0
- terra_st-0.0.0/configs/model/hst_corpus_80m/hst_corpus_80m_gtbase_aws.yaml +73 -0
- terra_st-0.0.0/configs/model/hst_corpus_80m/hst_corpus_80m_gtbase_sanger.yaml +77 -0
- terra_st-0.0.0/configs/model/hst_corpus_80m/hst_corpus_80m_gtlarge.yaml +70 -0
- terra_st-0.0.0/configs/model/hst_corpus_80m/hst_corpus_80m_gtsmall_aws.yaml +73 -0
- terra_st-0.0.0/configs/model/hst_corpus_80m/hst_corpus_80m_gtsmall_lfs.yaml +72 -0
- terra_st-0.0.0/configs/model/hst_corpus_80m/hst_corpus_80m_gttiny_sanger.yaml +77 -0
- terra_st-0.0.0/configs/model/hst_corpus_90m/hst_corpus_90m_gtbase_aws.yaml +83 -0
- terra_st-0.0.0/configs/model/hst_corpus_90m/hst_corpus_90m_gtbase_reordered_70m_aws.yaml +83 -0
- terra_st-0.0.0/configs/model/hst_corpus_90m/hst_corpus_90m_gtbase_reordered_aws.yaml +83 -0
- terra_st-0.0.0/configs/model/hst_corpus_90m/hst_corpus_90m_gtcustom_aws.yaml +85 -0
- terra_st-0.0.0/configs/model/hst_corpus_90m/hst_corpus_90m_gtlarge_reordered_aws.yaml +83 -0
- terra_st-0.0.0/configs/model/hst_corpus_90m/hst_corpus_90m_gtsmall_aws.yaml +86 -0
- terra_st-0.0.0/configs/model/hst_corpus_90m/hst_corpus_90m_gtsmall_reordered_aws.yaml +84 -0
- terra_st-0.0.0/configs/model/hst_corpus_90m/hst_corpus_90m_gtsmall_sanger.yaml +80 -0
- terra_st-0.0.0/configs/model/hst_corpus_90m/hst_corpus_90m_gttiny_aws.yaml +86 -0
- terra_st-0.0.0/configs/model/hst_corpus_90m/hst_corpus_90m_gttiny_reordered_aws.yaml +84 -0
- terra_st-0.0.0/configs/model/hst_corpus_90m/hst_corpus_90m_gttiny_sanger.yaml +80 -0
- terra_st-0.0.0/configs/model/kidney_disease_integration/kidney_disease_integration.yaml +96 -0
- terra_st-0.0.0/configs/model/mmb0-1b_smb1-1b/mmb0-1b_smb1-1b_1p.yaml +97 -0
- terra_st-0.0.0/configs/model/mmb0-1b_smb1-1b/mmb0-1b_smb1-1b_1p_2.yaml +95 -0
- terra_st-0.0.0/configs/model/mmb0-1b_smb1-1b/mmb0-1b_smb1-1b_1p_best.yaml +96 -0
- terra_st-0.0.0/configs/model/mmb0-1b_smb1-1b/mmb0-1b_smb1-1b_1p_counts.yaml +96 -0
- terra_st-0.0.0/configs/model/mmb0-1b_smb1-1b/mmb0-1b_smb1-1b_2p.yaml +97 -0
- terra_st-0.0.0/configs/model/mmb0-1b_smb1-1b/mmb0-1b_smb1-1b_2p_256_seq_len_best.yaml +95 -0
- terra_st-0.0.0/configs/model/mmb0-1b_smb1-1b/mmb0-1b_smb1-1b_2p_32_seq_len.yaml +96 -0
- terra_st-0.0.0/configs/model/mmb0-1b_smb1-1b/mmb0-1b_smb1-1b_2p_64_seq_len_best.yaml +96 -0
- terra_st-0.0.0/configs/model/mmb0-1b_smb1-1b/mmb0-1b_smb1-1b_2p_64_seq_len_rank_best.yaml +96 -0
- terra_st-0.0.0/configs/model/mmb0-1b_smb1-1b/mmb0-1b_smb1-1b_2p_coord_aligned.yaml +142 -0
- terra_st-0.0.0/configs/model/mmb0-1b_smb1-1b/mmb0-1b_smb1-1b_2p_coord_aligned_2.yaml +141 -0
- terra_st-0.0.0/configs/model/mmb0-1b_smb1-1b/mmb0-1b_smb1-1b_2p_coord_aligned_3.yaml +135 -0
- terra_st-0.0.0/configs/model/mmb0-1b_smb1-1b/mmb0-1b_smb1-1b_2p_coord_aligned_4.yaml +135 -0
- terra_st-0.0.0/configs/model/mmb0-1b_smb1-1b/mmb0-1b_smb1-1b_2p_coord_aligned_best.yaml +96 -0
- terra_st-0.0.0/configs/model/mmb0-1b_smb1-1b/mmb0-1b_smb1-1b_2p_coord_aligned_old.yaml +96 -0
- terra_st-0.0.0/configs/model/mmb0-1b_smb1-1b/mmb0-1b_smb1-1b_2p_neigh.yaml +96 -0
- terra_st-0.0.0/configs/model/mmb0-1b_smb1-1b/mmb0-1b_smb1-1b_2p_neigh_2.yaml +96 -0
- terra_st-0.0.0/configs/model/mmb0-1b_smb1-1b/mmb0-1b_smb1-1b_2p_neigh_3.yaml +96 -0
- terra_st-0.0.0/configs/model/mmb0-1b_smb1-1b/mmb0-1b_smb1-1b_2p_nonorm_256_seq_len.yaml +95 -0
- terra_st-0.0.0/configs/model/mmb0-1b_smb1-1b/mmb0-1b_smb1-1b_2p_nonorm_64_seq_len.yaml +96 -0
- terra_st-0.0.0/configs/model/mmb0-1b_smb1-1b/mmb0-1b_smb1-1b_2p_nonorm_64_seq_len_best.yaml +96 -0
- terra_st-0.0.0/configs/model/mmb0-1b_smb1-1b/mmb0-1b_smb1-1b_2p_read_depth_256_seq_len_best.yaml +95 -0
- terra_st-0.0.0/configs/model/mmb0-1b_smb1-1b/mmb0-1b_smb1-1b_2p_read_depth_64_seq_len_best.yaml +96 -0
- terra_st-0.0.0/configs/model/mmb0-1b_smb1-1b_xmb2-1b/mmb0-1b_smb1-1b_xmb2-1b_3p.yaml +96 -0
- terra_st-0.0.0/configs/model/mst_corpus_10m/mst_corpus_10m.yaml +97 -0
- terra_st-0.0.0/configs/model/mst_corpus_10m/mst_corpus_10m_best.yaml +96 -0
- terra_st-0.0.0/configs/model/xhk1007-CV1-41b_1p/xhk1007-CV1-41b_1p.yaml +87 -0
- terra_st-0.0.0/configs/model/xhs1000-39b_1p/xhs1000-39b_1p.yaml +87 -0
- terra_st-0.0.0/configs/model/xhs49-1b_1p/xhs49-1b_1p.yaml +87 -0
- terra_st-0.0.0/docs/Makefile +20 -0
- terra_st-0.0.0/docs/_static/.gitkeep +0 -0
- terra_st-0.0.0/docs/_static/css/custom.css +4 -0
- terra_st-0.0.0/docs/_templates/.gitkeep +0 -0
- terra_st-0.0.0/docs/_templates/autosummary/class.rst +61 -0
- terra_st-0.0.0/docs/api.md +38 -0
- terra_st-0.0.0/docs/changelog.md +3 -0
- terra_st-0.0.0/docs/conf.py +129 -0
- terra_st-0.0.0/docs/contributing.md +161 -0
- terra_st-0.0.0/docs/extensions/typed_returns.py +32 -0
- terra_st-0.0.0/docs/index.md +16 -0
- terra_st-0.0.0/docs/notebooks/Terra.ipynb +4774 -0
- terra_st-0.0.0/docs/notebooks/example.ipynb +171 -0
- terra_st-0.0.0/docs/references.bib +10 -0
- terra_st-0.0.0/docs/references.md +5 -0
- terra_st-0.0.0/docs/template_usage.md +443 -0
- terra_st-0.0.0/environment.yaml +10 -0
- terra_st-0.0.0/pyproject.toml +157 -0
- terra_st-0.0.0/src/app/inference/__init__.py +11 -0
- terra_st-0.0.0/src/app/inference/embed.py +1315 -0
- terra_st-0.0.0/src/app/inference/harmonize.py +185 -0
- terra_st-0.0.0/src/app/inference/infer.py +836 -0
- terra_st-0.0.0/src/app/inference/main.py +63 -0
- terra_st-0.0.0/src/app/inference/perturb.py +289 -0
- terra_st-0.0.0/src/app/inference/score.py +178 -0
- terra_st-0.0.0/src/app/inference/tokenize.py +159 -0
- terra_st-0.0.0/src/app/training/__init__.py +1 -0
- terra_st-0.0.0/src/app/training/decode.py +2392 -0
- terra_st-0.0.0/src/app/training/finetune.py +400 -0
- terra_st-0.0.0/src/app/training/main.py +190 -0
- terra_st-0.0.0/src/app/training/train.py +1696 -0
- terra_st-0.0.0/src/app/training/train_model_lsf_mpirun_sanger.sh +29 -0
- terra_st-0.0.0/src/app/training/train_model_slurm_torchrun_aws.sh +69 -0
- terra_st-0.0.0/src/app/training/train_model_slurm_torchrun_hmgu.sh +40 -0
- terra_st-0.0.0/src/app/utils/__init__.py +5 -0
- terra_st-0.0.0/src/app/utils/bsub_script.sh +110 -0
- terra_st-0.0.0/src/app/utils/bsub_script_tiger.sh +116 -0
- terra_st-0.0.0/src/app/utils/bsub_script_tiger_normal_reservation.sh +117 -0
- terra_st-0.0.0/src/app/utils/bsub_script_tiger_parallel_reservation.sh +117 -0
- terra_st-0.0.0/src/app/utils/bsub_submit.sh +3 -0
- terra_st-0.0.0/src/app/utils/bsub_submit_tiger.sh +3 -0
- terra_st-0.0.0/src/app/utils/bsub_submit_tiger_normal_reservation.sh +3 -0
- terra_st-0.0.0/src/app/utils/bsub_submit_tiger_parallel_reservation.sh +3 -0
- terra_st-0.0.0/src/app/utils/helper.py +826 -0
- terra_st-0.0.0/src/terra/__init__.py +6 -0
- terra_st-0.0.0/src/terra/datasets/__init__.py +7 -0
- terra_st-0.0.0/src/terra/datasets/cell_datasets.py +923 -0
- terra_st-0.0.0/src/terra/datasets/dataloaders.py +238 -0
- terra_st-0.0.0/src/terra/datasets/utils.py +153 -0
- terra_st-0.0.0/src/terra/evaluation/__init__.py +6 -0
- terra_st-0.0.0/src/terra/evaluation/knn.py +73 -0
- terra_st-0.0.0/src/terra/evaluation/linear.py +326 -0
- terra_st-0.0.0/src/terra/evaluation/lr_interactions.py +135 -0
- terra_st-0.0.0/src/terra/evaluation/utils.py +60 -0
- terra_st-0.0.0/src/terra/masks/__init__.py +3 -0
- terra_st-0.0.0/src/terra/masks/block_masking.py +327 -0
- terra_st-0.0.0/src/terra/masks/cell_masking.py +266 -0
- terra_st-0.0.0/src/terra/masks/utils.py +40 -0
- terra_st-0.0.0/src/terra/models/__init__.py +21 -0
- terra_st-0.0.0/src/terra/models/adaln.py +127 -0
- terra_st-0.0.0/src/terra/models/batch_classifier.py +163 -0
- terra_st-0.0.0/src/terra/models/batch_labels.py +207 -0
- terra_st-0.0.0/src/terra/models/count_decoder.py +110 -0
- terra_st-0.0.0/src/terra/models/cycle_consistency.py +208 -0
- terra_st-0.0.0/src/terra/models/distribution_alignment.py +251 -0
- terra_st-0.0.0/src/terra/models/gene_transformers.py +3171 -0
- terra_st-0.0.0/src/terra/models/modules.py +484 -0
- terra_st-0.0.0/src/terra/models/multimask.py +90 -0
- terra_st-0.0.0/src/terra/models/protein_init.py +626 -0
- terra_st-0.0.0/src/terra/models/rda.py +213 -0
- terra_st-0.0.0/src/terra/models/rope2d.py +159 -0
- terra_st-0.0.0/src/terra/models/special_token_moe.py +272 -0
- terra_st-0.0.0/src/terra/models/utils.py +249 -0
- terra_st-0.0.0/src/terra/preprocessors/__init__.py +9 -0
- terra_st-0.0.0/src/terra/preprocessors/filters.py +34 -0
- terra_st-0.0.0/src/terra/preprocessors/graph.py +144 -0
- terra_st-0.0.0/src/terra/preprocessors/normalizers.py +393 -0
- terra_st-0.0.0/src/terra/tokenizers/__init__.py +4 -0
- terra_st-0.0.0/src/terra/tokenizers/cell_tokenizers.py +1847 -0
- terra_st-0.0.0/src/terra/tokenizers/tokenize.py +108 -0
- terra_st-0.0.0/src/terra/utils/__init__.py +6 -0
- terra_st-0.0.0/src/terra/utils/config.py +76 -0
- terra_st-0.0.0/src/terra/utils/distributed.py +51 -0
- terra_st-0.0.0/src/terra/utils/embedding.py +569 -0
- terra_st-0.0.0/src/terra/utils/evaluation.py +350 -0
- terra_st-0.0.0/src/terra/utils/logging.py +102 -0
- terra_st-0.0.0/src/terra/utils/schedulers.py +111 -0
- terra_st-0.0.0/tests/test_datasets/test_cell_neighborhood_dataset.ipynb +587 -0
- terra_st-0.0.0/tests/test_masks/test_multigene.ipynb +211 -0
- terra_st-0.0.0/tests/test_masks/test_utils.ipynb +141 -0
- terra_st-0.0.0/tests/test_models/test_batch_conditioning.py +450 -0
- terra_st-0.0.0/tests/test_models/test_batch_labels.py +54 -0
- terra_st-0.0.0/tests/test_models/test_cycle_consistency.py +211 -0
- terra_st-0.0.0/tests/test_models/test_distribution_alignment.py +253 -0
- terra_st-0.0.0/tests/test_models/test_gene_transformer.ipynb +1018 -0
- terra_st-0.0.0/tests/test_models/test_protein_init.py +531 -0
- terra_st-0.0.0/tests/test_models/test_rda.py +168 -0
- terra_st-0.0.0/tests/test_models/test_spatial_encoding.py +598 -0
- terra_st-0.0.0/tests/test_models/test_special_token_moe.py +263 -0
- terra_st-0.0.0/tests/test_preprocessors/test_aggregate_by_sum_of_neighbours.py +43 -0
- terra_st-0.0.0/tests/test_preprocessors/test_analytic_pearson_residuals.py +36 -0
- terra_st-0.0.0/tests/test_preprocessors/test_cell_area.py +31 -0
- terra_st-0.0.0/tests/test_preprocessors/test_filter_poor_quality_cells.py +32 -0
- terra_st-0.0.0/tests/test_preprocessors/test_mean_normalize_by_gene.py +51 -0
- terra_st-0.0.0/tests/test_preprocessors/test_non_zero_median.py +27 -0
- terra_st-0.0.0/tests/test_preprocessors/test_read_depth.py +29 -0
- terra_st-0.0.0/tests/test_preprocessors/test_seurat_v3.py +40 -0
- terra_st-0.0.0/tests/test_utils/compute_mean_unmasked_emb.py +49 -0
- terra_st-0.0.0/tests/test_utils/compute_unmasked_rank_based_weights.py +70 -0
- terra_st-0.0.0/tests/test_utils/create_binary_selection_mask.py +125 -0
- terra_st-0.0.0/tests/test_utils/retrieve_gene_emb.py +53 -0
- terra_st-0.0.0/uv.lock +9084 -0
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# Based on pydata/xarray
|
|
2
|
+
codecov:
|
|
3
|
+
require_ci_to_pass: no
|
|
4
|
+
|
|
5
|
+
coverage:
|
|
6
|
+
status:
|
|
7
|
+
project:
|
|
8
|
+
default:
|
|
9
|
+
# Require 1% coverage, i.e., always succeed
|
|
10
|
+
target: 1
|
|
11
|
+
patch: false
|
|
12
|
+
changes: false
|
|
13
|
+
|
|
14
|
+
comment:
|
|
15
|
+
layout: diff, flags, files
|
|
16
|
+
behavior: once
|
|
17
|
+
require_base: no
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
root = true
|
|
2
|
+
|
|
3
|
+
[*]
|
|
4
|
+
indent_style = space
|
|
5
|
+
indent_size = 4
|
|
6
|
+
end_of_line = lf
|
|
7
|
+
charset = utf-8
|
|
8
|
+
trim_trailing_whitespace = true
|
|
9
|
+
insert_final_newline = true
|
|
10
|
+
|
|
11
|
+
[*.{yml,yaml}]
|
|
12
|
+
indent_size = 2
|
|
13
|
+
|
|
14
|
+
[.cruft.json]
|
|
15
|
+
indent_size = 2
|
|
16
|
+
|
|
17
|
+
[Makefile]
|
|
18
|
+
indent_style = tab
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
name: Bug report
|
|
2
|
+
description: Report something that is broken or incorrect
|
|
3
|
+
labels: bug
|
|
4
|
+
body:
|
|
5
|
+
- type: markdown
|
|
6
|
+
attributes:
|
|
7
|
+
value: |
|
|
8
|
+
**Note**: Please read [this guide](https://matthewrocklin.com/blog/work/2018/02/28/minimal-bug-reports)
|
|
9
|
+
detailing how to provide the necessary information for us to reproduce your bug. In brief:
|
|
10
|
+
* Please provide the exact steps to reproduce the bug in a clean Python environment.
|
|
11
|
+
* In case it's not clear what's causing this bug, please provide the data or the data generation procedure.
|
|
12
|
+
* Sometimes it is not possible to share the data, but usually it is possible to replicate problems on publicly
|
|
13
|
+
available datasets or to share a subset of your data.
|
|
14
|
+
|
|
15
|
+
- type: textarea
|
|
16
|
+
id: report
|
|
17
|
+
attributes:
|
|
18
|
+
label: Report
|
|
19
|
+
description: A clear and concise description of what the bug is.
|
|
20
|
+
validations:
|
|
21
|
+
required: true
|
|
22
|
+
|
|
23
|
+
- type: textarea
|
|
24
|
+
id: versions
|
|
25
|
+
attributes:
|
|
26
|
+
label: Version information
|
|
27
|
+
description: |
|
|
28
|
+
Please paste below the output of
|
|
29
|
+
|
|
30
|
+
```python
|
|
31
|
+
import session_info
|
|
32
|
+
session_info.show(html=False, dependencies=True)
|
|
33
|
+
```
|
|
34
|
+
placeholder: |
|
|
35
|
+
-----
|
|
36
|
+
anndata 0.8.0rc2.dev27+ge524389
|
|
37
|
+
session_info 1.0.0
|
|
38
|
+
-----
|
|
39
|
+
asttokens NA
|
|
40
|
+
awkward 1.8.0
|
|
41
|
+
backcall 0.2.0
|
|
42
|
+
cython_runtime NA
|
|
43
|
+
dateutil 2.8.2
|
|
44
|
+
debugpy 1.6.0
|
|
45
|
+
decorator 5.1.1
|
|
46
|
+
entrypoints 0.4
|
|
47
|
+
executing 0.8.3
|
|
48
|
+
h5py 3.7.0
|
|
49
|
+
ipykernel 6.15.0
|
|
50
|
+
jedi 0.18.1
|
|
51
|
+
mpl_toolkits NA
|
|
52
|
+
natsort 8.1.0
|
|
53
|
+
numpy 1.22.4
|
|
54
|
+
packaging 21.3
|
|
55
|
+
pandas 1.4.2
|
|
56
|
+
parso 0.8.3
|
|
57
|
+
pexpect 4.8.0
|
|
58
|
+
pickleshare 0.7.5
|
|
59
|
+
pkg_resources NA
|
|
60
|
+
prompt_toolkit 3.0.29
|
|
61
|
+
psutil 5.9.1
|
|
62
|
+
ptyprocess 0.7.0
|
|
63
|
+
pure_eval 0.2.2
|
|
64
|
+
pydev_ipython NA
|
|
65
|
+
pydevconsole NA
|
|
66
|
+
pydevd 2.8.0
|
|
67
|
+
pydevd_file_utils NA
|
|
68
|
+
pydevd_plugins NA
|
|
69
|
+
pydevd_tracing NA
|
|
70
|
+
pygments 2.12.0
|
|
71
|
+
pytz 2022.1
|
|
72
|
+
scipy 1.8.1
|
|
73
|
+
setuptools 62.5.0
|
|
74
|
+
setuptools_scm NA
|
|
75
|
+
six 1.16.0
|
|
76
|
+
stack_data 0.3.0
|
|
77
|
+
tornado 6.1
|
|
78
|
+
traitlets 5.3.0
|
|
79
|
+
wcwidth 0.2.5
|
|
80
|
+
zmq 23.1.0
|
|
81
|
+
-----
|
|
82
|
+
IPython 8.4.0
|
|
83
|
+
jupyter_client 7.3.4
|
|
84
|
+
jupyter_core 4.10.0
|
|
85
|
+
-----
|
|
86
|
+
Python 3.9.13 | packaged by conda-forge | (main, May 27 2022, 16:58:50) [GCC 10.3.0]
|
|
87
|
+
Linux-5.18.6-arch1-1-x86_64-with-glibc2.35
|
|
88
|
+
-----
|
|
89
|
+
Session information updated at 2022-07-07 17:55
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
name: Feature request
|
|
2
|
+
description: Propose a new feature for terra
|
|
3
|
+
labels: enhancement
|
|
4
|
+
body:
|
|
5
|
+
- type: textarea
|
|
6
|
+
id: description
|
|
7
|
+
attributes:
|
|
8
|
+
label: Description of feature
|
|
9
|
+
description: Please describe your suggestion for a new feature. It might help to describe a problem or use case, plus any alternatives that you have considered.
|
|
10
|
+
validations:
|
|
11
|
+
required: true
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
name: Check Build
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
concurrency:
|
|
10
|
+
group: ${{ github.workflow }}-${{ github.ref }}
|
|
11
|
+
cancel-in-progress: true
|
|
12
|
+
|
|
13
|
+
jobs:
|
|
14
|
+
package:
|
|
15
|
+
runs-on: ubuntu-latest
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v3
|
|
18
|
+
- name: Set up Python 3.10
|
|
19
|
+
uses: actions/setup-python@v4
|
|
20
|
+
with:
|
|
21
|
+
python-version: "3.10"
|
|
22
|
+
cache: "pip"
|
|
23
|
+
cache-dependency-path: "**/pyproject.toml"
|
|
24
|
+
- name: Install build dependencies
|
|
25
|
+
run: python -m pip install --upgrade pip wheel twine build
|
|
26
|
+
- name: Build package
|
|
27
|
+
run: python -m build
|
|
28
|
+
- name: Check package
|
|
29
|
+
run: twine check --strict dist/*.whl
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
name: Release
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
release:
|
|
5
|
+
types: [published]
|
|
6
|
+
|
|
7
|
+
# Use "trusted publishing", see https://docs.pypi.org/trusted-publishers/
|
|
8
|
+
jobs:
|
|
9
|
+
release:
|
|
10
|
+
name: Upload release to PyPI
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
environment:
|
|
13
|
+
name: pypi
|
|
14
|
+
url: https://pypi.org/p/terra-st
|
|
15
|
+
permissions:
|
|
16
|
+
id-token: write # IMPORTANT: this permission is mandatory for trusted publishing
|
|
17
|
+
steps:
|
|
18
|
+
- uses: actions/checkout@v4
|
|
19
|
+
with:
|
|
20
|
+
filter: blob:none
|
|
21
|
+
fetch-depth: 0
|
|
22
|
+
- uses: actions/setup-python@v4
|
|
23
|
+
with:
|
|
24
|
+
python-version: "3.x"
|
|
25
|
+
cache: "pip"
|
|
26
|
+
- run: pip install build
|
|
27
|
+
- run: python -m build
|
|
28
|
+
- name: Publish package distributions to PyPI
|
|
29
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
name: Test
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
schedule:
|
|
9
|
+
- cron: "0 5 1,15 * *"
|
|
10
|
+
|
|
11
|
+
concurrency:
|
|
12
|
+
group: ${{ github.workflow }}-${{ github.ref }}
|
|
13
|
+
cancel-in-progress: true
|
|
14
|
+
|
|
15
|
+
jobs:
|
|
16
|
+
test:
|
|
17
|
+
runs-on: ${{ matrix.os }}
|
|
18
|
+
defaults:
|
|
19
|
+
run:
|
|
20
|
+
shell: bash -e {0} # -e to fail on error
|
|
21
|
+
|
|
22
|
+
strategy:
|
|
23
|
+
fail-fast: false
|
|
24
|
+
matrix:
|
|
25
|
+
include:
|
|
26
|
+
- os: ubuntu-latest
|
|
27
|
+
python: "3.9"
|
|
28
|
+
- os: ubuntu-latest
|
|
29
|
+
python: "3.11"
|
|
30
|
+
- os: ubuntu-latest
|
|
31
|
+
python: "3.11"
|
|
32
|
+
pip-flags: "--pre"
|
|
33
|
+
name: PRE-RELEASE DEPENDENCIES
|
|
34
|
+
|
|
35
|
+
name: ${{ matrix.name }} Python ${{ matrix.python }}
|
|
36
|
+
|
|
37
|
+
env:
|
|
38
|
+
OS: ${{ matrix.os }}
|
|
39
|
+
PYTHON: ${{ matrix.python }}
|
|
40
|
+
|
|
41
|
+
steps:
|
|
42
|
+
- uses: actions/checkout@v3
|
|
43
|
+
- name: Set up Python ${{ matrix.python }}
|
|
44
|
+
uses: actions/setup-python@v4
|
|
45
|
+
with:
|
|
46
|
+
python-version: ${{ matrix.python }}
|
|
47
|
+
cache: "pip"
|
|
48
|
+
cache-dependency-path: "**/pyproject.toml"
|
|
49
|
+
|
|
50
|
+
- name: Install test dependencies
|
|
51
|
+
run: |
|
|
52
|
+
python -m pip install --upgrade pip wheel
|
|
53
|
+
- name: Install dependencies
|
|
54
|
+
run: |
|
|
55
|
+
pip install ${{ matrix.pip-flags }} ".[dev,test]"
|
|
56
|
+
- name: Test
|
|
57
|
+
env:
|
|
58
|
+
MPLBACKEND: agg
|
|
59
|
+
PLATFORM: ${{ matrix.os }}
|
|
60
|
+
DISPLAY: :42
|
|
61
|
+
run: |
|
|
62
|
+
coverage run -m pytest -v --color=yes
|
|
63
|
+
- name: Report coverage
|
|
64
|
+
run: |
|
|
65
|
+
coverage report
|
|
66
|
+
- name: Upload coverage
|
|
67
|
+
uses: codecov/codecov-action@v3
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# Temp files
|
|
2
|
+
.DS_Store
|
|
3
|
+
*~
|
|
4
|
+
buck-out/
|
|
5
|
+
|
|
6
|
+
# Compiled files
|
|
7
|
+
.venv/
|
|
8
|
+
__pycache__/
|
|
9
|
+
.mypy_cache/
|
|
10
|
+
.ruff_cache/
|
|
11
|
+
|
|
12
|
+
# Distribution / packaging
|
|
13
|
+
/build/
|
|
14
|
+
/dist/
|
|
15
|
+
/*.egg-info/
|
|
16
|
+
|
|
17
|
+
# Tests and coverage
|
|
18
|
+
/.pytest_cache/
|
|
19
|
+
/.cache/
|
|
20
|
+
/data/
|
|
21
|
+
|
|
22
|
+
# docs
|
|
23
|
+
/docs/generated/
|
|
24
|
+
/docs/_build/
|
|
25
|
+
|
|
26
|
+
# IDEs
|
|
27
|
+
/.idea/
|
|
28
|
+
/.vscode/
|
|
29
|
+
|
|
30
|
+
logs/
|
|
31
|
+
wandb/
|
|
32
|
+
.ipynb_checkpoints
|
|
33
|
+
artifacts/
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
fail_fast: false
|
|
2
|
+
default_language_version:
|
|
3
|
+
python: python3
|
|
4
|
+
default_stages:
|
|
5
|
+
- commit
|
|
6
|
+
- push
|
|
7
|
+
minimum_pre_commit_version: 2.16.0
|
|
8
|
+
repos:
|
|
9
|
+
- repo: https://github.com/pre-commit/mirrors-prettier
|
|
10
|
+
rev: v4.0.0-alpha.8
|
|
11
|
+
hooks:
|
|
12
|
+
- id: prettier
|
|
13
|
+
- repo: https://github.com/astral-sh/ruff-pre-commit
|
|
14
|
+
rev: v0.3.5
|
|
15
|
+
hooks:
|
|
16
|
+
- id: ruff
|
|
17
|
+
types_or: [python, pyi, jupyter]
|
|
18
|
+
args: [--fix, --exit-non-zero-on-fix]
|
|
19
|
+
- id: ruff-format
|
|
20
|
+
types_or: [python, pyi, jupyter]
|
|
21
|
+
- repo: https://github.com/pre-commit/pre-commit-hooks
|
|
22
|
+
rev: v4.6.0
|
|
23
|
+
hooks:
|
|
24
|
+
- id: detect-private-key
|
|
25
|
+
- id: check-ast
|
|
26
|
+
- id: end-of-file-fixer
|
|
27
|
+
- id: mixed-line-ending
|
|
28
|
+
args: [--fix=lf]
|
|
29
|
+
- id: trailing-whitespace
|
|
30
|
+
- id: check-case-conflict
|
|
31
|
+
# Check that there are no merge conflicts (could be generated by template sync)
|
|
32
|
+
- id: check-merge-conflict
|
|
33
|
+
args: [--assume-in-merge]
|
|
34
|
+
- repo: local
|
|
35
|
+
hooks:
|
|
36
|
+
- id: forbid-to-commit
|
|
37
|
+
name: Don't commit rej files
|
|
38
|
+
entry: |
|
|
39
|
+
Cannot commit .rej files. These indicate merge conflicts that arise during automated template updates.
|
|
40
|
+
Fix the merge conflicts manually and remove the .rej files.
|
|
41
|
+
language: fail
|
|
42
|
+
files: '.*\.rej$'
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# https://docs.readthedocs.io/en/stable/config-file/v2.html
|
|
2
|
+
version: 2
|
|
3
|
+
build:
|
|
4
|
+
os: ubuntu-20.04
|
|
5
|
+
tools:
|
|
6
|
+
python: "3.10"
|
|
7
|
+
sphinx:
|
|
8
|
+
configuration: docs/conf.py
|
|
9
|
+
# disable this for more lenient docs builds
|
|
10
|
+
fail_on_warning: true
|
|
11
|
+
python:
|
|
12
|
+
install:
|
|
13
|
+
- method: pip
|
|
14
|
+
path: .
|
|
15
|
+
extra_requirements:
|
|
16
|
+
- doc
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog][],
|
|
6
|
+
and this project adheres to [Semantic Versioning][].
|
|
7
|
+
|
|
8
|
+
[keep a changelog]: https://keepachangelog.com/en/1.0.0/
|
|
9
|
+
[semantic versioning]: https://semver.org/spec/v2.0.0.html
|
|
10
|
+
|
|
11
|
+
## [Unreleased]
|
|
12
|
+
|
|
13
|
+
### Added
|
|
14
|
+
|
|
15
|
+
- Basic tool, preprocessing and plotting functions
|
terra_st-0.0.0/LICENSE
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
BSD 3-Clause License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024, Lotfollahi-lab
|
|
4
|
+
|
|
5
|
+
Redistribution and use in source and binary forms, with or without
|
|
6
|
+
modification, are permitted provided that the following conditions are met:
|
|
7
|
+
|
|
8
|
+
1. Redistributions of source code must retain the above copyright notice, this
|
|
9
|
+
list of conditions and the following disclaimer.
|
|
10
|
+
|
|
11
|
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
|
12
|
+
this list of conditions and the following disclaimer in the documentation
|
|
13
|
+
and/or other materials provided with the distribution.
|
|
14
|
+
|
|
15
|
+
3. Neither the name of the copyright holder nor the names of its
|
|
16
|
+
contributors may be used to endorse or promote products derived from
|
|
17
|
+
this software without specific prior written permission.
|
|
18
|
+
|
|
19
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
20
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
21
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
22
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
23
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
24
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
25
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
26
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
27
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
28
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
terra_st-0.0.0/PKG-INFO
ADDED
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: terra-st
|
|
3
|
+
Version: 0.0.0
|
|
4
|
+
Summary: Spatial transcriptomics foundation model
|
|
5
|
+
Project-URL: Documentation, https://terra.readthedocs.io/
|
|
6
|
+
Project-URL: Source, https://github.com/Lotfollahi-lab/terra
|
|
7
|
+
Project-URL: Home-page, https://github.com/Lotfollahi-lab/terra
|
|
8
|
+
Author: Sebastian Birk, Mohammad Vali Sanian
|
|
9
|
+
Maintainer-email: Sebastian Birk <sebastian.birk@helmholtz-munich.de>, Mohammad Vali Sanian <mohammad.sanian@helsinki.fi>
|
|
10
|
+
License: BSD 3-Clause License
|
|
11
|
+
|
|
12
|
+
Copyright (c) 2024, Lotfollahi-lab
|
|
13
|
+
|
|
14
|
+
Redistribution and use in source and binary forms, with or without
|
|
15
|
+
modification, are permitted provided that the following conditions are met:
|
|
16
|
+
|
|
17
|
+
1. Redistributions of source code must retain the above copyright notice, this
|
|
18
|
+
list of conditions and the following disclaimer.
|
|
19
|
+
|
|
20
|
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
|
21
|
+
this list of conditions and the following disclaimer in the documentation
|
|
22
|
+
and/or other materials provided with the distribution.
|
|
23
|
+
|
|
24
|
+
3. Neither the name of the copyright holder nor the names of its
|
|
25
|
+
contributors may be used to endorse or promote products derived from
|
|
26
|
+
this software without specific prior written permission.
|
|
27
|
+
|
|
28
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
29
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
30
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
31
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
32
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
33
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
34
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
35
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
36
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
37
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
38
|
+
License-File: LICENSE
|
|
39
|
+
Requires-Python: >=3.10
|
|
40
|
+
Requires-Dist: anndata
|
|
41
|
+
Requires-Dist: datasets
|
|
42
|
+
Requires-Dist: ipykernel
|
|
43
|
+
Requires-Dist: jupyterlab
|
|
44
|
+
Requires-Dist: leidenalg
|
|
45
|
+
Requires-Dist: peft
|
|
46
|
+
Requires-Dist: pyensembl
|
|
47
|
+
Requires-Dist: scib-metrics
|
|
48
|
+
Requires-Dist: scikit-misc
|
|
49
|
+
Requires-Dist: session-info
|
|
50
|
+
Requires-Dist: squidpy
|
|
51
|
+
Requires-Dist: torch
|
|
52
|
+
Requires-Dist: transformers
|
|
53
|
+
Requires-Dist: wandb
|
|
54
|
+
Provides-Extra: dev
|
|
55
|
+
Requires-Dist: pre-commit; extra == 'dev'
|
|
56
|
+
Requires-Dist: twine>=4.0.2; extra == 'dev'
|
|
57
|
+
Provides-Extra: doc
|
|
58
|
+
Requires-Dist: docutils!=0.18.*,!=0.19.*,>=0.8; extra == 'doc'
|
|
59
|
+
Requires-Dist: ipykernel; extra == 'doc'
|
|
60
|
+
Requires-Dist: ipython; extra == 'doc'
|
|
61
|
+
Requires-Dist: myst-nb; extra == 'doc'
|
|
62
|
+
Requires-Dist: pandas; extra == 'doc'
|
|
63
|
+
Requires-Dist: sphinx-autodoc-typehints; extra == 'doc'
|
|
64
|
+
Requires-Dist: sphinx-book-theme>=1.0.0; extra == 'doc'
|
|
65
|
+
Requires-Dist: sphinx-copybutton; extra == 'doc'
|
|
66
|
+
Requires-Dist: sphinx>=4; extra == 'doc'
|
|
67
|
+
Requires-Dist: sphinxcontrib-bibtex>=1.0.0; extra == 'doc'
|
|
68
|
+
Requires-Dist: sphinxext-opengraph; extra == 'doc'
|
|
69
|
+
Provides-Extra: eval
|
|
70
|
+
Requires-Dist: cellphonedb; extra == 'eval'
|
|
71
|
+
Requires-Dist: omnipath; extra == 'eval'
|
|
72
|
+
Provides-Extra: test
|
|
73
|
+
Requires-Dist: coverage; extra == 'test'
|
|
74
|
+
Requires-Dist: pytest; extra == 'test'
|
|
75
|
+
Description-Content-Type: text/markdown
|
|
76
|
+
|
|
77
|
+
# TERRA
|
|
78
|
+
|
|
79
|
+
## Installation
|
|
80
|
+
|
|
81
|
+
To install the project and its dependencies, run:
|
|
82
|
+
|
|
83
|
+
```shell
|
|
84
|
+
pip install -e .
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
## Repository Structure
|
|
88
|
+
1. **`main.py`**
|
|
89
|
+
The main entry point for the project, which supports running training and evaluation sweeps. It includes command-line arguments for customization and handles multi-GPU setups.
|
|
90
|
+
|
|
91
|
+
2. **`configs/$DATASET.yaml`**
|
|
92
|
+
Configuration file that defines the dataset-specific hyperparameters and settings used during the training process, such as model architecture, data handling, and optimization settings (```$DATASET``` is the name of the dataset, e.g. ```merfish_300k```).
|
|
93
|
+
|
|
94
|
+
3. **`src/terra/models/gene_transformers.py`**
|
|
95
|
+
Contains the model definition for the gene transformer, implementing the core architecture that will be trained and evaluated.
|
|
96
|
+
|
|
97
|
+
4. **`src/app/training/train.py`**
|
|
98
|
+
Handles the training process in a distributed setting. This script contains the logic for executing the training loop and logging results.
|
|
99
|
+
|
|
100
|
+
5. **`src/app/inference/infer.py`**
|
|
101
|
+
Manages the evaluation process. It evaluates the trained model on the specified tasks and logs the performance metrics.
|
|
102
|
+
|
|
103
|
+
6. **`src/terra/utils/config.py`**
|
|
104
|
+
Includes helper functions to set up the model and batch size parameters.
|
|
105
|
+
|
|
106
|
+
7. **`src/terra/utils/embedding.py`**
|
|
107
|
+
Provides utility functions for handling and loading embeddings required by the model during training and inference.
|
|
108
|
+
|
|
109
|
+
8. **`src/terra/utils/evaluation.py`**
|
|
110
|
+
Includes helper functions to streamline the evaluation process, such as metrics calculations and data preparation.
|
|
111
|
+
|
|
112
|
+
9. **`src/terra/datasets/cell_datasets.py`**
|
|
113
|
+
Includes helper functions to create torch datasets for data loading.
|
|
114
|
+
|
|
115
|
+
10. **`tests`**
|
|
116
|
+
Includes test cases for different functionalities.
|
|
117
|
+
|
|
118
|
+
## Usage
|
|
119
|
+
|
|
120
|
+
### Training
|
|
121
|
+
|
|
122
|
+
To start training with a single GPU, use the following command:
|
|
123
|
+
|
|
124
|
+
```shell
|
|
125
|
+
python -m pdb main.py --fname configs/$DATASET.yaml --devices cuda:0
|
|
126
|
+
```
|
|
127
|
+
where ```$DATASET``` is the name of the dataset, e.g. ```merfish_300k```.
|
|
128
|
+
|
|
129
|
+
To start training with multiple GPUs, use the following command:
|
|
130
|
+
|
|
131
|
+
```shell
|
|
132
|
+
python -m pdb main.py --fname configs/$DATASET.yaml --devices cuda:0 cuda:1
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
To perform a sweep during training, use:
|
|
136
|
+
|
|
137
|
+
```shell
|
|
138
|
+
python -m pdb main.py --fname configs/$DATASET.yaml --devices cuda:0 --do_sweep
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
For multi-node training, first configure the required settings in your job_config file.
|
|
142
|
+
Then, execute the following command:
|
|
143
|
+
|
|
144
|
+
```shell
|
|
145
|
+
bsub_mn_mg_yaml configs/job/hst_corpus_70m_test.yaml
|
|
146
|
+
```
|
terra_st-0.0.0/README.md
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
# TERRA
|
|
2
|
+
|
|
3
|
+
## Installation
|
|
4
|
+
|
|
5
|
+
To install the project and its dependencies, run:
|
|
6
|
+
|
|
7
|
+
```shell
|
|
8
|
+
pip install -e .
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Repository Structure
|
|
12
|
+
1. **`main.py`**
|
|
13
|
+
The main entry point for the project, which supports running training and evaluation sweeps. It includes command-line arguments for customization and handles multi-GPU setups.
|
|
14
|
+
|
|
15
|
+
2. **`configs/$DATASET.yaml`**
|
|
16
|
+
Configuration file that defines the dataset-specific hyperparameters and settings used during the training process, such as model architecture, data handling, and optimization settings (```$DATASET``` is the name of the dataset, e.g. ```merfish_300k```).
|
|
17
|
+
|
|
18
|
+
3. **`src/terra/models/gene_transformer.py`**
|
|
19
|
+
Contains the model definition for the gene transformer, implementing the core architecture that will be trained and evaluated.
|
|
20
|
+
|
|
21
|
+
4. **`src/terra/train.py`**
|
|
22
|
+
Handles the training process in a distributed setting. This script contains the logic for executing the training loop and logging results.
|
|
23
|
+
|
|
24
|
+
5. **`src/terra/infer.py`**
|
|
25
|
+
Manages the evaluation process. It evaluates the trained model on the specified tasks and logs the performance metrics.
|
|
26
|
+
|
|
27
|
+
6. **`src/terra/utils/config.py`**
|
|
28
|
+
Includes helper functions to set up the model and batch size parameters.
|
|
29
|
+
|
|
30
|
+
7. **`src/terra/utils/embedding.py`**
|
|
31
|
+
Provides utility functions for handling and loading embeddings required by the model during training and inference.
|
|
32
|
+
|
|
33
|
+
8. **`src/terra/utils/evaluation.py`**
|
|
34
|
+
Includes helper functions to streamline the evaluation process, such as metrics calculations and data preparation.
|
|
35
|
+
|
|
36
|
+
9. **`src/terra/datasets/cell_neighborhood_dataset.py`**
|
|
37
|
+
Includes helper functions to create torch datasets for data loading.
|
|
38
|
+
|
|
39
|
+
10. **`tests`**
|
|
40
|
+
Includes test cases for different functionalities.
|
|
41
|
+
|
|
42
|
+
## Usage
|
|
43
|
+
|
|
44
|
+
### Training
|
|
45
|
+
|
|
46
|
+
To start training with a single GPU, use the following command:
|
|
47
|
+
|
|
48
|
+
```shell
|
|
49
|
+
python -m pdb main.py --fname configs/$DATASET.yaml --devices cuda:0
|
|
50
|
+
```
|
|
51
|
+
where ```$DATASET``` is the name of the dataset, e.g. ```merfish_300k```.
|
|
52
|
+
|
|
53
|
+
To start training with multiple GPUs, use the following command:
|
|
54
|
+
|
|
55
|
+
```shell
|
|
56
|
+
python -m pdb main.py --fname configs/$DATASET.yaml --devices cuda:0 cuda:1
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
To perform a sweep during training, use:
|
|
60
|
+
|
|
61
|
+
```shell
|
|
62
|
+
python -m pdb main.py --fname configs/$DATASET.yaml --devices cuda:0 --do_sweep
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
For multi-node training, first configure the required settings in your job_config file.
|
|
66
|
+
Then, execute the following command:
|
|
67
|
+
|
|
68
|
+
```shell
|
|
69
|
+
bsub_mn_mg_yaml configs/job/hst_corpus_70m_test.yaml
|
|
70
|
+
```
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# All keys in this YAML file must be in capital letters since
|
|
2
|
+
# they will be treated as environment variables.
|
|
3
|
+
|
|
4
|
+
job_requirement:
|
|
5
|
+
LSB_DEFAULT_USERGROUP: team361 # The user group to be used for scheduling the job.
|
|
6
|
+
QUEUE: "training-parallel" # "gpu-lotfollahi", "gpu-lotfollahi-train" # The job queue to be used for scheduling the job.
|
|
7
|
+
NUM_NODES: 1 # Number of compute nodes required for the job.
|
|
8
|
+
NUM_GPUS_NODE: 1 # Number of GPUs per node.
|
|
9
|
+
NUM_PROCESSES_NODE: 16 # Number of processes per node.
|
|
10
|
+
MEM_NODE: 64 # Memory allocated per node (in GB).
|
|
11
|
+
|
|
12
|
+
infiniband:
|
|
13
|
+
NCCL_IB_DISABLE: 1 # 1: Disable InfiniBand, 0: Enable InfiniBand
|
|
14
|
+
|
|
15
|
+
environment:
|
|
16
|
+
ENV_PATH: "/nfs/users/nfs_s/sb75/.venvs/terra" # Path to the environment to be activated.
|
|
17
|
+
|
|
18
|
+
artifact:
|
|
19
|
+
ARTIFACT_LOCATION: "/nfs/team361/sb75/terra-reproducibility/artifacts/chl59-8b_1p" # Path to store the artifacts.
|
|
20
|
+
WAREHOUSE_PATH: "/nfs/team361/sb75/terra-reproducibility/artifacts/chl59-8b_1p" # Path to store the artifacts.
|
|
21
|
+
OUTPUT_DIR: "/nfs/team361/sb75/terra-reproducibility/artifacts/chl59-8b_1p" # Path to store the artifacts.
|
|
22
|
+
LOG_DIR: "/nfs/team361/sb75/terra-reproducibility/artifacts/chl59-8b_1p"
|
|
23
|
+
EXPERIMENT_NAME: "terra_chl59-8b_1p" # Name of the experiment.
|
|
24
|
+
RUN_NAME: "run4" # Name of the run.
|
|
25
|
+
|
|
26
|
+
runner_script:
|
|
27
|
+
RUNNER_SCRIPT: "/nfs/team361/sb75/terra/src/app/training/train_model_lsf_mpirun_sanger.sh" # Path to the runner script.
|
|
28
|
+
TRAINING_SCRIPT_PATH: "/nfs/team361/sb75/terra/src/app/training/main.py" # Path to the training script.
|
|
29
|
+
BACKEND: "nccl" # Backend for distributed training.
|
|
30
|
+
CONFIG_FILE: "/nfs/team361/sb75/terra/configs/model/chl59-8b_1p/chl59-8b_1p.yaml"
|