trackcell 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,59 @@
1
+ Metadata-Version: 2.3
2
+ Name: trackcell
3
+ Version: 0.1.1
4
+ Summary: A Python package for processing single-cell and spatial transcriptomics data
5
+ Author: Zan Yuan
6
+ Author-email: yfinddream@gmail.com
7
+ Requires-Python: >=3.10,<4.0
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.10
10
+ Classifier: Programming Language :: Python :: 3.11
11
+ Classifier: Programming Language :: Python :: 3.12
12
+ Classifier: Programming Language :: Python :: 3.13
13
+ Requires-Dist: geopandas (>=1.1.1,<2.0.0)
14
+ Requires-Dist: imageio (>=2.31.0,<3.0.0)
15
+ Requires-Dist: numpy (>=1.24.0,<2.0.0)
16
+ Requires-Dist: pandas (>=2.1.0,<3.0.0)
17
+ Requires-Dist: scanpy (>=1.9.0,<2.0.0)
18
+ Requires-Dist: shapely (>=2.0.0,<3.0.0)
19
+ Description-Content-Type: text/markdown
20
+
21
+ # TrackCell
22
+
23
+ A Python package for processing and visualizing single-cell and spatial transcriptomics data.
24
+
25
+ ## Installation
26
+
27
+ ```bash
28
+ pip install trackcell
29
+ ```
30
+
31
+ ## Usage
32
+
33
+ ### Reading SpaceRanger Output
34
+
35
+ ```python
36
+ import trackcell as tcl
37
+
38
+ # Read SpaceRanger output
39
+ adata = tcl.io.read_hd_cellseg(
40
+ datapath="SpaceRanger4.0/Cse1/outs/segmented_outputs",
41
+ sample="Cse1"
42
+ )
43
+
44
+ # The resulting AnnData object contains:
45
+ # - Expression matrix in .X
46
+ # - Cell metadata in .obs
47
+ # - Gene metadata in .var
48
+ # - Spatial coordinates in .obsm["spatial"]
49
+ # - Tissue images in .uns["spatial"][sample]["images"]
50
+ # - Scalefactors in .uns["spatial"][sample]["scalefactors"]
51
+ ```
52
+
53
+
54
+ ## Development
55
+
56
+
57
+ ## License
58
+
59
+
@@ -0,0 +1,38 @@
1
+ # TrackCell
2
+
3
+ A Python package for processing and visualizing single-cell and spatial transcriptomics data.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ pip install trackcell
9
+ ```
10
+
11
+ ## Usage
12
+
13
+ ### Reading SpaceRanger Output
14
+
15
+ ```python
16
+ import trackcell as tcl
17
+
18
+ # Read SpaceRanger output
19
+ adata = tcl.io.read_hd_cellseg(
20
+ datapath="SpaceRanger4.0/Cse1/outs/segmented_outputs",
21
+ sample="Cse1"
22
+ )
23
+
24
+ # The resulting AnnData object contains:
25
+ # - Expression matrix in .X
26
+ # - Cell metadata in .obs
27
+ # - Gene metadata in .var
28
+ # - Spatial coordinates in .obsm["spatial"]
29
+ # - Tissue images in .uns["spatial"][sample]["images"]
30
+ # - Scalefactors in .uns["spatial"][sample]["scalefactors"]
31
+ ```
32
+
33
+
34
+ ## Development
35
+
36
+
37
+ ## License
38
+
@@ -0,0 +1,36 @@
1
+ [tool.poetry]
2
+ name = "trackcell"
3
+ version = "0.1.1"
4
+ description = "A Python package for processing single-cell and spatial transcriptomics data"
5
+ authors = ["Zan Yuan <yfinddream@gmail.com>"]
6
+ readme = "README.md"
7
+ packages = [{include = "trackcell"}]
8
+
9
+ [tool.poetry.dependencies]
10
+ python = "^3.10"
11
+ scanpy = "^1.9.0"
12
+ geopandas = "^1.1.1"
13
+ pandas = "^2.1.0"
14
+ shapely = "^2.0.0"
15
+ imageio = "^2.31.0"
16
+ numpy = "^1.24.0"
17
+
18
+ [tool.poetry.group.dev.dependencies]
19
+ pytest = "^7.0.0"
20
+ black = "^23.0.0"
21
+ flake8 = "^6.0.0"
22
+ mypy = "^1.0.0"
23
+
24
+ [build-system]
25
+ requires = ["poetry-core"]
26
+ build-backend = "poetry.core.masonry.api"
27
+
28
+ [tool.black]
29
+ line-length = 88
30
+ target-version = ['py310']
31
+
32
+ [tool.mypy]
33
+ python_version = "3.10"
34
+ warn_return_any = true
35
+ warn_unused_configs = true
36
+ disallow_untyped_defs = true
@@ -0,0 +1,18 @@
1
+ """
2
+ TrackCell: A Python package for processing single-cell and spatial transcriptomics data.
3
+
4
+ This package provides tools for:
5
+ - IO: Data input/output operations
6
+ - PL: Processing and analysis tools
7
+ - TL: Utility and helper functions
8
+ """
9
+
10
+ __version__ = "0.1.1"
11
+ __author__ = "Zan Yuan"
12
+ __email__ = "yfinddream@gmail.com"
13
+
14
+ from . import io
15
+ from . import pl
16
+ from . import tl
17
+
18
+ __all__ = ["io", "pl", "tl"]
@@ -0,0 +1,9 @@
1
+ """
2
+ IO module for TrackCell package.
3
+
4
+ This module provides functions for reading and writing single-cell and spatial transcriptomics data.
5
+ """
6
+
7
+ from .spatial import read_hd_cellseg
8
+
9
+ __all__ = ["read_hd_cellseg"]
@@ -0,0 +1,182 @@
1
+ """
2
+ Spatial transcriptomics data input/output functions.
3
+
4
+ This module provides functions for reading and writing spatial transcriptomics data,
5
+ particularly from SpaceRanger output.
6
+ """
7
+
8
+ import scanpy as sc
9
+ import geopandas as gpd
10
+ import pandas as pd
11
+ from shapely import wkt, geometry
12
+ import numpy as np
13
+ import json
14
+ import imageio.v3 as iio
15
+ import os
16
+ from typing import Optional
17
+ import ast
18
+
19
def read_hd_cellseg(
    datapath: str,
    sample: Optional[str] = None,
    cell_segmentations_file: str = "graphclust_annotated_cell_segmentations.geojson",
    matrix_file: str = "filtered_feature_cell_matrix.h5",
    hires_image_file: str = "spatial/tissue_hires_image.png",
    lowres_image_file: str = "spatial/tissue_lowres_image.png",
    scalefactors_file: str = "spatial/scalefactors_json.json"
) -> sc.AnnData:
    """
    Read 10X HD SpaceRanger cell segmentation output into an AnnData object.

    The expression matrix is restricted to the cells that also appear in the
    segmentation file, each cell's polygon centroid is stored as its spatial
    coordinate, and tissue images / scalefactors are attached when available.

    Parameters
    ----------
    datapath : str
        Path to the SpaceRanger output directory containing segmented outputs.
        All other file arguments are resolved relative to this directory.
    sample : str, optional
        Key used under ``.uns["spatial"]``. If None, the literal name
        ``"sample"`` is used (no inference from the path is attempted).
    cell_segmentations_file : str, default "graphclust_annotated_cell_segmentations.geojson"
        Cell segmentation GeoJSON file. It must provide a ``cell_id`` property
        and a geometry per cell; an optional ``classification`` property
        (a dict with ``name`` and ``color``, or its string representation)
        is used for cell labels and colors.
    matrix_file : str, default "filtered_feature_cell_matrix.h5"
        Filtered feature-cell matrix file.
    hires_image_file : str, default "spatial/tissue_hires_image.png"
        High-resolution tissue image.
    lowres_image_file : str, default "spatial/tissue_lowres_image.png"
        Low-resolution tissue image.
    scalefactors_file : str, default "spatial/scalefactors_json.json"
        Scalefactors JSON file.

    Returns
    -------
    sc.AnnData
        AnnData object containing:
        - Expression matrix in .X
        - Cell metadata in .obs (``classification`` when the segmentation
          file provides it)
        - Gene metadata in .var
        - Polygon-centroid coordinates in .obsm["spatial"]
        - Classification colors in .uns["classification_colors"] (when the
          segmentation file provides classifications)
        - Tissue images in .uns["spatial"][sample]["images"] (only the
          images that could be read)
        - Scalefactors in .uns["spatial"][sample]["scalefactors"]

    Examples
    --------
    >>> import trackcell.io as tcio
    >>> adata = tcio.read_hd_cellseg("SpaceRanger4.0/Case1/outs/segmented_outputs", sample="Case1")
    >>> print(adata)
    AnnData object with n_obs × n_vars = 1000 × 2000
        obs: 'classification'
        uns: 'classification_colors', 'spatial'
        obsm: 'spatial'

    Notes
    -----
    With the default arguments the following layout is expected below
    ``datapath``:
    - graphclust_annotated_cell_segmentations.geojson: Cell segmentation polygons
    - filtered_feature_cell_matrix.h5: Expression matrix
    - spatial/tissue_hires_image.png: High-resolution tissue image
    - spatial/tissue_lowres_image.png: Low-resolution tissue image
    - spatial/scalefactors_json.json: Image scaling factors

    Missing image or scalefactor files are not fatal: a warning is printed
    and the corresponding entry is left out (images) or empty (scalefactors).
    """
    # Resolve once so every file below is addressed by an absolute path.
    datapath = os.path.abspath(datapath)

    # No inference is attempted; fall back to a fixed key.
    if sample is None:
        sample = 'sample'

    # Read cell segmentations and derive the barcode-style id used as
    # obs_names in the matrix file ("cellid_<zero-padded id>-1").
    gdf_seg = gpd.read_file(os.path.join(datapath, cell_segmentations_file))
    df = pd.DataFrame(gdf_seg)
    df['cellid'] = df['cell_id'].apply(lambda x: f"cellid_{str(x).zfill(9)}-1")

    # Read expression matrix
    adata = sc.read_10x_h5(os.path.join(datapath, matrix_file))

    # Keep only cells that have a segmentation. Copy so that the writes to
    # .obsm/.obs/.uns below act on a real object rather than on a view.
    adata = adata[adata.obs_names.isin(df['cellid']), :].copy()

    # Put the segmentation rows in the same order as adata.obs_names.
    df = df.set_index("cellid").loc[adata.obs_names].reset_index()

    # Convert geometry strings to shapely objects if needed. The length guard
    # avoids an IndexError when no cell ids matched.
    if len(df) > 0 and isinstance(df["geometry"].iloc[0], str):
        df["geometry"] = df["geometry"].apply(wkt.loads)

    # Use each polygon's centroid as the cell position (computed once).
    centroids = [poly.centroid for poly in df["geometry"]]
    df["x"] = [point.x for point in centroids]
    df["y"] = [point.y for point in centroids]
    adata.obsm["spatial"] = df[["x", "y"]].to_numpy(dtype=float)

    if 'classification' in df.columns:
        # Entries may arrive as dicts or as their string representation;
        # decide per element instead of trusting the first row only.
        classifications = [
            ast.literal_eval(item) if isinstance(item, str) else item
            for item in df['classification']
        ]
        adata.obs['classification'] = [
            item['name'] if isinstance(item, dict) else None
            for item in classifications
        ]
        # Hand the parsed values (not the raw strings) to the color helper.
        df['classification'] = classifications
        adata.uns['classification_colors'] = convert_classification_to_color_dict(df, 'classification')

    # Read tissue images independently so one missing file does not discard
    # the other.
    hires_img = _read_image_or_none(os.path.join(datapath, hires_image_file))
    lowres_img = _read_image_or_none(os.path.join(datapath, lowres_image_file))

    # Initialize spatial metadata
    adata.uns["spatial"] = {sample: {}}

    # Load scalefactors
    try:
        with open(os.path.join(datapath, scalefactors_file), 'r', encoding='utf-8') as file:
            scalefactor = json.load(file)
    except FileNotFoundError as e:
        print(f"Warning: Could not load scalefactors: {e}")
        scalefactor = {}

    # Store whichever images were found, plus the scalefactors.
    images = {
        key: img
        for key, img in (("hires", hires_img), ("lowres", lowres_img))
        if img is not None
    }
    if images:
        adata.uns["spatial"][sample]["images"] = images

    adata.uns["spatial"][sample]["scalefactors"] = scalefactor

    return adata


def _read_image_or_none(path: str) -> Optional[np.ndarray]:
    """Read the image at ``path``; print a warning and return None if the file is missing."""
    try:
        return iio.imread(path)
    except FileNotFoundError as e:
        print(f"Warning: Could not load tissue images: {e}")
        return None
152
+
153
+
154
+
155
def convert_classification_to_color_dict(df: pd.DataFrame, classification_col: str = 'classification') -> dict:
    """
    Build a ``{classification name: hex color}`` dictionary from a DataFrame column.

    Each cell of the column may be:
    - a dict with a ``'name'`` and a ``'color'`` entry, where ``'color'`` is a
      sequence whose first three items are the red, green and blue components;
    - the string representation of such a dict (parsed with ``ast.literal_eval``);
    - a list/tuple of such dicts.
    Anything else (missing values, unparsable strings, dicts without a usable
    name/color) is skipped.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the classification column.
    classification_col : str, default 'classification'
        Name of the column containing the classification entries.

    Returns
    -------
    dict
        Mapping from classification name to a ``'#rrggbb'`` color string. When
        a name occurs more than once, the color of its first occurrence is kept.
    """
    color_dict = {}

    # Walk the values directly: Series.explode() would split a dict into its
    # keys, and Series.unique() cannot hash dicts.
    for entry in df[classification_col]:
        if isinstance(entry, str):
            try:
                entry = ast.literal_eval(entry)
            except (ValueError, SyntaxError):
                continue

        candidates = entry if isinstance(entry, (list, tuple)) else [entry]
        for cls in candidates:
            if not isinstance(cls, dict):
                continue
            name = cls.get('name')
            rgb = cls.get('color')
            if name is None or name in color_dict:
                continue
            if not isinstance(rgb, (list, tuple, np.ndarray)) or len(rgb) < 3:
                continue
            # Only the first three components are used; any alpha is ignored.
            red, green, blue = (int(channel) for channel in rgb[:3])
            color_dict[name] = '#{:02x}{:02x}{:02x}'.format(red, green, blue)

    return color_dict
@@ -0,0 +1,8 @@
1
+ """
2
+ PL (Processing) module for TrackCell package.
3
+
4
+ This module provides functions for processing and analyzing single-cell and spatial transcriptomics data.
5
+ """
6
+
7
+ # Add processing functions here as they are implemented
8
+ __all__ = []
@@ -0,0 +1,8 @@
1
+ """
2
+ TL (Tools) module for TrackCell package.
3
+
4
+ This module provides utility and helper functions for single-cell and spatial transcriptomics data analysis.
5
+ """
6
+
7
+ # Add utility functions here as they are implemented
8
+ __all__ = []