trackcell 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- trackcell-0.1.1/PKG-INFO +59 -0
- trackcell-0.1.1/README.md +38 -0
- trackcell-0.1.1/pyproject.toml +36 -0
- trackcell-0.1.1/trackcell/__init__.py +18 -0
- trackcell-0.1.1/trackcell/io/__init__.py +9 -0
- trackcell-0.1.1/trackcell/io/spatial.py +182 -0
- trackcell-0.1.1/trackcell/pl/__init__.py +8 -0
- trackcell-0.1.1/trackcell/tl/__init__.py +8 -0
trackcell-0.1.1/PKG-INFO
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: trackcell
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: A Python package for processing single-cell and spatial transcriptomics data
|
|
5
|
+
Author: Zan Yuan
|
|
6
|
+
Author-email: yfinddream@gmail.com
|
|
7
|
+
Requires-Python: >=3.10,<4.0
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
13
|
+
Requires-Dist: geopandas (>=1.1.1,<2.0.0)
|
|
14
|
+
Requires-Dist: imageio (>=2.31.0,<3.0.0)
|
|
15
|
+
Requires-Dist: numpy (>=1.24.0,<2.0.0)
|
|
16
|
+
Requires-Dist: pandas (>=2.1.0,<3.0.0)
|
|
17
|
+
Requires-Dist: scanpy (>=1.9.0,<2.0.0)
|
|
18
|
+
Requires-Dist: shapely (>=2.0.0,<3.0.0)
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
|
|
21
|
+
# TrackCell
|
|
22
|
+
|
|
23
|
+
A Python package for processing and visualizing single-cell and spatial transcriptomics data.
|
|
24
|
+
|
|
25
|
+
## Installation
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
pip install trackcell
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
## Usage
|
|
32
|
+
|
|
33
|
+
### Reading SpaceRanger Output
|
|
34
|
+
|
|
35
|
+
```python
|
|
36
|
+
import trackcell as tcl
|
|
37
|
+
|
|
38
|
+
# Read SpaceRanger output
|
|
39
|
+
adata = tcl.io.read_hd_cellseg(
|
|
40
|
+
datapath="SpaceRanger4.0/Cse1/outs/segmented_outputs",
|
|
41
|
+
sample="Cse1"
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
# The resulting AnnData object contains:
|
|
45
|
+
# - Expression matrix in .X
|
|
46
|
+
# - Cell metadata in .obs
|
|
47
|
+
# - Gene metadata in .var
|
|
48
|
+
# - Spatial coordinates in .obsm["spatial"]
|
|
49
|
+
# - Tissue images in .uns["spatial"][sample]["images"]
|
|
50
|
+
# - Scalefactors in .uns["spatial"][sample]["scalefactors"]
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
## Development
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
## License
|
|
58
|
+
|
|
59
|
+
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# TrackCell
|
|
2
|
+
|
|
3
|
+
A Python package for processing and visualizing single-cell and spatial transcriptomics data.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install trackcell
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Usage
|
|
12
|
+
|
|
13
|
+
### Reading SpaceRanger Output
|
|
14
|
+
|
|
15
|
+
```python
|
|
16
|
+
import trackcell as tcl
|
|
17
|
+
|
|
18
|
+
# Read SpaceRanger output
|
|
19
|
+
adata = tcl.io.read_hd_cellseg(
|
|
20
|
+
datapath="SpaceRanger4.0/Cse1/outs/segmented_outputs",
|
|
21
|
+
sample="Cse1"
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
# The resulting AnnData object contains:
|
|
25
|
+
# - Expression matrix in .X
|
|
26
|
+
# - Cell metadata in .obs
|
|
27
|
+
# - Gene metadata in .var
|
|
28
|
+
# - Spatial coordinates in .obsm["spatial"]
|
|
29
|
+
# - Tissue images in .uns["spatial"][sample]["images"]
|
|
30
|
+
# - Scalefactors in .uns["spatial"][sample]["scalefactors"]
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
## Development
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
## License
|
|
38
|
+
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
[tool.poetry]
|
|
2
|
+
name = "trackcell"
|
|
3
|
+
version = "0.1.1"
|
|
4
|
+
description = "A Python package for processing single-cell and spatial transcriptomics data"
|
|
5
|
+
authors = ["Zan Yuan <yfinddream@gmail.com>"]
|
|
6
|
+
readme = "README.md"
|
|
7
|
+
packages = [{include = "trackcell"}]
|
|
8
|
+
|
|
9
|
+
[tool.poetry.dependencies]
|
|
10
|
+
python = "^3.10"
|
|
11
|
+
scanpy = "^1.9.0"
|
|
12
|
+
geopandas = "^1.1.1"
|
|
13
|
+
pandas = "^2.1.0"
|
|
14
|
+
shapely = "^2.0.0"
|
|
15
|
+
imageio = "^2.31.0"
|
|
16
|
+
numpy = "^1.24.0"
|
|
17
|
+
|
|
18
|
+
[tool.poetry.group.dev.dependencies]
|
|
19
|
+
pytest = "^7.0.0"
|
|
20
|
+
black = "^23.0.0"
|
|
21
|
+
flake8 = "^6.0.0"
|
|
22
|
+
mypy = "^1.0.0"
|
|
23
|
+
|
|
24
|
+
[build-system]
|
|
25
|
+
requires = ["poetry-core"]
|
|
26
|
+
build-backend = "poetry.core.masonry.api"
|
|
27
|
+
|
|
28
|
+
[tool.black]
|
|
29
|
+
line-length = 88
|
|
30
|
+
target-version = ['py310']
|
|
31
|
+
|
|
32
|
+
[tool.mypy]
|
|
33
|
+
python_version = "3.10"
|
|
34
|
+
warn_return_any = true
|
|
35
|
+
warn_unused_configs = true
|
|
36
|
+
disallow_untyped_defs = true
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"""
|
|
2
|
+
TrackCell: A Python package for processing single-cell and spatial transcriptomics data.
|
|
3
|
+
|
|
4
|
+
This package provides tools for:
|
|
5
|
+
- IO: Data input/output operations
|
|
6
|
+
- PL: Processing and analysis tools
|
|
7
|
+
- TL: Utility and helper functions
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
__version__ = "0.1.1"
|
|
11
|
+
__author__ = "Zan Yuan"
|
|
12
|
+
__email__ = "yfinddream@gmail.com"
|
|
13
|
+
|
|
14
|
+
from . import io
|
|
15
|
+
from . import pl
|
|
16
|
+
from . import tl
|
|
17
|
+
|
|
18
|
+
__all__ = ["io", "pl", "tl"]
|
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Spatial transcriptomics data input/output functions.
|
|
3
|
+
|
|
4
|
+
This module provides functions for reading and writing spatial transcriptomics data,
|
|
5
|
+
particularly from SpaceRanger output.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import scanpy as sc
|
|
9
|
+
import geopandas as gpd
|
|
10
|
+
import pandas as pd
|
|
11
|
+
from shapely import wkt, geometry
|
|
12
|
+
import numpy as np
|
|
13
|
+
import json
|
|
14
|
+
import imageio.v3 as iio
|
|
15
|
+
import os
|
|
16
|
+
from typing import Optional
|
|
17
|
+
import ast
|
|
18
|
+
|
|
19
|
+
def read_hd_cellseg(
    datapath: str,
    sample: Optional[str] = None,
    cell_segmentations_file: str = "graphclust_annotated_cell_segmentations.geojson",
    matrix_file: str = "filtered_feature_cell_matrix.h5",
    hires_image_file: str = "spatial/tissue_hires_image.png",
    lowres_image_file: str = "spatial/tissue_lowres_image.png",
    scalefactors_file: str = "spatial/scalefactors_json.json"
) -> sc.AnnData:
    """
    Read 10X HD SpaceRanger cell segmentation output and create an AnnData object with spatial information.

    The expression matrix is restricted to the cells that also appear in the
    segmentation file, and the centroid of each cell polygon is stored as the
    cell's spatial coordinate.

    Parameters
    ----------
    datapath : str or path-like
        Path to the SpaceRanger output directory containing segmented outputs.
        All file arguments below are resolved relative to this directory.
    sample : str, optional
        Key used under ``.uns["spatial"]``. Defaults to ``"sample"`` when None.
    cell_segmentations_file : str, default "graphclust_annotated_cell_segmentations.geojson"
        Name of the cell segmentations GeoJSON file. It must provide a
        ``cell_id`` property and a geometry for every cell.
    matrix_file : str, default "filtered_feature_cell_matrix.h5"
        Name of the filtered feature-cell matrix file.
    hires_image_file : str, default "spatial/tissue_hires_image.png"
        Path to the high-resolution tissue image relative to datapath.
    lowres_image_file : str, default "spatial/tissue_lowres_image.png"
        Path to the low-resolution tissue image relative to datapath.
    scalefactors_file : str, default "spatial/scalefactors_json.json"
        Path to the scalefactors JSON file relative to datapath.

    Returns
    -------
    sc.AnnData
        AnnData object containing:
        - Expression matrix in .X
        - Cell metadata in .obs (plus ``classification`` when the
          segmentation file carries that property)
        - Gene metadata in .var
        - Centroid coordinates in .obsm["spatial"]
        - Tissue images in .uns["spatial"][sample]["images"] (only when both
          images could be read)
        - Scalefactors in .uns["spatial"][sample]["scalefactors"] (an empty
          dict when the file is missing)
        - Class colors in .uns["classification_colors"] when a
          ``classification`` property is present

    Examples
    --------
    >>> import trackcell.io as tcio
    >>> adata = tcio.read_hd_cellseg(
    ...     "SpaceRanger4.0/Case1/outs/segmented_outputs", sample="Case1"
    ... )  # doctest: +SKIP

    Notes
    -----
    With the default arguments the following files are read from ``datapath``:
    - graphclust_annotated_cell_segmentations.geojson: Cell segmentation polygons
    - filtered_feature_cell_matrix.h5: Expression matrix
    - spatial/tissue_hires_image.png: High-resolution tissue image
    - spatial/tissue_lowres_image.png: Low-resolution tissue image
    - spatial/scalefactors_json.json: Image scaling factors

    Missing image or scalefactor files are reported with a printed warning
    instead of raising.
    """
    # Resolve once so every file below is located from an absolute directory
    datapath = os.path.abspath(datapath)

    # No inference is attempted: fall back to a fixed key
    if sample is None:
        sample = 'sample'

    # Read cell segmentations
    gdf_seg = gpd.read_file(os.path.join(datapath, cell_segmentations_file))
    df = pd.DataFrame(gdf_seg)
    # Build the barcode used as obs name in the matrix from the numeric cell id
    df['cellid'] = df['cell_id'].apply(lambda x: f"cellid_{str(x).zfill(9)}-1")

    # Read expression matrix
    adata = sc.read_10x_h5(os.path.join(datapath, matrix_file))

    # Keep only cells that have a segmentation. Copy so the writes to
    # .obsm/.obs/.uns below act on a real object rather than on a view.
    adata = adata[adata.obs_names.isin(df['cellid']), :].copy()

    # Reorder the segmentation table to follow the AnnData cell order
    df = df.set_index("cellid").loc[adata.obs_names].reset_index()
    has_cells = len(df) > 0

    # Convert geometry strings to shapely objects if needed
    if has_cells and isinstance(df["geometry"].iloc[0], str):
        df["geometry"] = df["geometry"].apply(wkt.loads)

    # Extract centroid coordinates
    df["x"] = df["geometry"].apply(lambda poly: poly.centroid.x)
    df["y"] = df["geometry"].apply(lambda poly: poly.centroid.y)

    # Store spatial coordinates
    adata.obsm["spatial"] = np.array(df[["x", "y"]])

    if 'classification' in df.columns:
        # The property may arrive as a string-encoded dict; parse it if so
        if has_cells and isinstance(df['classification'].iloc[0], str):
            classifications = df['classification'].apply(ast.literal_eval)
        else:
            classifications = df['classification']
        adata.obs['classification'] = [i['name'] for i in classifications]
        adata.uns['classification_colors'] = convert_classification_to_color_dict(df, 'classification')

    # Read tissue images; either one missing disables both
    try:
        hires_img = iio.imread(os.path.join(datapath, hires_image_file))
        lowres_img = iio.imread(os.path.join(datapath, lowres_image_file))
    except FileNotFoundError as e:
        print(f"Warning: Could not load tissue images: {e}")
        hires_img = None
        lowres_img = None

    # Initialize spatial metadata
    adata.uns["spatial"] = {sample: {}}

    # Load scalefactors
    try:
        with open(os.path.join(datapath, scalefactors_file), 'r', encoding='utf-8') as file:
            scalefactor = json.load(file)
    except FileNotFoundError as e:
        print(f"Warning: Could not load scalefactors: {e}")
        scalefactor = {}

    # Store images and scalefactors
    if hires_img is not None and lowres_img is not None:
        adata.uns["spatial"][sample]["images"] = {
            "hires": hires_img,
            "lowres": lowres_img
        }

    adata.uns["spatial"][sample]["scalefactors"] = scalefactor

    return adata
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def convert_classification_to_color_dict(df, classification_col='classification'):
    """
    Build a mapping from classification name to hex color code.

    Each entry of the column is expected to be a dict with a ``'name'`` and a
    ``'color'`` key (``'color'`` being a sequence of at least three integer
    channel values), a list/tuple of such dicts, or the string representation
    of either. Entries of any other shape are skipped.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding the classification column.
    classification_col : str, default 'classification'
        Name of the column containing the classification entries.

    Returns
    -------
    dict
        ``{classification name: '#rrggbb'}``. When a name occurs more than
        once, the color of its first occurrence is kept.
    """
    color_dict = {}
    for entry in df[classification_col]:
        # Entries may be string-encoded; turn them back into Python objects
        if isinstance(entry, str):
            try:
                entry = ast.literal_eval(entry)
            except (ValueError, SyntaxError):
                continue

        # Iterate the entries directly: Series.explode() would split a dict
        # into its keys, and dicts cannot be de-duplicated with unique()
        candidates = entry if isinstance(entry, (list, tuple)) else [entry]
        for cls in candidates:
            if not isinstance(cls, dict):
                continue
            name = cls.get('name')
            rgb = cls.get('color')
            if name is None or name in color_dict:
                continue
            if not isinstance(rgb, (list, tuple)) or len(rgb) < 3:
                continue
            # Only the first three channels are used; any alpha is ignored
            red, green, blue = (int(channel) for channel in rgb[:3])
            color_dict[name] = '#{:02x}{:02x}{:02x}'.format(red, green, blue)

    return color_dict
|