spatools 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- Tutorials/manual_cuts.py +7 -0
- scripts/convert_point_in_barcodes.py +36 -0
- scripts/main.py +354 -0
- scripts/process_img.py +169 -0
- spatools/__init__.py +8 -0
- spatools/constants.py +87 -0
- spatools/plotting/__init__.py +24 -0
- spatools/plotting/pl.py +1004 -0
- spatools/processing/__init__.py +12 -0
- spatools/processing/core.py +83 -0
- spatools/processing/preprocessing_.py +105 -0
- spatools/processing/processing_.py +102 -0
- spatools/processing/utils.py +86 -0
- spatools/reading/__init__.py +7 -0
- spatools/reading/read_.py +238 -0
- spatools/tools/__init__.py +20 -0
- spatools/tools/tl.py +1106 -0
- spatools-0.2.0.dist-info/METADATA +119 -0
- spatools-0.2.0.dist-info/RECORD +23 -0
- spatools-0.2.0.dist-info/WHEEL +5 -0
- spatools-0.2.0.dist-info/entry_points.txt +2 -0
- spatools-0.2.0.dist-info/licenses/LICENCE +20 -0
- spatools-0.2.0.dist-info/top_level.txt +3 -0
Tutorials/manual_cuts.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
import scanpy as sc
|
|
3
|
+
from spatools.tools.tl import correlate_distances
|
|
4
|
+
|
|
5
|
+
adata = sc.read("/media/SATA/My_decon_package/redo_/data/output2/concat/scvi_5000_man_cuts.h5ad")
|
|
6
|
+
adata = correlate_distances(adata, is_concatenated=True, cluster_col="clusters_0.6", batch_key="batch")
|
|
7
|
+
|
|
8
|
+
def spot_mean_by_neighbors(adata):
    """Return, per spot, the relative frequency of its neighbours' clusters.

    Reads the table stored in ``adata.uns["spatools"]`` (columns used here:
    ``point_name``, ``color`` and ``color_neigh``) and, for every distinct
    ``point_name``, computes the normalised value counts of ``color_neigh``.

    Spots whose rows carry more than one distinct ``color`` are reported on
    stdout and skipped; spots with no non-null ``color`` are skipped silently.

    Parameters
    ----------
    adata
        Object exposing a ``uns`` mapping that holds the ``"spatools"`` table.

    Returns
    -------
    pandas.DataFrame
        One row per (spot, neighbour cluster) with the columns
        ``point_name``, ``color``, ``color_neigh`` and ``proportion``.
        Empty (with those columns) when no spot qualifies.
    """
    df = adata.uns["spatools"]
    columns = ["point_name", "color", "color_neigh", "proportion"]

    frames = []
    # sort=False keeps the spots in order of first appearance, as unique() did.
    for point_name, spot_rows in df.groupby("point_name", sort=False):
        n_colors = spot_rows["color"].nunique()
        if n_colors > 1:
            print("Existe pelo menos um valor diferente")
            continue
        if n_colors != 1:
            continue

        color = spot_rows["color"].dropna().unique()[0]
        # Convert on a derived Series so the table in adata.uns is not mutated.
        neighbor_freq = spot_rows["color_neigh"].map(int).value_counts(normalize=True)
        frames.append(
            pd.DataFrame(
                {
                    "point_name": point_name,
                    "color": color,
                    "color_neigh": neighbor_freq.index.to_numpy(),
                    "proportion": neighbor_freq.to_numpy(),
                }
            )
        )

    if not frames:
        return pd.DataFrame(columns=columns)
    return pd.concat(frames, ignore_index=True)
|
|
24
|
+
|
|
25
|
+
spot_mean_by_neighbors(adata=adata)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def point_name_to_barcodes(df_barcodes: pd.DataFrame | list, df_point_name: pd.DataFrame | list):
|
|
30
|
+
# convertendo se necessario
|
|
31
|
+
if type(df_barcodes) == list:
|
|
32
|
+
df_barcodes = pd.DataFrame(df_barcodes)
|
|
33
|
+
if type(df_point_name) == list:
|
|
34
|
+
df_point_name = pd.DataFrame(df_point_name)
|
|
35
|
+
|
|
36
|
+
pass
|
scripts/main.py
ADDED
|
@@ -0,0 +1,354 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import scanpy as sc
|
|
3
|
+
import pandas as pd
|
|
4
|
+
from anndata import AnnData
|
|
5
|
+
from scipy.spatial.distance import cdist
|
|
6
|
+
import matplotlib
|
|
7
|
+
|
|
8
|
+
matplotlib.use("Qt5Agg")
|
|
9
|
+
print(matplotlib.get_backend())
|
|
10
|
+
########################################################################################
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# loading bdata
|
|
14
|
+
bdata = sc.read("/mnt/SATA/Cell2_loucura/data/input/bdata_normal_free.h5ad")
|
|
15
|
+
bdata.obs.rename(columns={"batch": "sample"}, inplace=True)
|
|
16
|
+
|
|
17
|
+
# correcting names
|
|
18
|
+
bdata.obsm["q05_cell_abundance_w_sf"].columns = bdata.uns["mod"]["factor_names"]
|
|
19
|
+
|
|
20
|
+
# normalizing rows
|
|
21
|
+
df = bdata.obsm["q05_cell_abundance_w_sf"]
|
|
22
|
+
df = df.div(df.sum(axis=1), axis=0)
|
|
23
|
+
bdata.obsm["q05_cell_abundance_w_sf"] = df.copy()
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
########################################################################################
|
|
27
|
+
|
|
28
|
+
# apenas um check que eh totalmente necessario
|
|
29
|
+
def spatools_check(
    adata: "AnnData",
    deconv: bool = False
):
    """Ask for confirmation before a previous analysis is overwritten.

    The key checked in ``adata.uns`` is ``"deconv_spatools"`` when ``deconv``
    is true and ``"spatools"`` otherwise. When the key is absent the function
    returns immediately without prompting.

    Parameters
    ----------
    adata
        Object exposing a ``uns`` mapping.
    deconv
        Select the deconvolution result key instead of the default one.

    Raises
    ------
    Exception
        If the user answers ``n`` or gives any answer other than ``y``/``n``.
    """
    uns_value = "deconv_spatools" if deconv else "spatools"

    # Nothing stored yet: there is nothing to overwrite.
    if uns_value not in adata.uns:
        return

    print("Overwriting old analysis!")
    # The answer must not be bound to the name `input`: assigning to it makes
    # it local to the function, so the call itself fails with
    # UnboundLocalError before the prompt is ever shown.
    answer = input("Do you want to proceed? [y or n] ").strip().lower()
    if answer == "y":
        return
    if answer == "n":
        raise Exception("Operation canceled by the user.")
    raise Exception("Invalid response. Use 'y' for yes or 'n' for no.")
|
|
52
|
+
|
|
53
|
+
# pega os vizinhos com base em um limite
|
|
54
|
+
def measure_neighbors_filtered(
    adata: "AnnData",
    freq_threshold: float = 0.2,
    distance_factor: float = 1.1,
    df_freq: "pd.DataFrame | None" = None
):
    """List the immediate spatial neighbours of the spots enriched in each cell type.

    For every column of ``df_freq`` the spots whose value is strictly greater
    than ``freq_threshold`` are selected. The neighbour radius for that cell
    type is the smallest non-zero distance between a selected spot and any
    spot, multiplied by ``distance_factor``. Every spot strictly inside that
    radius (and not at distance zero) is reported as a neighbour.

    Parameters
    ----------
    adata
        Object providing ``obsm["spatial"]`` (two columns, read as x and y),
        ``obs_names`` and a writable ``uns`` mapping.
    freq_threshold
        Minimum (exclusive) abundance for a spot to be selected.
    distance_factor
        Multiplier applied to the smallest non-zero distance.
    df_freq
        Abundance table indexed by barcode, one column per cell type. When
        omitted, ``adata.obsm["q05_cell_abundance_w_sf"]`` is used and its
        columns are renamed in place to ``adata.uns["mod"]["factor_names"]``.

    Returns
    -------
    pandas.DataFrame
        Columns ``barcode``, ``x``, ``y``, ``neighbor_barcode``,
        ``x_neighbor``, ``y_neighbor``, ``distance`` and ``cell_type``.
        Empty (with those columns) when no cell type yields a result.

    Raises
    ------
    KeyError
        If ``df_freq`` is omitted and ``"q05_cell_abundance_w_sf"`` is not in
        ``adata.obsm``.

    Side effects
    ------------
    ``adata.uns["deconv_result"]`` is set to a dict mapping each processed
    cell type to the number of selected spots that have at least one
    neighbour.
    """
    neighbor_columns = [
        "barcode",
        "x",
        "y",
        "neighbor_barcode",
        "x_neighbor",
        "y_neighbor",
        "distance",
    ]

    # Barcodes and their spatial coordinates.
    coords = pd.DataFrame(
        adata.obsm["spatial"],
        index=adata.obs_names,
        columns=["x", "y"]
    )

    # `if df_freq:` is ambiguous for a DataFrame and raises ValueError, so the
    # optional argument is tested against None explicitly.
    if df_freq is None:
        try:
            df_freq = adata.obsm["q05_cell_abundance_w_sf"]
        except KeyError as err:
            raise KeyError(
                "'q05_cell_abundance_w_sf' does not exist inside obsm, try to give a 'df_freq' next time"
            ) from err
        df_freq.columns = adata.uns["mod"]["factor_names"]

    # Keep only the barcodes present in both tables, in the same order.
    common = coords.index.intersection(df_freq.index)
    coords = coords.loc[common]
    df_freq = df_freq.loc[common]

    # Loop invariant: coordinates of every spot.
    all_points = coords[["x", "y"]].to_numpy()

    # Number of spots with neighbours, per analysed cell type.
    result = dict()
    all_neighbors_dfs = []

    for cell_type in df_freq.columns:
        selected = (df_freq[cell_type] > freq_threshold).to_numpy()

        # Skip cell types with no spot above the threshold.
        if not selected.any():
            continue

        selected_barcodes = coords.index[selected]
        selected_points = all_points[selected]

        # Distances between every selected spot (rows) and every spot (columns).
        dist_matrix = cdist(selected_points, all_points)

        # Zero distances are the spot itself (or exact duplicates).
        non_zero_distances = dist_matrix[dist_matrix > 0]
        if non_zero_distances.size == 0:
            continue

        threshold_distance = non_zero_distances.min() * distance_factor

        neighbors_list = []
        for i, barcode in enumerate(selected_barcodes):
            distances = dist_matrix[i]
            # Positional lookup: row i of dist_matrix is row i of selected_points.
            x_i, y_i = selected_points[i]
            neighbors_idx = np.flatnonzero(
                (distances > 0) & (distances < threshold_distance)
            )
            for j in neighbors_idx:
                x_j, y_j = all_points[j]
                neighbors_list.append([
                    barcode,
                    x_i,
                    y_i,
                    coords.index[j],
                    x_j,
                    y_j,
                    distances[j]
                ])

        neighbors_df = pd.DataFrame(neighbors_list, columns=neighbor_columns)

        result[cell_type] = len(neighbors_df["barcode"].unique())

        neighbors_df["cell_type"] = cell_type
        all_neighbors_dfs.append(neighbors_df)

    adata.uns["deconv_result"] = result.copy()

    # pd.concat raises on an empty list; return an empty table instead.
    if not all_neighbors_dfs:
        return pd.DataFrame(columns=neighbor_columns + ["cell_type"])

    return pd.concat(all_neighbors_dfs, ignore_index=True)
|
|
172
|
+
|
|
173
|
+
# Calcula abundancia media de tipos celulares dos vizinhos por amostra
|
|
174
|
+
def get_neighbors_celltype_composition_by_sample(adata: "AnnData",
                                                 sample_id: str = 'sample'):
    """Compute the mean abundance profile of each cell type's neighbours, per sample.

    Reads the neighbour table in ``adata.uns["deconv_spatools"]`` (columns
    ``cell_type`` and ``neighbor_barcode``, optionally ``sample_id``) and the
    abundance table in ``adata.obsm['q05_cell_abundance_w_sf']``. For every
    sample and cell type, the abundance rows of the unique neighbour barcodes
    found in ``adata.obs_names`` are averaged; the row is all zeros when there
    are no such neighbours.

    Parameters
    ----------
    adata
        Object exposing ``obsm``, ``uns`` and ``obs_names``.
    sample_id
        Name of the sample column in the neighbour table. When that column is
        absent, all rows are treated as one sample stored under ``'ALL'``.

    Returns
    -------
    The same ``adata``, with ``adata.uns["neighbors_composition"]`` set to a
    dict mapping each sample to a DataFrame (rows = cell types, columns =
    columns of the abundance table).
    """
    abundance_matrix = adata.obsm['q05_cell_abundance_w_sf']
    spatools_deconv_df = adata.uns["deconv_spatools"]

    # Detect the sample column with the same name that is used for filtering;
    # a hard-coded 'sample' here silently ignored any other `sample_id`.
    has_samples = sample_id in spatools_deconv_df.columns
    samples = spatools_deconv_df[sample_id].unique() if has_samples else [None]
    cell_types = spatools_deconv_df['cell_type'].unique()

    composition_by_sample = {}

    for sample in samples:
        if has_samples:
            sample_rows = spatools_deconv_df[spatools_deconv_df[sample_id] == sample]
        else:
            sample_rows = spatools_deconv_df

        # Rows = query cell types, columns = mean abundance among neighbours.
        temp_df = pd.DataFrame(index=cell_types, columns=abundance_matrix.columns, dtype=float)

        for cell_type in cell_types:
            neighbors_of_type = sample_rows.loc[
                sample_rows['cell_type'] == cell_type, 'neighbor_barcode'
            ].unique()

            # Translate barcodes to row positions, ignoring unknown barcodes.
            neighbor_indices = [adata.obs_names.get_loc(barcode)
                                for barcode in neighbors_of_type
                                if barcode in adata.obs_names]

            if neighbor_indices:
                temp_df.loc[cell_type] = abundance_matrix.iloc[neighbor_indices].mean(axis=0)
            else:
                temp_df.loc[cell_type] = 0.0

        composition_by_sample[sample if has_samples else 'ALL'] = temp_df

    adata.uns["neighbors_composition"] = composition_by_sample

    return adata
|
|
231
|
+
|
|
232
|
+
# rodar measure_neighbors_filtered mas levando em consideracao que podem existir varias amostras
|
|
233
|
+
def deconv_correlate_distances(adata: "AnnData",
                               multi_sample: bool = True,
                               sample_key: str = "sample",
                               freq_threshold: float = 0.05):
    """Run the neighbour search, optionally per sample, and summarise the result.

    After ``spatools_check`` confirms that an existing result may be
    overwritten, ``measure_neighbors_filtered`` is run either once per
    distinct value of ``adata.obs[sample_key]`` (``multi_sample=True``) or
    once on the whole object. The neighbour table is stored in
    ``adata.uns["deconv_spatools"]`` and the per-sample composition is then
    computed by ``get_neighbors_celltype_composition_by_sample``.

    Parameters
    ----------
    adata
        Annotated data object to analyse.
    multi_sample
        Process each sample separately and tag rows with ``sample_key``.
    sample_key
        Column of ``adata.obs`` that identifies the sample; also the name of
        the column added to the neighbour table.
    freq_threshold
        Abundance threshold forwarded to ``measure_neighbors_filtered``.

    Returns
    -------
    The object returned by ``get_neighbors_celltype_composition_by_sample``.
    """
    # Confirm with the user if an analysis is already stored.
    spatools_check(adata, deconv=True)

    if multi_sample:
        per_sample = []
        for sample in adata.obs[sample_key].unique():
            # Distances are only meaningful within one sample's coordinates.
            subset = adata[adata.obs[sample_key] == sample].copy()
            neighbors = measure_neighbors_filtered(adata=subset, freq_threshold=freq_threshold)
            neighbors[sample_key] = sample
            per_sample.append(neighbors)

        adata.uns["deconv_spatools"] = pd.concat(per_sample, ignore_index=True)
    else:
        adata.uns["deconv_spatools"] = measure_neighbors_filtered(
            adata=adata, freq_threshold=freq_threshold
        )

    # Forward the sample column name so a non-default `sample_key` is honoured,
    # and compute the composition in both modes (a table without the sample
    # column is summarised under the single key 'ALL').
    adata = get_neighbors_celltype_composition_by_sample(adata=adata, sample_id=sample_key)

    return adata
|
|
258
|
+
|
|
259
|
+
bdata = deconv_correlate_distances(adata = bdata)
|
|
260
|
+
print(bdata.uns["neighbors_composition"])
|
|
261
|
+
|
|
262
|
+
##### plot
|
|
263
|
+
import matplotlib.pyplot as plt
|
|
264
|
+
from matplotlib.colors import LinearSegmentedColormap, Normalize
|
|
265
|
+
import seaborn as sns
|
|
266
|
+
import numpy as np
|
|
267
|
+
|
|
268
|
+
mask = bdata.uns["neighbors_composition"].index.get_level_values(1).str.split("_").str[1] == "GOR"
|
|
269
|
+
df_filtered = neighbors_composition_df[mask]# type: ignore
|
|
270
|
+
df_filtered
|
|
271
|
+
|
|
272
|
+
# plot dos resultados filtrados por resposta ao tratamento
|
|
273
|
+
def plot_neighbors_composition(composition_matrix: pd.DataFrame,
                               show=True,
                               title: str = "Neighbor Cell Type Composition",
                               mask_upper=True,
                               return_object=False):
    """
    Draw the neighbours composition matrix as an annotated heatmap.

    Parameters:
    -----------
    composition_matrix : pd.DataFrame
        Table to plot; its index labels the rows and its columns the
        columns of the heatmap.
    show : bool
        Call ``tight_layout`` and display the figure.
    title : str
        Title of the plot.
    mask_upper : bool
        Hide the cells strictly above the main diagonal.
    return_object : bool
        Return the plotted data (a copy of ``composition_matrix``);
        otherwise nothing is returned.
    """

    corr_matrix = composition_matrix.copy()

    # --- Optional mask for the upper triangle ---
    mask = np.triu(np.ones_like(corr_matrix, dtype=bool), k=1) if mask_upper else None

    # --- Colour scale limits ---
    vmax = corr_matrix.values.max()
    vmin = corr_matrix.values.min()

    # A constant matrix would give a zero-width colour range.
    if vmin == vmax:
        vmin = vmax - 1

    # --- Blue/white/red colormap with white placed at the value zero ---
    norm_range = vmax - vmin
    raw_zero_pos = (0 - vmin) / norm_range if norm_range != 0 else 0.5
    zero_pos = max(0.01, min(0.99, raw_zero_pos))
    cmap = LinearSegmentedColormap.from_list(
        'custom_bwr',
        [(0.0, '#0000FF'), (zero_pos, '#FFFFFF'), (1.0, '#FF0000')],
    )

    # --- Heatmap ---
    plt.figure(figsize=(18, 12))
    heatmap_options = dict(
        mask=mask,
        cmap=cmap,
        annot=True,
        fmt='.2f',
        cbar_kws={'label': 'Mean Abundance'},
        vmin=vmin,
        vmax=vmax,
        linewidths=0.5,
        linecolor='lightgray',
    )
    ax = sns.heatmap(corr_matrix, **heatmap_options)

    # --- Ticks, title and axis labels ---
    for axis_name in ("x", "y"):
        ax.tick_params(axis=axis_name, labelsize=11)

    plt.title(title, fontsize=20, pad=20)
    ax.set_xlabel('Cell Types in Neighbors', fontsize=16)
    ax.set_ylabel('Query Cell Types', fontsize=16)

    # Shift every x tick label by half a cell.
    for tick_label in ax.get_xticklabels():
        x_pos, _ = tick_label.get_position()
        tick_label.set_x(x_pos + 0.5)

    plt.xticks(rotation=90, ha='right')
    plt.yticks(rotation=0)

    if show:
        plt.tight_layout()
        plt.show()

    if return_object:
        return corr_matrix
|
|
350
|
+
|
|
351
|
+
# Plotar composição de vizinhos (mantém apenas triângulo inferior)
|
|
352
|
+
plot_neighbors_composition(composition_matrix=df_filtered,
|
|
353
|
+
title="Mean Cell Type Composition of Neighbors",
|
|
354
|
+
mask_upper=True)
|
scripts/process_img.py
ADDED
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import cv2 as cv
|
|
3
|
+
import numpy as np
|
|
4
|
+
import pandas as pd
|
|
5
|
+
from anndata import AnnData
|
|
6
|
+
from PIL import Image
|
|
7
|
+
from skimage import io
|
|
8
|
+
from spatools import constants as con
|
|
9
|
+
from typing import List, Any, Optional
|
|
10
|
+
from pybiomart import Server
|
|
11
|
+
import matplotlib.pyplot as plt
|
|
12
|
+
from scipy.spatial import distance
|
|
13
|
+
from multiprocessing import Pool, cpu_count
|
|
14
|
+
|
|
15
|
+
def process_image(input_image_path,
                  output_dir: str,
                  minDist=50,
                  param1=50,
                  param2=0.2,
                  minRadius=50,
                  maxRadius=100):
    """
    Detect circles in an image with the Hough transform and export them.

    Parameters
    ----------
    input_image_path : str
        The path to the input image file.
    output_dir : str
        The directory to save the output files (created if missing).
    minDist : int, default=50
        Minimum distance between detected circles.
    param1 : int, default=50
        First method-specific parameter forwarded to ``cv.HoughCircles``.
    param2 : float, default=0.2
        Second method-specific parameter forwarded to ``cv.HoughCircles``.
    minRadius : int, default=50
        Minimum circle radius to be detected.
    maxRadius : int, default=100
        Maximum circle radius to be detected.

    Returns
    -------
    output_image_path : str or None
        Path of ``detected_circles.png``, the input image with every detected
        circle outlined.
    output_excel_path : str or None
        Path of ``output_data.xlsx``, a table with the columns:
            - Center_X, Center_Y, Center_Color, Neighbor_X, Neighbor_Y,
              Distance: produced by ``calculate_distances``.
            - Point_Name: name of the point in the format "Point_X_Y".
            - Color_Code: key of ``con.COLORS_23`` matching Center_Color.
            - proximity: 'close' when Distance is below the threshold
              distance, otherwise 'far'.
            - Neighbor_Cluster: Color_Code of the neighbouring point.
            - combination: sorted tuple (Color_Code, Neighbor_Cluster).

    Both values are ``None`` when no circle is detected.
    """
    # Lift PIL's pixel-count limit (process-wide setting).
    Image.MAX_IMAGE_PIXELS = None

    image = io.imread(input_image_path)

    if image.ndim == 2:
        # Single-channel input: already grey; replicate it so that centre
        # colours are still 3-tuples.
        gray_image = image
        image_rgb = np.stack([image] * 3, axis=-1)
    else:
        # Drop the alpha channel of RGBA images.
        image_rgb = image[:, :, :3] if image.shape[2] == 4 else image
        # skimage loads channels in RGB order, so the RGB conversion code is
        # required; the BGR one swaps the red and blue weights.
        gray_image = cv.cvtColor(image_rgb, cv.COLOR_RGB2GRAY)

    circles = cv.HoughCircles(
        gray_image,
        cv.HOUGH_GRADIENT_ALT,
        dp=1,
        minDist=minDist,
        param1=param1,
        param2=param2,
        minRadius=minRadius,
        maxRadius=maxRadius
    )

    if circles is None:
        print("Nenhum círculo foi detectado.")
        return None, None

    # Round straight to a wide integer type; going through uint16 wraps any
    # coordinate above 65535.
    circles = np.rint(circles[0]).astype(int)

    # Colour at the centre of each circle, and the radii.
    centers_colors = [(x, y, image_rgb[y, x]) for x, y, _ in circles]
    xs, ys, radii = circles[:, 0], circles[:, 1], circles[:, 2]

    # Mean radius of each circle together with its 6 nearest neighbours.
    mean_radii_with_neighbors = []
    for x, y, r in circles:
        # Positions (in `circles`) of every circle with a different centre;
        # keeping the positions is what keeps the radii lookup aligned.
        others = np.flatnonzero((xs != x) | (ys != y))
        distances = np.hypot(xs[others] - x, ys[others] - y)
        nearest_indices = others[np.argsort(distances)[:6]]
        mean_radii_with_neighbors.append(np.mean(np.append(radii[nearest_indices], r)))

    threshold_distance = 2 * np.mean(mean_radii_with_neighbors) * np.sqrt(3) * 0.9

    # One task per circle for the parallel distance computation.
    # NOTE(review): `calculate_distances` is not defined in the visible part
    # of this file — confirm it is defined or imported at module level.
    args = [(centers_colors, i, threshold_distance) for i in range(len(centers_colors))]
    with Pool(cpu_count()) as pool:
        results = pool.map(calculate_distances, args)

    data = [item for sublist in results for item in sublist]

    df = pd.DataFrame(data, columns=['Center_X', 'Center_Y', 'Center_Color', 'Neighbor_X', 'Neighbor_Y', 'Distance'])

    df['Point_Name'] = df.apply(lambda row: f"Point_{row['Center_X']}_{row['Center_Y']}", axis=1)

    def map_color_to_dict(color):
        """Return the COLORS_23 key whose RGB tuple equals `color`, else None."""
        for key, value in con.COLORS_23.items():
            if tuple(color) == value:
                return key
        return None

    def unmapped_last(code):
        """Sort key placing unmapped codes (None/NaN) after every mapped code."""
        missing = pd.isna(code)
        return (missing, 0 if missing else code)

    df['Color_Code'] = df['Center_Color'].apply(map_color_to_dict)

    df['proximity'] = df['Distance'].apply(lambda d: 'close' if d < threshold_distance else 'far')

    # Cluster of every detected centre, keyed by its "x_y" coordinates.
    neighbor_clusters = {f"{x}_{y}": map_color_to_dict(color) for x, y, color in centers_colors}

    df['Neighbor_Cluster'] = df.apply(lambda row: neighbor_clusters.get(f"{row['Neighbor_X']}_{row['Neighbor_Y']}"), axis=1)  # type: ignore

    # Order-independent pair of clusters; the key keeps the ordering defined
    # when one of the codes is unmapped.
    df['combination'] = df.apply(
        lambda row: tuple(sorted((row['Color_Code'], row['Neighbor_Cluster']), key=unmapped_last)),
        axis=1,
    )

    os.makedirs(output_dir, exist_ok=True)

    output_excel_path = os.path.join(output_dir, "output_data.xlsx")
    df.to_excel(output_excel_path, index=False)

    # Draw the image with the detected circles outlined.
    fig, ax = plt.subplots(figsize=(10, 10))
    ax.imshow(image_rgb)

    for (x, y, r) in circles:
        circle = plt.Circle((x, y), r, color='black', fill=False, linewidth=0.2)  # type: ignore
        ax.add_patch(circle)

    ax.set_title('Círculos Detectados')
    plt.axis('off')

    output_image_path = os.path.join(output_dir, "detected_circles.png")
    plt.savefig(output_image_path, format="png", dpi=1000)
    plt.close()

    return output_image_path, output_excel_path
|
spatools/__init__.py
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
# Importa os submódulos do pacote
|
|
2
|
+
from . import plotting as pl
|
|
3
|
+
from . import processing as pp
|
|
4
|
+
from . import tools as tl
|
|
5
|
+
from .reading import read
|
|
6
|
+
from . import constants as con
|
|
7
|
+
# Define os módulos exportados ao importar o pacote
|
|
8
|
+
__all__ = ["pl", "pp", "tl", "read", "con"]
|
spatools/constants.py
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
# dictionary of colors
|
|
4
|
+
COLORS_23 = {
|
|
5
|
+
0: (0, 32, 111),
|
|
6
|
+
1: (230, 214, 71),
|
|
7
|
+
2: (170, 96, 40),
|
|
8
|
+
3: (19, 177, 81),
|
|
9
|
+
4: (89, 0, 88),
|
|
10
|
+
5: (197, 223, 119),
|
|
11
|
+
6: (1, 135, 226),
|
|
12
|
+
7: (255, 161, 97),
|
|
13
|
+
8: (136, 101, 195),
|
|
14
|
+
9: (252, 80, 111),
|
|
15
|
+
10: (118, 232, 195),
|
|
16
|
+
11: (126, 0, 46),
|
|
17
|
+
12: (42, 82, 0),
|
|
18
|
+
13: (255, 127, 202),
|
|
19
|
+
14: (129, 62, 115),
|
|
20
|
+
15: (255, 135, 143),
|
|
21
|
+
16: (34, 222, 230),
|
|
22
|
+
17: (201, 78, 60),
|
|
23
|
+
18: (131, 110, 32),
|
|
24
|
+
19: (159, 150, 56),
|
|
25
|
+
20: (177, 73, 161),
|
|
26
|
+
21: (128, 77, 17),
|
|
27
|
+
22: (129, 47, 25)
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
# list of the same colors in hex (list index = key in COLORS_23)
|
|
31
|
+
COLORS_23_HEX = [
|
|
32
|
+
"#00206F", # 0
|
|
33
|
+
"#E6D647", # 1
|
|
34
|
+
"#AA6028", # 2
|
|
35
|
+
"#13B151", # 3
|
|
36
|
+
"#590058", # 4
|
|
37
|
+
"#C5DF77", # 5
|
|
38
|
+
"#0187E2", # 6
|
|
39
|
+
"#FFA161", # 7
|
|
40
|
+
"#8865C3", # 8
|
|
41
|
+
"#FC506F", # 9
|
|
42
|
+
"#76E8C3", # 10
|
|
43
|
+
"#7E002E", # 11
|
|
44
|
+
"#2A5200", # 12
|
|
45
|
+
"#FF7FCA", # 13
|
|
46
|
+
"#813E73", # 14
|
|
47
|
+
"#FF878F", # 15
|
|
48
|
+
"#22DEE6", # 16
|
|
49
|
+
"#C94E3C", # 17
|
|
50
|
+
"#836E20", # 18
|
|
51
|
+
"#9F9638", # 19
|
|
52
|
+
"#B149A1", # 20
|
|
53
|
+
"#804D11", # 21
|
|
54
|
+
"#812F19" # 22
|
|
55
|
+
]
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
BATCH_COLORS = ["#d9b85c",
|
|
59
|
+
"#46ca79",
|
|
60
|
+
"#bd4eaa",
|
|
61
|
+
"#7db94c",
|
|
62
|
+
"#572f83",
|
|
63
|
+
"#b6b638",
|
|
64
|
+
"#606ed6",
|
|
65
|
+
"#808026",
|
|
66
|
+
"#de6259",
|
|
67
|
+
"#3c6f24",
|
|
68
|
+
"#cc4282",
|
|
69
|
+
"#4da260",
|
|
70
|
+
"#852a67",
|
|
71
|
+
"#ae75d7",
|
|
72
|
+
"#5558a1",
|
|
73
|
+
"#cc8c33",
|
|
74
|
+
"#669ce4",
|
|
75
|
+
"#34d6e0",
|
|
76
|
+
"#43c8ac",
|
|
77
|
+
"#aebc65",
|
|
78
|
+
"#cd4b37",
|
|
79
|
+
"#8c2343",
|
|
80
|
+
"#8c5d1b",
|
|
81
|
+
"#df88cc",
|
|
82
|
+
"#cc8b52",
|
|
83
|
+
"#d75f77",
|
|
84
|
+
"#873319",
|
|
85
|
+
"#dd7b67",
|
|
86
|
+
"#ca3d4e"
|
|
87
|
+
]
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
from .pl import (
|
|
2
|
+
plot_bar_by_batch,
|
|
3
|
+
plot_bar_by_group,
|
|
4
|
+
plot_bar,
|
|
5
|
+
plot_single_spatial_image,
|
|
6
|
+
plot_spatial_clusters,
|
|
7
|
+
plot_clusters_quality_violin_boxplot,
|
|
8
|
+
outlier_quality,
|
|
9
|
+
z_score_matrixplot,
|
|
10
|
+
boxplot_cluster_correlations
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
# Defina as funções que devem ser acessíveis a partir de 'plotting'
|
|
14
|
+
__all__ = [
|
|
15
|
+
'plot_bar_by_batch',
|
|
16
|
+
'plot_bar_by_group',
|
|
17
|
+
'plot_bar',
|
|
18
|
+
'plot_single_spatial_image',
|
|
19
|
+
'plot_spatial_clusters',
|
|
20
|
+
'plot_clusters_quality_violin_boxplot',
|
|
21
|
+
'outlier_quality',
|
|
22
|
+
'z_score_matrixplot',
|
|
23
|
+
'boxplot_cluster_correlations'
|
|
24
|
+
]
|