vitessce 3.7.7__tar.gz → 3.8.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. {vitessce-3.7.7 → vitessce-3.8.1}/.coveragerc_omit +3 -1
  2. {vitessce-3.7.7 → vitessce-3.8.1}/PKG-INFO +1 -1
  3. {vitessce-3.7.7 → vitessce-3.8.1}/pyproject.toml +3 -1
  4. {vitessce-3.7.7 → vitessce-3.8.1}/src/vitessce/data_utils/__init__.py +11 -0
  5. vitessce-3.8.1/src/vitessce/data_utils/spatialdata_points_zorder.py +514 -0
  6. {vitessce-3.7.7 → vitessce-3.8.1}/src/vitessce/file_def_utils.py +5 -1
  7. {vitessce-3.7.7 → vitessce-3.8.1}/src/vitessce/widget.py +5 -5
  8. {vitessce-3.7.7 → vitessce-3.8.1}/src/vitessce/wrappers.py +16 -2
  9. vitessce-3.8.1/tests/create_xenium_filtered_points.py +55 -0
  10. vitessce-3.8.1/tests/test_sdata_points_zorder.py +183 -0
  11. {vitessce-3.7.7 → vitessce-3.8.1}/tests/test_wrappers.py +4 -1
  12. {vitessce-3.7.7 → vitessce-3.8.1}/uv.lock +5 -1
  13. {vitessce-3.7.7 → vitessce-3.8.1}/.coveragerc_real +0 -0
  14. {vitessce-3.7.7 → vitessce-3.8.1}/.devcontainer/devcontainer.json +0 -0
  15. {vitessce-3.7.7 → vitessce-3.8.1}/.gitignore +0 -0
  16. {vitessce-3.7.7 → vitessce-3.8.1}/.python-version +0 -0
  17. {vitessce-3.7.7 → vitessce-3.8.1}/CHANGELOG.md +0 -0
  18. {vitessce-3.7.7 → vitessce-3.8.1}/LICENSE +0 -0
  19. {vitessce-3.7.7 → vitessce-3.8.1}/MANIFEST.in +0 -0
  20. {vitessce-3.7.7 → vitessce-3.8.1}/Makefile +0 -0
  21. {vitessce-3.7.7 → vitessce-3.8.1}/README.md +0 -0
  22. {vitessce-3.7.7 → vitessce-3.8.1}/setup.cfg +0 -0
  23. {vitessce-3.7.7 → vitessce-3.8.1}/setup.py +0 -0
  24. {vitessce-3.7.7 → vitessce-3.8.1}/src/vitessce/__init__.py +0 -0
  25. {vitessce-3.7.7 → vitessce-3.8.1}/src/vitessce/config.py +0 -0
  26. {vitessce-3.7.7 → vitessce-3.8.1}/src/vitessce/config_converter.py +0 -0
  27. {vitessce-3.7.7 → vitessce-3.8.1}/src/vitessce/constants.py +0 -0
  28. {vitessce-3.7.7 → vitessce-3.8.1}/src/vitessce/data_utils/anndata.py +0 -0
  29. {vitessce-3.7.7 → vitessce-3.8.1}/src/vitessce/data_utils/entities.py +0 -0
  30. {vitessce-3.7.7 → vitessce-3.8.1}/src/vitessce/data_utils/multivec.py +0 -0
  31. {vitessce-3.7.7 → vitessce-3.8.1}/src/vitessce/data_utils/ome.py +0 -0
  32. {vitessce-3.7.7 → vitessce-3.8.1}/src/vitessce/export.py +0 -0
  33. {vitessce-3.7.7 → vitessce-3.8.1}/src/vitessce/repr.py +0 -0
  34. {vitessce-3.7.7 → vitessce-3.8.1}/src/vitessce/responses.py +0 -0
  35. {vitessce-3.7.7 → vitessce-3.8.1}/src/vitessce/routes.py +0 -0
  36. {vitessce-3.7.7 → vitessce-3.8.1}/src/vitessce/utils.py +0 -0
  37. {vitessce-3.7.7 → vitessce-3.8.1}/src/vitessce/widget_plugins/__init__.py +0 -0
  38. {vitessce-3.7.7 → vitessce-3.8.1}/src/vitessce/widget_plugins/demo_plugin.py +0 -0
  39. {vitessce-3.7.7 → vitessce-3.8.1}/src/vitessce/widget_plugins/spatial_query.py +0 -0
  40. {vitessce-3.7.7 → vitessce-3.8.1}/tests/__init__.py +0 -0
  41. {vitessce-3.7.7 → vitessce-3.8.1}/tests/create_test_data.py +0 -0
  42. {vitessce-3.7.7 → vitessce-3.8.1}/tests/data/test.ome.tif +0 -0
  43. {vitessce-3.7.7 → vitessce-3.8.1}/tests/test_anndata_utils.py +0 -0
  44. {vitessce-3.7.7 → vitessce-3.8.1}/tests/test_config.py +0 -0
  45. {vitessce-3.7.7 → vitessce-3.8.1}/tests/test_config_converter.py +0 -0
  46. {vitessce-3.7.7 → vitessce-3.8.1}/tests/test_config_updates.py +0 -0
  47. {vitessce-3.7.7 → vitessce-3.8.1}/tests/test_entities.py +0 -0
  48. {vitessce-3.7.7 → vitessce-3.8.1}/tests/test_ome_utils.py +0 -0
  49. {vitessce-3.7.7 → vitessce-3.8.1}/tests-widget/example.spec.js +0 -0
  50. {vitessce-3.7.7 → vitessce-3.8.1}/tests-widget/package.json +0 -0
  51. {vitessce-3.7.7 → vitessce-3.8.1}/tests-widget/playwright.config.js +0 -0
  52. {vitessce-3.7.7 → vitessce-3.8.1}/tests-widget/pnpm-lock.yaml +0 -0
@@ -12,5 +12,7 @@ omit =
12
12
  src/vitessce/data_utils/ome.py
13
13
  src/vitessce/data_utils/entities.py
14
14
  src/vitessce/data_utils/multivec.py
15
+ src/vitessce/data_utils/spatialdata_points_zorder.py
15
16
  src/vitessce/widget_plugins/demo_plugin.py
16
- src/vitessce/widget_plugins/spatial_query.py
17
+ src/vitessce/widget_plugins/spatial_query.py
18
+ tests/*.py
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: vitessce
3
- Version: 3.7.7
3
+ Version: 3.8.1
4
4
  Summary: Jupyter widget facilitating interactive visualization of spatial single-cell data with Vitessce
5
5
  Project-URL: repository, https://github.com/vitessce/vitessce-python
6
6
  Author-email: Mark Keller <mark_keller@hms.harvard.edu>
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "vitessce"
7
- version = "3.7.7"
7
+ version = "3.8.1"
8
8
  authors = [
9
9
  { name="Mark Keller", email="mark_keller@hms.harvard.edu" },
10
10
  ]
@@ -114,6 +114,8 @@ dev = [
114
114
  'boto3>=1.16.30',
115
115
  'scikit-misc>=0.1.3',
116
116
  'autopep8>=2.0.2',
117
+ 'spatialdata>=0.3.0',
118
+ 'dask[dataframe]==2024.11.1',
117
119
  ]
118
120
 
119
121
  [tool.uv]
@@ -17,3 +17,14 @@ from .ome import (
17
17
  from .multivec import (
18
18
  adata_to_multivec_zarr,
19
19
  )
20
+ from .spatialdata_points_zorder import (
21
+ # Function for computing codes and sorting
22
+ sdata_morton_sort_points,
23
+ # Other helper functions
24
+ sdata_points_process_columns,
25
+ sdata_points_write_bounding_box_attrs,
26
+ sdata_points_modify_row_group_size,
27
+ # Functions for querying
28
+ sdata_morton_query_rect,
29
+ row_ranges_to_row_indices,
30
+ )
@@ -0,0 +1,514 @@
1
+ from typing import Tuple, List, Optional
2
+
3
+ import os
4
+ from os.path import join
5
+ from bisect import bisect_left, bisect_right
6
+ import pandas as pd
7
+ import numpy as np
8
+
9
+
10
+ from spatialdata import get_element_annotators
11
+ import dask.dataframe as dd
12
+ import zarr
13
+
14
+
15
# A 2D Morton code interleaves two coordinates, so each axis gets half the bits.
MORTON_CODE_NUM_BITS = 32  # Resulting morton codes will be stored as uint32.
MORTON_CODE_VALUE_MIN = 0
# Integer division keeps the per-axis maximum an exact int (65535)
# rather than the float 65535.0 that true division would produce.
MORTON_CODE_VALUE_MAX = 2 ** (MORTON_CODE_NUM_BITS // 2) - 1
18
+
19
+ # --------------------------
20
+ # Functions for computing Morton codes for SpatialData points (2D).
21
+ # --------------------------
22
+
23
+
24
def norm_series_to_uint(series, v_min, v_max):
    """
    Scale a numeric Series (int or float) onto the integer grid
    [0, MORTON_CODE_VALUE_MAX], mapping NaNs to 0.

    :param series: Series-like object supporting ``astype``, arithmetic and ``fillna``.
    :param v_min: Value that maps to 0.
    :param v_max: Value that maps to MORTON_CODE_VALUE_MAX.
    :returns: Series of uint32 grid coordinates.
    """
    # Cast to float64 so the normalization is done in floating point.
    series_f64 = series.astype("float64")
    # Normalize the values to be between 0.0 and 1.0.
    norm_series_f64 = (series_f64 - v_min) / (v_max - v_min)
    # Clip to ensure no values are outside the 0/1 range.
    clipped_norm_series_f64 = np.clip(norm_series_f64, 0.0, 1.0)
    # Scale from [0, 1] to [0, MORTON_CODE_VALUE_MAX].
    scaled = clipped_norm_series_f64 * MORTON_CODE_VALUE_MAX
    # Replace NaNs BEFORE the integer cast: NaN has no integer representation,
    # so filling afterwards would be too late. This also covers the NaNs
    # produced when v_min == v_max (0 / 0).
    return scaled.fillna(0).astype(np.uint32)
39
+
40
+
41
def norm_ddf_to_uint(ddf):
    """
    Add ``x_uint`` / ``y_uint`` grid-coordinate columns to ``ddf`` and record
    the x/y bounding box under ``ddf.attrs["bounding_box"]``.

    The dataframe is modified in place and also returned.
    """
    # Each extent is materialized with its own .compute() call.
    x_min = ddf["x"].min().compute()
    x_max = ddf["x"].max().compute()
    y_min = ddf["y"].min().compute()
    y_max = ddf["y"].max().compute()

    ddf["x_uint"] = norm_series_to_uint(ddf["x"], x_min, x_max)
    ddf["y_uint"] = norm_series_to_uint(ddf["y"], y_min, y_max)

    # Insert the bounding box as metadata for the sdata.points[element] Points element dataframe.
    # TODO: does anything special need to be done to ensure this is saved to disk?
    bounding_box = {
        "x_min": float(x_min),
        "x_max": float(x_max),
        "y_min": float(y_min),
        "y_max": float(y_max),
    }
    ddf.attrs["bounding_box"] = bounding_box

    return ddf
56
+
57
+
58
+ def _part1by1_16(x):
59
+ """
60
+ Spread each 16-bit value into 32 bits by inserting zeros between bits.
61
+ Input: uint32 array (values must fit in 16 bits)
62
+ Output: uint32 array (bit-spread)
63
+ """
64
+
65
+ assert x.dtype.name == 'uint32'
66
+
67
+ # Mask away any bits above 16 (just in case input wasn't clean).
68
+ x = x & np.uint32(0x0000FFFF)
69
+
70
+ # First spread: shift left by 8 bits, OR with original, then mask.
71
+ # After this, groups of 8 bits are separated by 8 zeros.
72
+ x = (x | np.left_shift(x, 8)) & np.uint32(0x00FF00FF)
73
+
74
+ # Spread further: now groups of 4 bits separated by 4 zeros.
75
+ x = (x | np.left_shift(x, 4)) & np.uint32(0x0F0F0F0F)
76
+
77
+ # Spread further: groups of 2 bits separated by 2 zeros.
78
+ x = (x | np.left_shift(x, 2)) & np.uint32(0x33333333)
79
+
80
+ # Final spread: single bits separated by a zero bit.
81
+ # Now each original bit is in every other position (positions 0,2,4,...).
82
+ x = (x | np.left_shift(x, 1)) & np.uint32(0x55555555)
83
+
84
+ return x
85
+
86
+
87
+ def _part1by1_32(x):
88
+ """
89
+ Spread each 32-bit value into 64 bits by inserting zeros between bits.
90
+ Input: uint64 array (values must fit in 32 bits)
91
+ Output: uint64 array (bit-spread)
92
+ """
93
+
94
+ assert x.dtype.name == 'uint64'
95
+
96
+ # Mask away any bits above 32 (safety).
97
+ x = x.astype(np.uint64) & np.uint64(0x00000000FFFFFFFF)
98
+
99
+ # First spread: separate into 16-bit chunks spaced out.
100
+ x = (x | np.left_shift(x, 16)) & np.uint64(0x0000FFFF0000FFFF)
101
+
102
+ # Spread further: each 8-bit chunk separated.
103
+ x = (x | np.left_shift(x, 8)) & np.uint64(0x00FF00FF00FF00FF)
104
+
105
+ # Spread further: each 4-bit nibble separated.
106
+ x = (x | np.left_shift(x, 4)) & np.uint64(0x0F0F0F0F0F0F0F0F)
107
+
108
+ # Spread further: 2-bit groups separated.
109
+ x = (x | np.left_shift(x, 2)) & np.uint64(0x3333333333333333)
110
+
111
+ # Final spread: single bits separated by zeros.
112
+ # Now each original bit occupies every other position (0,2,4,...).
113
+ x = (x | np.left_shift(x, 1)) & np.uint64(0x5555555555555555)
114
+
115
+ return x
116
+
117
+
118
def morton_interleave(ddf):
    """
    Vectorized 2D Morton interleave of the ``x_uint`` and ``y_uint`` columns.

    x bits land in the even bit positions and y bits in the odd ones.
    Returns the Morton codes as uint32.
    """

    # Spread each coordinate's bits apart, then widen so the shift has headroom.
    x_spread = _part1by1_16(ddf["x_uint"]).astype(np.uint64)
    y_spread = _part1by1_16(ddf["y_uint"]).astype(np.uint64)

    # Shift y into the odd positions and combine with x in the even positions.
    interleaved = np.left_shift(y_spread, 1) | x_spread

    # 16 + 16 input bits always fit in 32 bits.
    return interleaved.astype(np.uint32)
138
+
139
+
140
def sdata_morton_sort_points(sdata, element):
    """
    Add a ``morton_code_2d`` column to ``sdata.points[element]`` and replace
    that element with a copy sorted by the code.

    When a ``z`` column exists with fewer than 100 unique values, rows are
    sorted by ``z`` first and by Morton code second. Returns ``sdata``.
    """
    points_ddf = norm_ddf_to_uint(sdata.points[element])
    points_ddf["morton_code_2d"] = morton_interleave(points_ddf)

    # Default: sort by the 2D code only.
    # TODO: include z as a dimension in the morton code in the 3D case?
    sort_keys = "morton_code_2d"
    if "z" in points_ddf.columns:
        num_unique_z = points_ddf["z"].unique().shape[0].compute()
        if num_unique_z < 100:
            # Heuristic for interpreting the 3D data as 2.5D
            # Reference: https://github.com/scverse/spatialdata/issues/961
            sort_keys = ["z", "morton_code_2d"]

    sdata.points[element] = points_ddf.sort_values(by=sort_keys, ascending=True)

    # TODO: Sort any annotating table(s) as well.

    return sdata
167
+
168
+
169
def sdata_morton_query_rect_aux(sdata, element, orig_rect):
    """
    Translate a rectangle in original coordinates into Morton code intervals.

    :param orig_rect: ``[[x0, y0], [x1, y1]]`` in the original coordinate space.
    :returns: List of ``(morton_start, morton_end)`` tuples covering the rectangle.
    """
    # TODO: fail if no morton_code_2d column
    # TODO: fail if not sorted as expected
    # TODO: fail if no bounding box metadata
    bbox = sdata.points[element].attrs["bounding_box"]
    bounds = {
        "orig_x_min": bbox["x_min"],
        "orig_x_max": bbox["x_max"],
        "orig_y_min": bbox["y_min"],
        "orig_y_max": bbox["y_max"],
    }

    # Map both corners into the normalized grid space.
    lower = orig_coord_to_norm_coord(orig_rect[0], **bounds)
    upper = orig_coord_to_norm_coord(orig_rect[1], **bounds)

    # Get a list of morton code intervals that cover this rectangle region.
    return zcover_rectangle(
        rx0=lower[0], ry0=lower[1],
        rx1=upper[0], ry1=upper[1],
        bits=16,
        stop_level=None,
        merge=True,
    )
204
+
205
+
206
def sdata_morton_query_rect(sdata, element, orig_rect):
    """
    Find the row ranges of ``sdata.points[element]`` whose Morton codes fall
    inside the rectangle ``orig_rect`` (``[[x0, y0], [x1, y1]]``).

    :returns: List of half-open ``(row_start, row_end)`` ranges.
    """
    # TODO: generalize to 3D morton codes
    points_ddf = sdata.points[element]
    code_intervals = sdata_morton_query_rect_aux(sdata, element, orig_rect)

    # Materialize the morton code column as a plain list of integers.
    codes = points_ddf["morton_code_2d"].compute().values.tolist()

    # Binary-search the code column for each interval to get matching row ranges.
    return zquery_rows(codes, code_intervals, merge=True)
222
+
223
+
224
def sdata_morton_query_rect_debug(sdata, element, orig_rect):
    """
    Debug variant of the rectangle query.

    Returns the matching row ranges together with the values that
    ``zquery_rows_aux`` recorded while running its binary searches.
    """
    points_ddf = sdata.points[element]
    code_intervals = sdata_morton_query_rect_aux(sdata, element, orig_rect)
    codes = points_ddf["morton_code_2d"].compute().values.tolist()
    return zquery_rows_aux(codes, code_intervals, merge=True)
233
+
234
+ # --------------------------
235
+ # Functions for rectangle queries.
236
+ # --------------------------
237
+
238
+ # Convert a coordinate from the normalized [0, 65535] space to the original space.
239
+
240
+
241
def norm_coord_to_orig_coord(norm_coord, orig_x_min, orig_x_max, orig_y_min, orig_y_max):
    """
    Map an ``[x, y]`` coordinate from the normalized grid space back to the
    original coordinate space described by the given bounds.
    """
    norm_x, norm_y = norm_coord
    # Fraction of the way along each axis, then rescaled to the original extent.
    x_fraction = norm_x / MORTON_CODE_VALUE_MAX
    y_fraction = norm_y / MORTON_CODE_VALUE_MAX
    orig_x = orig_x_min + x_fraction * (orig_x_max - orig_x_min)
    orig_y = orig_y_min + y_fraction * (orig_y_max - orig_y_min)
    return [orig_x, orig_y]
249
+
250
+ # Convert a coordinate from the original space to the [0, 65535] normalized space.
251
+
252
+
253
def orig_coord_to_norm_coord(orig_coord, orig_x_min, orig_x_max, orig_y_min, orig_y_max):
    """
    Map an ``[x, y]`` coordinate from the original space onto the normalized
    integer grid ``[0, MORTON_CODE_VALUE_MAX]``.

    Coordinates outside the bounds are clipped onto the nearest grid edge,
    matching ``norm_series_to_uint``. Note that a point wholly outside the
    bounds therefore collapses onto the boundary. A zero-width axis maps to 0.

    :param orig_coord: ``[x, y]`` in the original coordinate space.
    :returns: ``[x, y]`` as two ``np.uint32`` grid coordinates.
    """
    def _to_grid(value, v_min, v_max):
        v_range = v_max - v_min
        if v_range == 0:
            # Degenerate extent: avoid dividing by zero.
            return np.uint32(0)
        # Clip before the integer cast so negative or oversized values
        # never reach the float -> uint32 conversion.
        fraction = np.clip((value - v_min) / v_range, 0.0, 1.0)
        return np.float64(fraction * MORTON_CODE_VALUE_MAX).astype(np.uint32)

    orig_x, orig_y = orig_coord
    return [
        _to_grid(orig_x, orig_x_min, orig_x_max),
        _to_grid(orig_y, orig_y_min, orig_y_max),
    ]
261
+
262
+ # --------------------------
263
+ # Quadtree / Z-interval helpers
264
+ # --------------------------
265
+
266
+
267
def intersects(ax0: int, ay0: int, ax1: int, ay1: int,
               bx0: int, by0: int, bx1: int, by1: int) -> bool:
    """Report whether two axis-aligned boxes overlap (inclusive integer bounds)."""
    # Separated along x?
    if ax1 < bx0 or bx1 < ax0:
        return False
    # Separated along y?
    if ay1 < by0 or by1 < ay0:
        return False
    return True
271
+
272
+
273
def contained(ix0: int, iy0: int, ix1: int, iy1: int,
              ox0: int, oy0: int, ox1: int, oy1: int) -> bool:
    """Report whether the inner box lies entirely within the outer box (inclusive integer bounds)."""
    inside_x = ox0 <= ix0 <= ix1 <= ox1
    inside_y = oy0 <= iy0 <= iy1 <= oy1
    return inside_x and inside_y
277
+
278
+
279
def point_inside(x: int, y: int, rx0: int, ry0: int, rx1: int, ry1: int) -> bool:
    """Report whether the point (x, y) lies within the rectangle (inclusive bounds)."""
    x_within = rx0 <= x <= rx1
    y_within = ry0 <= y <= ry1
    return x_within and y_within
281
+
282
+
283
def cell_range(prefix: int, level: int, bits: int) -> Tuple[int, int]:
    """
    Return the inclusive ``(lo, hi)`` Morton code range of a quadtree cell.

    Every code in the cell starts with the same ``2 * level`` prefix bits;
    the remaining low bits are all 0 for ``lo`` and all 1 for ``hi``.
    """
    free_bits = 2 * (bits - level)
    first_code = prefix << free_bits
    next_cell_first_code = (prefix + 1) << free_bits
    return first_code, next_cell_first_code - 1
292
+
293
+
294
def merge_adjacent(intervals: List[Tuple[int, int]]) -> List[Tuple[int, int]]:
    """
    Merge overlapping or directly adjacent inclusive integer intervals.

    The input list is left untouched; a new list sorted by interval start
    is returned.

    :param intervals: ``(lo, hi)`` tuples in any order.
    :returns: Merged ``(lo, hi)`` tuples in ascending order.
    """
    if not intervals:
        return []
    # sorted() rather than list.sort() so the caller's list is not reordered.
    ordered = sorted(intervals, key=lambda t: t[0])
    merged = [ordered[0]]
    for lo, hi in ordered[1:]:
        mlo, mhi = merged[-1]
        # "+ 1" treats touching intervals such as (0, 2) and (3, 4) as one.
        if lo <= mhi + 1:
            merged[-1] = (mlo, max(mhi, hi))
        else:
            merged.append((lo, hi))
    return merged
307
+
308
+ # --------------------------
309
+ # Rectangle -> list of Morton intervals
310
+ # --------------------------
311
+
312
+
313
def zcover_rectangle(rx0: int, ry0: int, rx1: int, ry1: int, bits: int, stop_level: Optional[int] = None, merge: bool = True) -> List[Tuple[int, int]]:
    """
    Compute a (near-)minimal set of Morton code ranges covering the rectangle
    [rx0..rx1] x [ry0..ry1] on an integer grid [0..2^bits-1]^2.

    - If stop_level is None: exact cover (descend to exact containment).
    - If stop_level is set (0..bits): stop descending at that level, adding
      partially-overlapping cells as whole ranges (superset cover).

    Raises ValueError when the rectangle is inverted or leaves the grid.
    """
    grid_max = (1 << bits) - 1
    x_valid = 0 <= rx0 <= rx1 <= grid_max
    if not (x_valid and 0 <= ry0 <= ry1 <= grid_max):
        raise ValueError("Rectangle out of bounds for given bits.")

    covered: List[Tuple[int, int]] = []

    # Depth-first walk over quadtree cells, starting from the whole grid.
    # Entries: (prefix, level, xmin, ymin, xmax, ymax)
    pending = [(0, 0, 0, 0, grid_max, grid_max)]

    while pending:
        prefix, level, xmin, ymin, xmax, ymax = pending.pop()

        # Cells that miss the rectangle contribute nothing.
        if not intersects(xmin, ymin, xmax, ymax, rx0, ry0, rx1, ry1):
            continue

        # Take the whole cell when we have reached the loose-cover level,
        # or when the cell sits entirely inside the rectangle.
        reached_stop_level = stop_level is not None and level == stop_level
        if reached_stop_level or contained(xmin, ymin, xmax, ymax, rx0, ry0, rx1, ry1):
            covered.append(cell_range(prefix, level, bits))
            continue

        # Leaf cell: a single lattice point (only happens when level == bits).
        if level == bits:
            if point_inside(xmin, ymin, rx0, ry0, rx1, ry1):
                covered.append(cell_range(prefix, level, bits))
            continue

        # Otherwise split into 4 children (Morton order: 00, 01, 10, 11).
        midx = (xmin + xmax) // 2
        midy = (ymin + ymax) // 2
        child_prefix = prefix << 2
        child_level = level + 1
        pending.extend([
            # q0: (x<=midx, y<=midy)
            (child_prefix | 0, child_level, xmin, ymin, midx, midy),
            # q1: (x>midx, y<=midy)
            (child_prefix | 1, child_level, midx + 1, ymin, xmax, midy),
            # q2: (x<=midx, y>midy)
            (child_prefix | 2, child_level, xmin, midy + 1, midx, ymax),
            # q3: (x>midx, y>midy)
            (child_prefix | 3, child_level, midx + 1, midy + 1, xmax, ymax),
        ])

    if merge:
        return merge_adjacent(covered)
    return covered
374
+
375
+
376
+ # --------------------------
377
+ # Morton intervals -> row ranges in a Morton-sorted column
378
+ # --------------------------
379
+
380
+ def zquery_rows_aux(morton_sorted: List[int], intervals: List[Tuple[int, int]], merge: bool = True) -> Tuple[List[Tuple[int, int]], List[int]]:
381
+ """
382
+ For each Z-interval [zlo, zhi], binary-search in the sorted Morton column
383
+ and return row index half-open ranges [i, j) to scan.
384
+ """
385
+
386
+ # Keep track of which keys were looked at during the binary searches.
387
+ # This is used for analysis / debugging, for instance, to enable
388
+ # evaluating how many HTTP requests would be needed in network-based case
389
+ # (which will also depend on Arrow row group size).
390
+ recorded_keys = []
391
+
392
+ def record_key_check(k: int) -> int:
393
+ # TODO: Does recorded_keys need to be marked as a global here?
394
+ recorded_keys.append(k)
395
+ return k
396
+
397
+ ranges: List[Tuple[int, int]] = []
398
+ # TODO: can these multiple binary searches be optimized?
399
+ # Since we are doing many searches in the same array, and in each search we learn where more elements are located.
400
+ for zlo, zhi in intervals:
401
+ i = bisect_left(morton_sorted, zlo, key=record_key_check)
402
+ # TODO: use lo=i in bisect_right to limit the search range?
403
+ # TODO: can the second binary search be further optimized since we just did a binary search via bisect_left?
404
+ j = bisect_right(morton_sorted, zhi, key=record_key_check)
405
+ if i < j:
406
+ ranges.append((i, j))
407
+
408
+ result = merge_adjacent(ranges) if merge else ranges
409
+ return result, recorded_keys
410
+
411
+
412
def zquery_rows(morton_sorted: List[int], intervals: List[Tuple[int, int]], merge: bool = True) -> List[Tuple[int, int]]:
    """
    Binary-search the sorted Morton column for each Z-interval [zlo, zhi]
    and return the half-open row index ranges [i, j) to scan.
    """
    # Same search as zquery_rows_aux, without the debugging record.
    row_ranges, _ = zquery_rows_aux(morton_sorted, intervals, merge=merge)
    return row_ranges
418
+
419
+
420
def row_ranges_to_row_indices(intervals: List[Tuple[int, int]]) -> List[int]:
    """
    Expand half-open row ranges [i, j) into a flat list of row indices.
    The result can index a pandas DataFrame via df.iloc[indices, :]
    """
    return [row for start, stop in intervals for row in range(start, stop)]
429
+
430
+
431
+ # More helper functions.
432
def sdata_points_process_columns(sdata, element, var_name_col=None, table_name=None) -> dd.DataFrame:
    """
    Prepare the columns of ``sdata.points[element]``.

    If a feature-name column is known, add a ``<var_name_col>_codes`` int32
    column holding each row's position in the annotating table's ``var`` index
    (-1 when the name is absent). Then move categorical and string columns to
    the right-hand side of the dataframe.

    :param var_name_col: Feature-name column; defaults to the ``feature_key``
        entry of the element's ``spatialdata_attrs`` metadata.
    :param table_name: Annotating table; looked up when not given.
    :returns: The dataframe with reordered columns.
    :raises ValueError: If ``table_name`` is not given and zero or several
        annotating tables are found.
    """
    ddf = sdata.points[element]

    if var_name_col is None:
        # We can try to get it from the spatialdata_attrs metadata.
        var_name_col = ddf.attrs["spatialdata_attrs"].get("feature_key")

    # Appending codes for dictionary-encoded feature_name column.
    if var_name_col is not None:
        if table_name is None:
            # Materialize as a list so indexing works whatever collection
            # type get_element_annotators returns (it may be a set).
            annotating_tables = list(get_element_annotators(sdata, element))
            if len(annotating_tables) == 1:
                table_name = annotating_tables[0]
            elif len(annotating_tables) == 0:
                raise ValueError(f"No annotating table found for Points element {element}, please specify table_name explicitly.")
            else:
                raise ValueError(f"Multiple annotating tables found for Points element {element}, please specify table_name explicitly.")

        var_index = sdata.tables[table_name].var.index.values.tolist()

        # Name -> position lookup built once, instead of a linear list.index
        # scan per row. setdefault keeps the FIRST position of a duplicated name.
        code_by_name = {}
        for code, name in enumerate(var_index):
            code_by_name.setdefault(name, code)

        def try_index(gene_name):
            try:
                return code_by_name.get(gene_name, -1)
            except TypeError:
                # Unhashable value: cannot be a feature name.
                return -1
        ddf[f"{var_name_col}_codes"] = ddf[var_name_col].apply(try_index).astype('int32')

    # Identify dictionary-encoded columns (categorical/string)
    orig_columns = ddf.columns.tolist()
    dict_encoded_cols = {
        col for col in orig_columns
        if isinstance(ddf[col].dtype, pd.CategoricalDtype) or pd.api.types.is_string_dtype(ddf[col].dtype)
    }

    # Dictionary-encoded columns (i.e., categorical and string) must be stored as the rightmost columns of the dataframe.
    # Relative order within each group is preserved.
    ordered_columns = (
        [col for col in orig_columns if col not in dict_encoded_cols]
        + [col for col in orig_columns if col in dict_encoded_cols]
    )

    # Reorder the columns of the dataframe
    return ddf[ordered_columns]
471
+
472
+
473
def sdata_points_write_bounding_box_attrs(sdata, element) -> None:
    """
    Compute the x/y bounding box of ``sdata.points[element]`` and store it as
    the ``bounding_box`` attribute of the ``points/<element>`` group in the
    Zarr store at ``sdata.path``.

    :raises ValueError: If ``sdata.path`` is None.
    """
    sdata_path = sdata.path
    if sdata_path is None:
        # Fail before doing any computation; there is nowhere to write to.
        raise ValueError("SpatialData object has no path; write it to disk before writing bounding box attributes.")

    ddf = sdata.points[element]

    [x_min, x_max, y_min, y_max] = [ddf["x"].min().compute(), ddf["x"].max().compute(), ddf["y"].min().compute(), ddf["y"].max().compute()]
    bounding_box = {
        "x_min": float(x_min),
        "x_max": float(x_max),
        "y_min": float(y_min),
        "y_max": float(y_max),
    }

    # Insert the bounding box as metadata for the sdata.points[element] Points element dataframe.
    z = zarr.open(sdata_path, mode='a')
    group = z[f'points/{element}']
    group.attrs['bounding_box'] = bounding_box

    # TODO: does anything special need to be done to ensure this is saved to disk?
493
+
494
+
495
def sdata_points_modify_row_group_size(sdata, element, row_group_size: int = 50_000):
    """
    Rewrite every ``part.*.parquet`` file of a Points element in place with
    the requested Parquet row group size.

    :param row_group_size: Row group size passed to ``pyarrow.parquet.write_table``.
    :raises ValueError: If ``sdata.path`` is None.
    """
    import pyarrow.parquet as pq

    sdata_path = sdata.path
    if sdata_path is None:
        raise ValueError("SpatialData object has no path; write it to disk before modifying the row group size.")

    # Directory holding the parts of the parquet dataset.
    parquet_path = join(sdata_path, "points", element, "points.parquet")

    # Use the file names actually present rather than assuming the parts are
    # numbered consecutively from 0.
    part_files = sorted(
        f for f in os.listdir(parquet_path)
        if f.startswith("part.") and f.endswith(".parquet")
    )

    # Update the row group size in each .parquet file part.
    for part_file in part_files:
        part_path = join(parquet_path, part_file)
        table_read = pq.read_table(part_path)

        # Write the table back to the same path with the desired row group size.
        pq.write_table(table_read, part_path, row_group_size=row_group_size)
@@ -147,7 +147,7 @@ def gen_sdata_obs_segmentations_schema(options, path: str, table_path: str = "ta
147
147
  return options
148
148
 
149
149
 
150
- def gen_sdata_obs_points_schema(options, path: str, table_path: str = "tables/table", coordinate_system: Optional[str] = None) -> dict:
150
+ def gen_sdata_obs_points_schema(options, path: str, table_path: str = "tables/table", coordinate_system: Optional[str] = None, feature_index_column: Optional[str] = None, morton_code_column: Optional[str] = None) -> dict:
151
151
  if path is not None:
152
152
  options["obsPoints"] = {
153
153
  "path": path
@@ -156,6 +156,10 @@ def gen_sdata_obs_points_schema(options, path: str, table_path: str = "tables/ta
156
156
  options["obsPoints"]['tablePath'] = table_path
157
157
  if coordinate_system is not None:
158
158
  options["obsPoints"]['coordinateSystem'] = coordinate_system
159
+ if feature_index_column is not None:
160
+ options["obsPoints"]['featureIndexColumn'] = feature_index_column
161
+ if morton_code_column is not None:
162
+ options["obsPoints"]['mortonCodeColumn'] = morton_code_column
159
163
  return options
160
164
 
161
165
 
@@ -181,7 +181,7 @@ const fallbackImportMap = {
181
181
  "react-dom": "https://cdn.vitessce.io/react-dom@18.2.0/index.js",
182
182
  "react-dom/client": "https://cdn.vitessce.io/react-dom@18.2.0/es2022/client.mjs",
183
183
  // Replaced with version-specific URL below.
184
- "vitessce": "https://cdn.vitessce.io/vitessce@3.8.5/dist/index.min.js",
184
+ "vitessce": "https://cdn.vitessce.io/vitessce@VERSION/dist/index.min.js",
185
185
  },
186
186
  };
187
187
  /*
@@ -191,7 +191,7 @@ const fallbackDevImportMap = {
191
191
  "react-dom": "https://cdn.vitessce.io/react-dom@18.2.0/index_dev.js",
192
192
  "react-dom/client": "https://cdn.vitessce.io/react-dom@18.2.0/es2022/client.development.mjs",
193
193
  // Replaced with version-specific URL below.
194
- "vitessce": "https://cdn.vitessce.io/@vitessce/dev@3.8.5/dist/index.js",
194
+ "vitessce": "https://cdn.vitessce.io/@vitessce/dev@VERSION/dist/index.js",
195
195
  },
196
196
  };
197
197
  */
@@ -729,7 +729,7 @@ class VitessceWidget(anywidget.AnyWidget):
729
729
 
730
730
  next_port = DEFAULT_PORT
731
731
 
732
- js_package_version = Unicode('3.8.5').tag(sync=True)
732
+ js_package_version = Unicode('3.9.2').tag(sync=True)
733
733
  js_dev_mode = Bool(False).tag(sync=True)
734
734
  custom_js_url = Unicode('').tag(sync=True)
735
735
  plugin_esm = List(trait=Unicode(''), default_value=[]).tag(sync=True)
@@ -742,7 +742,7 @@ class VitessceWidget(anywidget.AnyWidget):
742
742
 
743
743
  store_urls = List(trait=Unicode(''), default_value=[]).tag(sync=True)
744
744
 
745
- def __init__(self, config, height=600, theme='auto', uid=None, port=None, proxy=False, js_package_version='3.8.5', js_dev_mode=False, custom_js_url='', plugins=None, remount_on_uid_change=True, prefer_local=True, invoke_timeout=300000, invoke_batched=True, page_mode=False, page_esm=None, prevent_scroll=True, server_host=None):
745
+ def __init__(self, config, height=600, theme='auto', uid=None, port=None, proxy=False, js_package_version='3.9.2', js_dev_mode=False, custom_js_url='', plugins=None, remount_on_uid_change=True, prefer_local=True, invoke_timeout=300000, invoke_batched=True, page_mode=False, page_esm=None, prevent_scroll=True, server_host=None):
746
746
  """
747
747
  Construct a new Vitessce widget. Not intended to be instantiated directly; instead, use ``VitessceConfig.widget``.
748
748
 
@@ -876,7 +876,7 @@ class VitessceWidget(anywidget.AnyWidget):
876
876
  # Launch Vitessce using plain HTML representation (no ipywidgets)
877
877
 
878
878
 
879
- def ipython_display(config, height=600, theme='auto', base_url=None, host_name=None, uid=None, port=None, proxy=False, js_package_version='3.8.5', js_dev_mode=False, custom_js_url='', plugins=None, remount_on_uid_change=True, page_mode=False, page_esm=None, server_host=None):
879
+ def ipython_display(config, height=600, theme='auto', base_url=None, host_name=None, uid=None, port=None, proxy=False, js_package_version='3.9.2', js_dev_mode=False, custom_js_url='', plugins=None, remount_on_uid_change=True, page_mode=False, page_esm=None, server_host=None):
880
880
  from IPython.display import display, HTML
881
881
  uid_str = "vitessce" + get_uid_str(uid)
882
882
 
@@ -1407,7 +1407,7 @@ SpatialDataWrapperType = TypeVar('SpatialDataWrapperType', bound='SpatialDataWra
1407
1407
 
1408
1408
  class SpatialDataWrapper(AnnDataWrapper):
1409
1409
 
1410
- def __init__(self, sdata_path: Optional[str] = None, sdata_url: Optional[str] = None, sdata_store: Optional[Union[str, zarr.storage.StoreLike]] = None, sdata_artifact: Optional[ln.Artifact] = None, image_path: Optional[str] = None, region: Optional[str] = None, coordinate_system: Optional[str] = None, obs_spots_path: Optional[str] = None, obs_segmentations_path: Optional[str] = None, obs_points_path: Optional[str] = None, table_path: str = "tables/table", is_zip=None, coordination_values=None, **kwargs):
1410
+ def __init__(self, sdata_path: Optional[str] = None, sdata_url: Optional[str] = None, sdata_store: Optional[Union[str, zarr.storage.StoreLike]] = None, sdata_artifact: Optional[ln.Artifact] = None, image_path: Optional[str] = None, region: Optional[str] = None, coordinate_system: Optional[str] = None, obs_spots_path: Optional[str] = None, obs_segmentations_path: Optional[str] = None, obs_points_path: Optional[str] = None, obs_points_feature_index_column: Optional[str] = None, obs_points_morton_code_column: Optional[str] = None, table_path: str = "tables/table", is_zip=None, coordination_values=None, **kwargs):
1411
1411
  """
1412
1412
  Wrap a SpatialData object.
1413
1413
 
@@ -1432,6 +1432,14 @@ class SpatialDataWrapper(AnnDataWrapper):
1432
1432
  :type obs_segmentations_path: Optional[str]
1433
1433
  :param obs_points_path: Path to a points element, by default None
1434
1434
  :type obs_points_path: Optional[str]
1435
+ :param obs_points_feature_index_column: Column in the points dataframe that contains a feature index value (i.e., index into table.var.index to specify a gene) for each point, by default None
1436
+ :type obs_points_feature_index_column: Optional[str]
1437
+ :param obs_points_morton_code_column: Column in the points dataframe that contains a morton code for each point, by default None
1438
+ :type obs_points_morton_code_column: Optional[str]
1439
+ :param str feature_labels_path: Path to a table var column containing feature labels (e.g., alternate gene symbols), instead of the default index column of the `var` dataframe.
1440
+ :param list[str] obs_embedding_paths: Column names like `['obsm/X_umap', 'obsm/X_pca']` for showing scatterplots
1441
+ :param list[str] obs_embedding_names: Overriding names like `['UMAP', 'PCA']` for displaying above scatterplots
1442
+ :param list[str] obs_embedding_dims: Dimensions along which to get data for the scatterplot, like `[[0, 1], [4, 5]]` where `[0, 1]` is just the normal x and y but `[4, 5]` could be comparing the third and fourth principal components, for example.
1435
1443
  """
1436
1444
  raise_error_if_zero_or_more_than_one([
1437
1445
  sdata_path,
@@ -1462,6 +1470,8 @@ class SpatialDataWrapper(AnnDataWrapper):
1462
1470
  self._obs_spots_path = obs_spots_path
1463
1471
  self._obs_segmentations_path = obs_segmentations_path
1464
1472
  self._obs_points_path = obs_points_path
1473
+ self._obs_points_feature_index_column = obs_points_feature_index_column
1474
+ self._obs_points_morton_code_column = obs_points_morton_code_column
1465
1475
  if self._adata_path is not None:
1466
1476
  self.zarr_folder = 'spatialdata.zarr'
1467
1477
  self.obs_type_label = None
@@ -1549,8 +1559,12 @@ class SpatialDataWrapper(AnnDataWrapper):
1549
1559
  options = gen_sdata_obs_spots_schema(options, self._obs_spots_path, self._table_path, self._region, self._coordinate_system)
1550
1560
  options = gen_sdata_image_schema(options, self._image_path, self._coordinate_system)
1551
1561
  options = gen_sdata_obs_segmentations_schema(options, self._obs_segmentations_path, self._table_path, self._coordinate_system)
1552
- options = gen_sdata_obs_points_schema(options, self._obs_points_path, self._table_path, self._coordinate_system)
1562
+ options = gen_sdata_obs_points_schema(
1563
+ options, self._obs_points_path, self._table_path, self._coordinate_system,
1564
+ self._obs_points_feature_index_column, self._obs_points_morton_code_column
1565
+ )
1553
1566
  options = gen_feature_labels_schema(self._feature_labels, options)
1567
+ options = gen_obs_embedding_schema(options, self._mappings_obsm, self._mappings_obsm_names, self._mappings_obsm_dims)
1554
1568
  if len(options.keys()) > 0:
1555
1569
  obj_file_def = {
1556
1570
  "fileType": ft.SPATIALDATA_ZARR_ZIP.value if self.is_zip else ft.SPATIALDATA_ZARR.value,
@@ -0,0 +1,55 @@
1
+ import os
2
+ from os.path import join, isfile, isdir
3
+ from urllib.request import urlretrieve
4
+ import zipfile
5
+ import shutil
6
+
7
+ # Used spatialdata==0.4.0 on October 30, 2025
8
+ from spatialdata import read_zarr, SpatialData
9
+
10
+
11
def create_xenium_filtered_points(data_dir="data", output_path="xenium_rep1_io.points_only.spatialdata.zarr", step=200):
    """
    Create a small, points-only SpatialData store from the Xenium "rep1" sandbox dataset.

    The full dataset is downloaded and extracted into ``data_dir`` if it is not
    already there. The ``transcripts`` points element is then subsampled and
    written out as a new SpatialData store that contains only those points.

    :param str data_dir: Directory in which the zip archive is stored and extracted.
    :param str output_path: Path of the store to write. An existing store at this path is overwritten.
    :param int step: Keep every ``step``-th row of each partition of the points dataframe.
    :raises ValueError: If ``step`` is smaller than 1.
    """
    if step < 1:
        raise ValueError("step must be a positive integer")

    # 1. Download and extract the Xenium dataset if not already present
    zip_filepath = join(data_dir, "xenium_rep1_io.spatialdata.zarr.zip")
    spatialdata_filepath = join(data_dir, "xenium_rep1_io.spatialdata.zarr")

    if not isdir(spatialdata_filepath):
        if not isfile(zip_filepath):
            os.makedirs(data_dir, exist_ok=True)
            # zip_url = 'https://s3.embl.de/spatialdata/spatialdata-sandbox/xenium_rep1_io.zip'
            zip_url = 'https://s3.embl.de/spatialdata/spatialdata-sandbox/xenium_rep1_io_spatialdata_0.7.1.zip'
            urlretrieve(zip_url, zip_filepath)
        with zipfile.ZipFile(zip_filepath, "r") as zip_ref:
            zip_ref.extractall(data_dir)
        # The archive extracts to "data.zarr"; give it the dataset-specific name.
        os.rename(join(data_dir, "data.zarr"), spatialdata_filepath)

    # This Xenium dataset has an AnnData "raw" element.
    # Reference: https://github.com/giovp/spatialdata-sandbox/issues/55
    # Removing it is a no-op once it has been deleted, so it is safe on every run.
    raw_dir = join(spatialdata_filepath, "tables", "table", "raw")
    if isdir(raw_dir):
        shutil.rmtree(raw_dir)

    sdata = read_zarr(spatialdata_filepath)

    ddf = sdata.points["transcripts"]

    # 2. Define a function to take every `step`-th row from a partition
    def select_every_nth(partition):
        # Each 'partition' is a Pandas DataFrame, so positional slicing with
        # .iloc[::step] keeps rows 0, step, 2 * step, ... of that partition.
        return partition.iloc[::step]

    # 3. Apply this function to every partition in the Dask DataFrame.
    # The stride restarts in each partition, so this is a per-partition
    # subsample rather than every `step`-th row of the whole table.
    result = ddf.map_partitions(select_every_nth)

    # 4. Keep only the columns needed downstream (nothing is computed here;
    # the result is still a Dask DataFrame)
    filtered_ddf = result[["x", "y", "z", "feature_name", "cell_id"]]

    # 5. Wrap the subsampled points in a new SpatialData object and write it out
    small_sdata = SpatialData(points={"transcripts": filtered_ddf})

    small_sdata.write(output_path, overwrite=True)
53
+
54
+ # Uncomment to run.
55
+ # create_xenium_filtered_points()
@@ -0,0 +1,183 @@
1
+ import pytest
2
+ from pathlib import Path
3
+ from spatialdata import read_zarr
4
+
5
+ from vitessce.data_utils.spatialdata_points_zorder import (
6
+ # Function for computing codes and sorting
7
+ sdata_morton_sort_points,
8
+ # Functions for querying
9
+ sdata_morton_query_rect_debug,
10
+ row_ranges_to_row_indices,
11
+ orig_coord_to_norm_coord,
12
+ )
13
+
14
+
15
+ def _is_sorted(arr):
16
+ return all(arr[i] <= arr[i + 1] for i in range(len(arr) - 1))
17
+
18
+
19
# Directory containing the test data; the path is relative, so it is
# resolved against the current working directory.
data_path = Path('tests/data')


@pytest.fixture
def sdata_with_points():
    """Read the points-only Xenium SpatialData store from the test data directory."""
    store_path = data_path / "xenium_rep1_io.points_only.spatialdata.zarr"
    return read_zarr(store_path)
26
+
27
+
28
def test_zorder_sorting(sdata_with_points):
    """After sorting, the ``morton_code_2d`` column of the points is non-decreasing."""
    sdata_morton_sort_points(sdata_with_points, "transcripts")

    # Materialize the morton code column of the points element as a plain list.
    points_ddf = sdata_with_points.points["transcripts"]
    codes = points_ddf["morton_code_2d"].compute().values.tolist()

    assert _is_sorted(codes)
38
+
39
+
40
def _rows_in_rect(df, x_col, y_col, rect, shrink=0):
    """
    Brute-force rectangle query: return the index labels of the rows of ``df``
    whose (``x_col``, ``y_col``) values lie inside ``rect``.

    :param df: A pandas DataFrame.
    :param str x_col: Name of the column compared against the x bounds.
    :param str y_col: Name of the column compared against the y bounds.
    :param rect: ``[[x0, y0], [x1, y1]]``; both corners are inclusive.
    :param shrink: Amount by which every edge is moved inwards before comparing.
    :returns: Index labels of the matching rows.
    :rtype: list
    """
    (x0, y0), (x1, y1) = rect
    inside = (
        (df[x_col] >= x0 + shrink)
        & (df[x_col] <= x1 - shrink)
        & (df[y_col] >= y0 + shrink)
        & (df[y_col] <= y1 - shrink)
    )
    return df.loc[inside].index.tolist()


def _normalize_rect(rect, bounding_box):
    """Convert both corners of ``rect`` with ``orig_coord_to_norm_coord``, using the stored bounding box."""
    bounds = {
        "orig_x_min": bounding_box["x_min"],
        "orig_x_max": bounding_box["x_max"],
        "orig_y_min": bounding_box["y_min"],
        "orig_y_max": bounding_box["y_max"],
    }
    return [orig_coord_to_norm_coord(corner, **bounds) for corner in rect]


def test_zorder_query(sdata_with_points):
    """
    Rectangle queries via morton codes must return a superset of the rows found
    by a brute-force scan, for two different rectangles.
    """
    sdata = sdata_with_points

    sdata_morton_sort_points(sdata, "transcripts")

    # ========= First query ==========
    # Query a rectangle that should return some points
    orig_rect = [[50.0, 50.0], [100.0, 150.0]]  # [[x0, y0], [x1, y1]]
    matching_row_ranges, rows_checked = sdata_morton_query_rect_debug(sdata, "transcripts", orig_rect)
    rect_row_indices = row_ranges_to_row_indices(matching_row_ranges)

    # Cannot use df.iloc on a dask dataframe, so convert it to pandas first.
    # The index is reset so that index labels equal row positions.
    ddf = sdata.points["transcripts"]
    df = ddf.compute().reset_index(drop=True)
    estimated_row_indices = df.iloc[rect_row_indices].index.tolist()

    assert df.shape[0] == 213191

    # Do the same query the "dumb" way, by checking all points.
    # We need an epsilon for the "dumb" query since the normalization
    # introduces rounding issues. We can instead verify that a slightly
    # smaller rectangle is fully contained in the morton code query
    # estimated results.
    EXACT_BOUNDARY_EPSILON = 1
    exact_row_indices = _rows_in_rect(df, "x", "y", orig_rect, shrink=EXACT_BOUNDARY_EPSILON)

    # Check that the estimated rows 100% contain the exact rows.
    # A.issubset(B) checks that all elements of A are in B ("A is a subset of B").
    assert set(exact_row_indices).issubset(set(estimated_row_indices))
    assert len(exact_row_indices) == 4
    assert len(estimated_row_indices) <= 4

    # Check that the query inspected no more than a fixed number of rows
    # (far fewer than the total number of points asserted above).
    assert len(rows_checked) <= 19858
    assert len(matching_row_ranges) == 2  # Kind of an implementation detail.

    # Do a second check, this time against x_uint/y_uint (the normalized coordinates)
    # TODO: does this ensure that estimated == exact?
    bounding_box = ddf.attrs["bounding_box"]
    norm_rect = _normalize_rect(orig_rect, bounding_box)
    exact_row_indices_norm = _rows_in_rect(df, "x_uint", "y_uint", norm_rect)

    assert set(exact_row_indices_norm).issubset(set(estimated_row_indices))
    assert len(exact_row_indices_norm) == 4

    # ========= Another query ==========
    orig_rect = [[500.0, 500.0], [600.0, 600.0]]  # [[x0, y0], [x1, y1]]

    # Query using z-order
    matching_row_ranges, rows_checked = sdata_morton_query_rect_debug(sdata, "transcripts", orig_rect)
    rect_row_indices = row_ranges_to_row_indices(matching_row_ranges)
    estimated_row_indices = df.iloc[rect_row_indices].index.tolist()

    # Do the same query the "dumb" way, by checking all points
    exact_row_indices = _rows_in_rect(df, "x", "y", orig_rect, shrink=EXACT_BOUNDARY_EPSILON)

    # Check that the estimated rows 100% contain the exact rows.
    assert set(exact_row_indices).issubset(set(estimated_row_indices))
    assert len(exact_row_indices) == 85
    assert len(estimated_row_indices) <= 95

    # Check that the query inspected no more than a fixed number of rows
    assert len(rows_checked) <= 71675
    assert len(matching_row_ranges) == 13  # Kind of an implementation detail.

    # Do a second check, this time against x_uint/y_uint (the normalized coordinates)
    norm_rect = _normalize_rect(orig_rect, bounding_box)
    exact_row_indices_norm = _rows_in_rect(df, "x_uint", "y_uint", norm_rect)

    # The estimated rows must contain every row found in normalized coordinates.
    assert set(exact_row_indices_norm).issubset(set(estimated_row_indices))

    # Pin the expected result sizes for this dataset.
    assert len(exact_row_indices_norm) == 91
    assert len(estimated_row_indices) <= 95
@@ -481,7 +481,10 @@ class TestWrappers(unittest.TestCase):
481
481
  'obsSets': [{'name': 'Cell Type', 'path': 'obs/CellType'}],
482
482
  'tablePath': 'tables/table'
483
483
  },
484
- 'image': {'path': 'images/picture'}
484
+ 'image': {'path': 'images/picture'},
485
+ 'obsEmbedding': [
486
+ {'dims': [0, 1], 'embeddingType': 'UMAP', 'path': 'obsm/X_umap'},
487
+ ],
485
488
  }})
486
489
 
487
490
  def test_spatial_data_with_base_dir_2(self):
@@ -4496,7 +4496,7 @@ wheels = [
4496
4496
 
4497
4497
  [[package]]
4498
4498
  name = "vitessce"
4499
- version = "3.7.7"
4499
+ version = "3.8.1"
4500
4500
  source = { editable = "." }
4501
4501
  dependencies = [
4502
4502
  { name = "black" },
@@ -4559,12 +4559,14 @@ dev = [
4559
4559
  { name = "boto3" },
4560
4560
  { name = "build" },
4561
4561
  { name = "coverage" },
4562
+ { name = "dask", extra = ["dataframe"] },
4562
4563
  { name = "flake8" },
4563
4564
  { name = "jupyterlab" },
4564
4565
  { name = "loompy" },
4565
4566
  { name = "numba" },
4566
4567
  { name = "pytest" },
4567
4568
  { name = "scikit-misc" },
4569
+ { name = "spatialdata" },
4568
4570
  ]
4569
4571
 
4570
4572
  [package.metadata]
@@ -4620,6 +4622,7 @@ dev = [
4620
4622
  { name = "boto3", specifier = ">=1.16.30" },
4621
4623
  { name = "build", specifier = "==0.1.0" },
4622
4624
  { name = "coverage", specifier = ">=6.3.2" },
4625
+ { name = "dask", extras = ["dataframe"], specifier = "==2024.11.1" },
4623
4626
  { name = "flake8", specifier = ">=3.8.4" },
4624
4627
  { name = "jupyterlab" },
4625
4628
  { name = "jupyterlab", specifier = ">=3" },
@@ -4627,6 +4630,7 @@ dev = [
4627
4630
  { name = "numba", specifier = ">=0.53.0" },
4628
4631
  { name = "pytest", specifier = ">=6.2.4" },
4629
4632
  { name = "scikit-misc", specifier = ">=0.1.3" },
4633
+ { name = "spatialdata", specifier = ">=0.3.0" },
4630
4634
  ]
4631
4635
 
4632
4636
  [[package]]
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes