starlet 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. starlet-0.1.0/PKG-INFO +196 -0
  2. starlet-0.1.0/README.md +169 -0
  3. starlet-0.1.0/pyproject.toml +47 -0
  4. starlet-0.1.0/setup.cfg +4 -0
  5. starlet-0.1.0/starlet/__init__.py +270 -0
  6. starlet-0.1.0/starlet/_cli.py +177 -0
  7. starlet-0.1.0/starlet/_internal/__init__.py +0 -0
  8. starlet-0.1.0/starlet/_internal/histogram/__init__.py +7 -0
  9. starlet-0.1.0/starlet/_internal/histogram/hist_pyramid.py +261 -0
  10. starlet-0.1.0/starlet/_internal/histogram/loader.py +16 -0
  11. starlet-0.1.0/starlet/_internal/mvt/__init__.py +3 -0
  12. starlet-0.1.0/starlet/_internal/mvt/assigner.py +66 -0
  13. starlet-0.1.0/starlet/_internal/mvt/generator.py +57 -0
  14. starlet-0.1.0/starlet/_internal/mvt/helpers.py +108 -0
  15. starlet-0.1.0/starlet/_internal/mvt/renderer.py +81 -0
  16. starlet-0.1.0/starlet/_internal/mvt/streamer.py +66 -0
  17. starlet-0.1.0/starlet/_internal/server/__init__.py +3 -0
  18. starlet-0.1.0/starlet/_internal/server/app.py +320 -0
  19. starlet-0.1.0/starlet/_internal/server/download_service.py +353 -0
  20. starlet-0.1.0/starlet/_internal/server/llm/__init__.py +13 -0
  21. starlet-0.1.0/starlet/_internal/server/llm/factory.py +70 -0
  22. starlet-0.1.0/starlet/_internal/server/llm/gemini_provider.py +71 -0
  23. starlet-0.1.0/starlet/_internal/server/llm/ollama_provider.py +61 -0
  24. starlet-0.1.0/starlet/_internal/server/llm/prompt.md +29 -0
  25. starlet-0.1.0/starlet/_internal/server/llm/provider.py +21 -0
  26. starlet-0.1.0/starlet/_internal/server/llm/suggestions.py +77 -0
  27. starlet-0.1.0/starlet/_internal/server/templates/index.html +192 -0
  28. starlet-0.1.0/starlet/_internal/server/tiler/__init__.py +11 -0
  29. starlet-0.1.0/starlet/_internal/server/tiler/mvt_encoder.py +41 -0
  30. starlet-0.1.0/starlet/_internal/server/tiler/parquet_index.py +59 -0
  31. starlet-0.1.0/starlet/_internal/server/tiler/tile_cache.py +28 -0
  32. starlet-0.1.0/starlet/_internal/server/tiler/tiler.py +170 -0
  33. starlet-0.1.0/starlet/_internal/server/tiler/tiler_bounds.py +48 -0
  34. starlet-0.1.0/starlet/_internal/stats/__init__.py +9 -0
  35. starlet-0.1.0/starlet/_internal/stats/collector.py +63 -0
  36. starlet-0.1.0/starlet/_internal/stats/sketches.py +192 -0
  37. starlet-0.1.0/starlet/_internal/stats/writer.py +11 -0
  38. starlet-0.1.0/starlet/_internal/tiling/RSGrove.py +681 -0
  39. starlet-0.1.0/starlet/_internal/tiling/__init__.py +11 -0
  40. starlet-0.1.0/starlet/_internal/tiling/assigner.py +369 -0
  41. starlet-0.1.0/starlet/_internal/tiling/datasource.py +371 -0
  42. starlet-0.1.0/starlet/_internal/tiling/orchestrator.py +290 -0
  43. starlet-0.1.0/starlet/_internal/tiling/utils_large.py +34 -0
  44. starlet-0.1.0/starlet/_internal/tiling/writer_pool.py +327 -0
  45. starlet-0.1.0/starlet/_types.py +101 -0
  46. starlet-0.1.0/starlet.egg-info/PKG-INFO +196 -0
  47. starlet-0.1.0/starlet.egg-info/SOURCES.txt +49 -0
  48. starlet-0.1.0/starlet.egg-info/dependency_links.txt +1 -0
  49. starlet-0.1.0/starlet.egg-info/entry_points.txt +2 -0
  50. starlet-0.1.0/starlet.egg-info/requires.txt +22 -0
  51. starlet-0.1.0/starlet.egg-info/top_level.txt +1 -0
starlet-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,196 @@
1
+ Metadata-Version: 2.4
2
+ Name: starlet
3
+ Version: 0.1.0
4
+ Summary: Spatial tiling, MVT generation, and tile serving for geospatial data
5
+ License: MIT
6
+ Requires-Python: >=3.10
7
+ Description-Content-Type: text/markdown
8
+ Requires-Dist: pyarrow>=15
9
+ Requires-Dist: pandas>=2
10
+ Requires-Dist: numpy>=1.24
11
+ Requires-Dist: shapely>=2.0
12
+ Requires-Dist: click>=8
13
+ Requires-Dist: ijson>=3.3
14
+ Provides-Extra: mvt
15
+ Requires-Dist: mapbox-vector-tile>=2; extra == "mvt"
16
+ Requires-Dist: pyproj>=3; extra == "mvt"
17
+ Provides-Extra: server
18
+ Requires-Dist: flask>=3; extra == "server"
19
+ Requires-Dist: flask-cors; extra == "server"
20
+ Requires-Dist: geopandas>=0.14; extra == "server"
21
+ Requires-Dist: pyproj>=3; extra == "server"
22
+ Requires-Dist: mapbox-vector-tile>=2; extra == "server"
23
+ Requires-Dist: gunicorn; extra == "server"
24
+ Requires-Dist: datasketch; extra == "server"
25
+ Provides-Extra: all
26
+ Requires-Dist: starlet[mvt,server]; extra == "all"
27
+
28
+ # Starlet
29
+
30
+ Spatial tiling, MVT generation, and tile serving for geospatial data.
31
+
32
+ ## Setup
33
+
34
+ ```bash
35
+ python -m venv .venv
36
+ source .venv/bin/activate
37
+ pip install -e ".[all]"  # "all" adds the optional mvt and server extras
38
+ ```
39
+
40
+ ## CLI
41
+
42
+ All commands are available through the `starlet` CLI.
43
+
44
+ ```bash
45
+ starlet --help
46
+ ```
47
+
48
+ ### `starlet tile` — Partition a dataset
49
+
50
+ ```bash
51
+ starlet tile --input data.parquet --outdir datasets/mydata --num-tiles 40
52
+ ```
53
+
54
+ | Flag | Default | Description |
55
+ |------|---------|-------------|
56
+ | `--input` | (required) | Path to GeoParquet or GeoJSON file |
57
+ | `--outdir` | (required) | Output dataset directory |
58
+ | `--num-tiles` | 40 | Target number of spatial partitions |
59
+ | `--partition-size` | 1gb | Target partition size (e.g. 512mb, 1gb) |
60
+ | `--sort` | zorder | Sort order: zorder, hilbert, columns, none |
61
+ | `--sample-cap` | 10000 | Reservoir sampling cap for centroids |
62
+ | `--compression` | zstd | Parquet compression codec |
63
+
64
+ ### `starlet mvt` — Generate vector tiles
65
+
66
+ ```bash
67
+ starlet mvt --dir datasets/mydata --zoom 7 --threshold 100000
68
+ ```
69
+
70
+ | Flag | Default | Description |
71
+ |------|---------|-------------|
72
+ | `--dir` | (required) | Dataset directory with parquet_tiles/ and histograms/ |
73
+ | `--zoom` | 7 | Maximum zoom level |
74
+ | `--threshold` | 0 | Minimum feature count per tile |
75
+ | `--outdir` | `<dir>/mvt/` | MVT output directory |
76
+
77
+ ### `starlet build` — Full pipeline (tile + MVT)
78
+
79
+ ```bash
80
+ starlet build --input data.parquet --outdir datasets/mydata
81
+ ```
82
+
83
+ ### `starlet serve` — Launch the tile server
84
+
85
+ ```bash
86
+ starlet serve --dir datasets --port 8765
87
+ ```
88
+
89
+ | Flag | Default | Description |
90
+ |------|---------|-------------|
91
+ | `--dir` | (required) | Root directory containing dataset subdirectories |
92
+ | `--host` | 0.0.0.0 | Host to bind |
93
+ | `--port` | 8765 | Port to bind |
94
+ | `--cache-size` | 256 | In-memory tile cache size |
95
+
96
+ ### `starlet info` — Inspect a dataset
97
+
98
+ ```bash
99
+ starlet info --dir datasets/mydata
100
+ ```
101
+
102
+ ## Make Targets
103
+
104
+ Convenience wrappers around the CLI:
105
+
106
+ ```bash
107
+ make tiles INPUT=path/to/data.parquet
108
+ make mvt INPUT=path/to/data.parquet
109
+ make build INPUT=path/to/data.parquet # tiles + mvt
110
+ make server # starts on port 8765
111
+ make clean # removes datasets/*
112
+ ```
113
+
114
+ ## API Endpoints
115
+
116
+ Once the server is running:
117
+
118
+ | Method | Path | Description |
119
+ |--------|------|-------------|
120
+ | `GET` | `/` | Interactive dataset selector |
121
+ | `GET` | `/api/datasets` | List all datasets |
122
+ | `GET` | `/datasets.json` | Search datasets by name |
123
+ | `GET` | `/datasets/<dataset>.json` | Dataset metadata |
124
+ | `GET` | `/datasets/<dataset>.html` | Dataset detail page |
125
+ | `GET` | `/<dataset>/<z>/<x>/<y>.mvt` | Mapbox Vector Tile |
126
+ | `GET` | `/datasets/<dataset>/features.<fmt>` | Download features (csv/geojson) |
127
+ | `POST` | `/datasets/<dataset>/features.<fmt>` | Download with geometry filter |
128
+ | `GET` | `/datasets/<dataset>/features/sample.json` | Sample attributes |
129
+ | `GET` | `/datasets/<dataset>/features/sample.geojson` | Sample record with geometry |
130
+ | `GET` | `/api/datasets/<dataset>/stats` | Attribute statistics |
131
+ | `POST` | `/datasets/<dataset>/styles.json` | LLM-generated styling suggestions |
132
+
133
+ ## LLM Styling Suggestions
134
+
135
+ The `POST /datasets/<dataset>/styles.json` endpoint uses an LLM to generate
136
+ map styling rules from dataset attribute statistics.
137
+
138
+ ### Provider Selection
139
+
140
+ Set the `LLM_PROVIDER` environment variable to choose a provider:
141
+
142
+ ```bash
143
+ export LLM_PROVIDER=gemini # default
144
+ export LLM_PROVIDER=ollama # local Ollama
145
+ ```
146
+
147
+ Falls back to Gemini if the variable is unset or invalid.
148
+
149
+ ### Gemini (default)
150
+
151
+ Requires a Google AI Studio API key:
152
+
153
+ ```bash
154
+ export GEMINI_API_KEY=your-key-here
155
+ starlet serve --dir datasets
156
+ ```
157
+
158
+ ### Ollama (local)
159
+
160
+ Requires a running Ollama instance on the default port (11434):
161
+
162
+ ```bash
163
+ ollama serve # start Ollama
164
+ ollama pull llama3 # pull a model (once)
165
+
166
+ export LLM_PROVIDER=ollama
167
+ starlet serve --dir datasets
168
+ ```
169
+
170
+ To use a different model:
171
+
172
+ ```bash
173
+ export OLLAMA_MODEL=mistral
174
+ ```
175
+
176
+ See [`starlet/_internal/server/llm/README.md`](starlet/_internal/server/llm/README.md) for full LLM provider documentation.
177
+
178
+ ## Example
179
+
180
+ ```bash
181
+ # Full pipeline
182
+ starlet build --input ../data/TIGER2018_COUNTY.parquet --outdir datasets/TIGER2018_COUNTY
183
+
184
+ # Or via Make
185
+ make build INPUT=../data/TIGER2018_COUNTY.parquet
186
+
187
+ # Start the server
188
+ make server
189
+ ```
190
+
191
+ Then open http://localhost:8765 and select a dataset to visualize.
192
+
193
+ ## Prerequisites
194
+
195
+ - Python 3.10+
196
+ - `make` (optional, for convenience targets)
@@ -0,0 +1,169 @@
1
+ # Starlet
2
+
3
+ Spatial tiling, MVT generation, and tile serving for geospatial data.
4
+
5
+ ## Setup
6
+
7
+ ```bash
8
+ python -m venv .venv
9
+ source .venv/bin/activate
10
+ pip install -e ".[all]"  # "all" adds the optional mvt and server extras
11
+ ```
12
+
13
+ ## CLI
14
+
15
+ All commands are available through the `starlet` CLI.
16
+
17
+ ```bash
18
+ starlet --help
19
+ ```
20
+
21
+ ### `starlet tile` — Partition a dataset
22
+
23
+ ```bash
24
+ starlet tile --input data.parquet --outdir datasets/mydata --num-tiles 40
25
+ ```
26
+
27
+ | Flag | Default | Description |
28
+ |------|---------|-------------|
29
+ | `--input` | (required) | Path to GeoParquet or GeoJSON file |
30
+ | `--outdir` | (required) | Output dataset directory |
31
+ | `--num-tiles` | 40 | Target number of spatial partitions |
32
+ | `--partition-size` | 1gb | Target partition size (e.g. 512mb, 1gb) |
33
+ | `--sort` | zorder | Sort order: zorder, hilbert, columns, none |
34
+ | `--sample-cap` | 10000 | Reservoir sampling cap for centroids |
35
+ | `--compression` | zstd | Parquet compression codec |
36
+
37
+ ### `starlet mvt` — Generate vector tiles
38
+
39
+ ```bash
40
+ starlet mvt --dir datasets/mydata --zoom 7 --threshold 100000
41
+ ```
42
+
43
+ | Flag | Default | Description |
44
+ |------|---------|-------------|
45
+ | `--dir` | (required) | Dataset directory with parquet_tiles/ and histograms/ |
46
+ | `--zoom` | 7 | Maximum zoom level |
47
+ | `--threshold` | 0 | Minimum feature count per tile |
48
+ | `--outdir` | `<dir>/mvt/` | MVT output directory |
49
+
50
+ ### `starlet build` — Full pipeline (tile + MVT)
51
+
52
+ ```bash
53
+ starlet build --input data.parquet --outdir datasets/mydata
54
+ ```
55
+
56
+ ### `starlet serve` — Launch the tile server
57
+
58
+ ```bash
59
+ starlet serve --dir datasets --port 8765
60
+ ```
61
+
62
+ | Flag | Default | Description |
63
+ |------|---------|-------------|
64
+ | `--dir` | (required) | Root directory containing dataset subdirectories |
65
+ | `--host` | 0.0.0.0 | Host to bind |
66
+ | `--port` | 8765 | Port to bind |
67
+ | `--cache-size` | 256 | In-memory tile cache size |
68
+
69
+ ### `starlet info` — Inspect a dataset
70
+
71
+ ```bash
72
+ starlet info --dir datasets/mydata
73
+ ```
74
+
75
+ ## Make Targets
76
+
77
+ Convenience wrappers around the CLI:
78
+
79
+ ```bash
80
+ make tiles INPUT=path/to/data.parquet
81
+ make mvt INPUT=path/to/data.parquet
82
+ make build INPUT=path/to/data.parquet # tiles + mvt
83
+ make server # starts on port 8765
84
+ make clean # removes datasets/*
85
+ ```
86
+
87
+ ## API Endpoints
88
+
89
+ Once the server is running:
90
+
91
+ | Method | Path | Description |
92
+ |--------|------|-------------|
93
+ | `GET` | `/` | Interactive dataset selector |
94
+ | `GET` | `/api/datasets` | List all datasets |
95
+ | `GET` | `/datasets.json` | Search datasets by name |
96
+ | `GET` | `/datasets/<dataset>.json` | Dataset metadata |
97
+ | `GET` | `/datasets/<dataset>.html` | Dataset detail page |
98
+ | `GET` | `/<dataset>/<z>/<x>/<y>.mvt` | Mapbox Vector Tile |
99
+ | `GET` | `/datasets/<dataset>/features.<fmt>` | Download features (csv/geojson) |
100
+ | `POST` | `/datasets/<dataset>/features.<fmt>` | Download with geometry filter |
101
+ | `GET` | `/datasets/<dataset>/features/sample.json` | Sample attributes |
102
+ | `GET` | `/datasets/<dataset>/features/sample.geojson` | Sample record with geometry |
103
+ | `GET` | `/api/datasets/<dataset>/stats` | Attribute statistics |
104
+ | `POST` | `/datasets/<dataset>/styles.json` | LLM-generated styling suggestions |
105
+
106
+ ## LLM Styling Suggestions
107
+
108
+ The `POST /datasets/<dataset>/styles.json` endpoint uses an LLM to generate
109
+ map styling rules from dataset attribute statistics.
110
+
111
+ ### Provider Selection
112
+
113
+ Set the `LLM_PROVIDER` environment variable to choose a provider:
114
+
115
+ ```bash
116
+ export LLM_PROVIDER=gemini # default
117
+ export LLM_PROVIDER=ollama # local Ollama
118
+ ```
119
+
120
+ Falls back to Gemini if the variable is unset or invalid.
121
+
122
+ ### Gemini (default)
123
+
124
+ Requires a Google AI Studio API key:
125
+
126
+ ```bash
127
+ export GEMINI_API_KEY=your-key-here
128
+ starlet serve --dir datasets
129
+ ```
130
+
131
+ ### Ollama (local)
132
+
133
+ Requires a running Ollama instance on the default port (11434):
134
+
135
+ ```bash
136
+ ollama serve # start Ollama
137
+ ollama pull llama3 # pull a model (once)
138
+
139
+ export LLM_PROVIDER=ollama
140
+ starlet serve --dir datasets
141
+ ```
142
+
143
+ To use a different model:
144
+
145
+ ```bash
146
+ export OLLAMA_MODEL=mistral
147
+ ```
148
+
149
+ See [`starlet/_internal/server/llm/README.md`](starlet/_internal/server/llm/README.md) for full LLM provider documentation.
150
+
151
+ ## Example
152
+
153
+ ```bash
154
+ # Full pipeline
155
+ starlet build --input ../data/TIGER2018_COUNTY.parquet --outdir datasets/TIGER2018_COUNTY
156
+
157
+ # Or via Make
158
+ make build INPUT=../data/TIGER2018_COUNTY.parquet
159
+
160
+ # Start the server
161
+ make server
162
+ ```
163
+
164
+ Then open http://localhost:8765 and select a dataset to visualize.
165
+
166
+ ## Prerequisites
167
+
168
+ - Python 3.10+
169
+ - `make` (optional, for convenience targets)
@@ -0,0 +1,47 @@
1
+ [project]
2
+ name = "starlet"
3
+ version = "0.1.0"
4
+ description = "Spatial tiling, MVT generation, and tile serving for geospatial data"
5
+ readme = "README.md"
6
+ requires-python = ">=3.10"
7
+ license = {text = "MIT"}
8
+ dependencies = [
9
+ "pyarrow>=15",
10
+ "pandas>=2",
11
+ "numpy>=1.24",
12
+ "shapely>=2.0",
13
+ "click>=8",
14
+ "ijson>=3.3",
15
+ ]
16
+
17
+ [project.optional-dependencies]
18
+ mvt = [
19
+ "mapbox-vector-tile>=2",
20
+ "pyproj>=3",
21
+ ]
22
+ server = [
23
+ "flask>=3",
24
+ "flask-cors",
25
+ "geopandas>=0.14",
26
+ "pyproj>=3",
27
+ "mapbox-vector-tile>=2",
28
+ "gunicorn",
29
+ "datasketch",
30
+ ]
31
+ all = [
32
+ "starlet[mvt,server]",
33
+ ]
34
+
35
+ [project.scripts]
36
+ starlet = "starlet._cli:main"
37
+
38
+ [build-system]
39
+ requires = ["setuptools>=68", "wheel"]
40
+ build-backend = "setuptools.build_meta"
41
+
42
+ [tool.setuptools.packages.find]
43
+ include = ["starlet*"]
44
+
45
+ [tool.setuptools.package-data]
46
+ "starlet._internal.server" = ["templates/*.html"]
47
+ "starlet._internal.server.llm" = ["prompt.md"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,270 @@
1
+ """starlet — spatial tiling, MVT generation, and tile serving for geospatial data."""
2
+ from __future__ import annotations
3
+
4
+ __version__ = "0.1.0"
5
+
6
+ from starlet._types import TileResult, MVTResult, Dataset
7
+
8
+ __all__ = [
9
+ "tile",
10
+ "generate_mvt",
11
+ "build",
12
+ "create_app",
13
+ "TileResult",
14
+ "MVTResult",
15
+ "Dataset",
16
+ ]
17
+
18
+
19
def tile(
    input: str,
    outdir: str,
    *,
    num_tiles: int = 40,
    partition_size: int = 1 << 30,
    sort: str = "zorder",
    compression: str = "zstd",
    sample_cap: int | None = 10_000,
    sample_ratio: float = 1.0,
    seed: int = 42,
    geom_col: str = "geometry",
    sfc_bits: int = 16,
    max_parallel_files: int = 64,
    index: str | None = None,
) -> TileResult:
    """Partition a GeoParquet/GeoJSON dataset into spatially-tiled Parquet files.

    Tiled files are written to ``<outdir>/parquet_tiles/`` and histograms to
    ``<outdir>/histograms/``.

    Parameters
    ----------
    input : str
        Path to a GeoParquet, GeoJSON, or GeoJSON-Lines file.
    outdir : str
        Output dataset directory.
    num_tiles : int
        Target number of spatial partitions (used when *index* is ``None``).
        Pass ``0`` to derive the count from *partition_size* instead.
    partition_size : int
        Target partition size in bytes. Only consulted when *num_tiles* is
        falsy; because *num_tiles* defaults to 40, it is ignored by default.
    sort : str
        Row sort order within each tile: ``"zorder"``, ``"hilbert"``,
        ``"columns"``, or ``"none"`` (case-insensitive). Unrecognised values
        fall back to ``"zorder"`` and a warning is logged.
    compression : str
        Parquet compression codec (default ``"zstd"``).
    sample_cap : int | None
        Reservoir sampling cap for centroid sampling.
    sample_ratio : float
        Bernoulli sampling ratio for centroids (0 < r <= 1).
    seed : int
        Random seed for RSGrove partitioner.
    geom_col : str
        Name of the geometry column.
    sfc_bits : int
        Bits per axis for Z-order / Hilbert key.
    max_parallel_files : int
        Maximum concurrent tile files during write.
    index : str | None
        Path to a legacy CSV index file. When provided, *num_tiles* is ignored.

    Returns
    -------
    TileResult

    Raises
    ------
    ValueError
        If *num_tiles* is falsy and *partition_size* is not positive.
    """
    import logging
    import math
    from pathlib import Path

    import pyarrow.parquet as pq

    from starlet._internal.tiling.datasource import GeoParquetSource, GeoJSONSource, is_geojson_path
    from starlet._internal.tiling.assigner import TileAssignerFromCSV, RSGroveAssigner
    from starlet._internal.tiling.orchestrator import RoundOrchestrator
    from starlet._internal.tiling.writer_pool import SortMode
    from starlet._internal.histogram.hist_pyramid import build_histograms_for_dir

    logger = logging.getLogger("starlet.tile")

    # Parse sort mode
    _sort_map = {
        "none": SortMode.NONE,
        "columns": SortMode.COLUMNS,
        "zorder": SortMode.ZORDER,
        "hilbert": SortMode.HILBERT,
    }
    sort_key = sort.strip().lower()
    if sort_key in _sort_map:
        sort_mode = _sort_map[sort_key]
    else:
        # Keep the historical lenient fallback, but make the typo visible.
        logger.warning("Unknown sort mode %r; falling back to 'zorder'", sort)
        sort_mode = SortMode.ZORDER

    # Build data source
    if is_geojson_path(input):
        source = GeoJSONSource(input)
    else:
        source = GeoParquetSource(input)

    # Determine partition count. The size-based estimate is computed only when
    # it is actually needed, so an irrelevant partition_size cannot fail here.
    input_size_bytes = Path(input).stat().st_size
    if num_tiles:
        target_partitions = num_tiles
    else:
        if partition_size <= 0:
            raise ValueError(
                f"partition_size must be a positive number of bytes, got {partition_size!r}"
            )
        target_partitions = max(1, math.ceil(input_size_bytes / partition_size))
    logger.info("Target partitions: %d (input=%d bytes)", target_partitions, input_size_bytes)

    # Build assigner
    if index:
        assigner = TileAssignerFromCSV(index, geom_col=geom_col)
    else:
        assigner = RSGroveAssigner.from_source(
            tables=source.iter_tables(),
            num_partitions=target_partitions,
            geom_col=geom_col,
            seed=seed,
            sample_ratio=sample_ratio,
            sample_cap=sample_cap,
        )

    tiles_dir = str(Path(outdir) / "parquet_tiles")
    hist_dir = str(Path(outdir) / "histograms")

    orchestrator = RoundOrchestrator(
        source=source,
        assigner=assigner,
        outdir=tiles_dir,
        max_parallel_files=max_parallel_files,
        compression=compression,
        sort_mode=sort_mode,
        sfc_bits=sfc_bits,
    )
    orchestrator.run()

    logger.info("Tiling complete. Building histograms.")
    build_histograms_for_dir(
        tiles_dir=tiles_dir,
        outdir=hist_dir,
        geom_col=geom_col,
        grid_size=4096,
        dtype="float64",
        hist_max_parallel=8,
        hist_rg_parallel=4,
    )

    # Gather result metadata: row counts come from the Parquet metadata of
    # each written tile file.
    tile_files = list(Path(tiles_dir).glob("*.parquet"))
    total_rows = sum(pq.read_metadata(str(tf)).num_rows for tf in tile_files)

    ds = Dataset(outdir)
    # A falsy bbox from Dataset is reported as an all-zero box.
    result_bbox = ds.bbox or (0.0, 0.0, 0.0, 0.0)

    # NOTE(review): generate_mvt() reads histograms/global.npy, whereas this
    # reports global_prefix.npy -- confirm build_histograms_for_dir writes both.
    return TileResult(
        outdir=outdir,
        num_files=len(tile_files),
        total_rows=total_rows,
        bbox=result_bbox,
        histogram_path=str(Path(hist_dir) / "global_prefix.npy"),
    )
162
+
163
+
164
def generate_mvt(
    tile_dir: str,
    *,
    zoom: int = 7,
    threshold: float = 0,
    outdir: str | None = None,
) -> MVTResult:
    """Render Mapbox Vector Tiles for an already-tiled dataset directory.

    Parameters
    ----------
    tile_dir : str
        Dataset directory; ``parquet_tiles/`` and ``histograms/global.npy``
        are looked up beneath it.
    zoom : int
        Maximum zoom level, passed to the generator as ``last_zoom``.
    threshold : float
        Minimum feature count per tile, passed through to the generator.
    outdir : str | None
        Where the MVT files are written. A falsy value selects
        ``<tile_dir>/mvt``.

    Returns
    -------
    MVTResult
        The output directory, the sorted numeric zoom directories found in
        it, and the number of ``*.mvt`` files found beneath it.
    """
    from pathlib import Path
    from starlet._internal.mvt.generator import BucketMVTGenerator

    dataset_root = Path(tile_dir)
    destination = outdir or str(dataset_root / "mvt")

    BucketMVTGenerator(
        parquet_dir=str(dataset_root / "parquet_tiles"),
        hist_path=str(dataset_root / "histograms" / "global.npy"),
        outdir=destination,
        last_zoom=zoom,
        threshold=threshold,
    ).run()

    # Summarise what was produced by walking the output tree.
    destination_path = Path(destination)
    tile_count = sum(1 for _ in destination_path.rglob("*.mvt"))

    # Zoom levels are the purely numeric sub-directories of the output root.
    zoom_levels = []
    if destination_path.exists():
        for entry in destination_path.iterdir():
            if entry.is_dir() and entry.name.isdigit():
                zoom_levels.append(int(entry.name))
        zoom_levels.sort()

    return MVTResult(
        outdir=destination,
        zoom_levels=zoom_levels,
        tile_count=tile_count,
    )
217
+
218
+
219
def build(
    input: str,
    outdir: str,
    *,
    zoom: int = 7,
    num_tiles: int = 40,
    threshold: float = 100_000,
    **tile_kwargs,
) -> tuple[TileResult, MVTResult]:
    """Tile a dataset and then generate its MVTs in one call.

    Parameters
    ----------
    input : str
        Path to source GeoParquet or GeoJSON file.
    outdir : str
        Output dataset directory; used both as the tiling output and as the
        dataset directory handed to :func:`generate_mvt`.
    zoom : int
        Maximum zoom level for MVT generation.
    num_tiles : int
        Target number of spatial partitions.
    threshold : float
        Minimum feature count per MVT tile.
    **tile_kwargs
        Extra keyword arguments passed unchanged to :func:`tile`.

    Returns
    -------
    tuple[TileResult, MVTResult]
        The tiling result followed by the MVT result.
    """
    # Stage 1: spatial partitioning (also builds the histograms MVT needs).
    tiling_outcome = tile(
        input=input,
        outdir=outdir,
        num_tiles=num_tiles,
        **tile_kwargs,
    )

    # Stage 2: vector tiles; no outdir is passed, so the default is used.
    mvt_outcome = generate_mvt(
        tile_dir=outdir,
        zoom=zoom,
        threshold=threshold,
    )

    return (tiling_outcome, mvt_outcome)
252
+
253
+
254
def create_app(data_dir: str, cache_size: int = 256):
    """Build the Flask tile-server application.

    The server module is imported only when this function is called, so
    importing ``starlet`` itself does not pull in the server module.

    Parameters
    ----------
    data_dir : str
        Root directory containing dataset subdirectories.
    cache_size : int
        Number of tiles in the in-memory LRU cache.

    Returns
    -------
    Flask
        Configured Flask application.
    """
    from starlet._internal.server.app import create_app as build_server_app

    flask_app = build_server_app(data_dir=data_dir, cache_size=cache_size)
    return flask_app