starlet 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- starlet-0.1.0/PKG-INFO +196 -0
- starlet-0.1.0/README.md +169 -0
- starlet-0.1.0/pyproject.toml +47 -0
- starlet-0.1.0/setup.cfg +4 -0
- starlet-0.1.0/starlet/__init__.py +270 -0
- starlet-0.1.0/starlet/_cli.py +177 -0
- starlet-0.1.0/starlet/_internal/__init__.py +0 -0
- starlet-0.1.0/starlet/_internal/histogram/__init__.py +7 -0
- starlet-0.1.0/starlet/_internal/histogram/hist_pyramid.py +261 -0
- starlet-0.1.0/starlet/_internal/histogram/loader.py +16 -0
- starlet-0.1.0/starlet/_internal/mvt/__init__.py +3 -0
- starlet-0.1.0/starlet/_internal/mvt/assigner.py +66 -0
- starlet-0.1.0/starlet/_internal/mvt/generator.py +57 -0
- starlet-0.1.0/starlet/_internal/mvt/helpers.py +108 -0
- starlet-0.1.0/starlet/_internal/mvt/renderer.py +81 -0
- starlet-0.1.0/starlet/_internal/mvt/streamer.py +66 -0
- starlet-0.1.0/starlet/_internal/server/__init__.py +3 -0
- starlet-0.1.0/starlet/_internal/server/app.py +320 -0
- starlet-0.1.0/starlet/_internal/server/download_service.py +353 -0
- starlet-0.1.0/starlet/_internal/server/llm/__init__.py +13 -0
- starlet-0.1.0/starlet/_internal/server/llm/factory.py +70 -0
- starlet-0.1.0/starlet/_internal/server/llm/gemini_provider.py +71 -0
- starlet-0.1.0/starlet/_internal/server/llm/ollama_provider.py +61 -0
- starlet-0.1.0/starlet/_internal/server/llm/prompt.md +29 -0
- starlet-0.1.0/starlet/_internal/server/llm/provider.py +21 -0
- starlet-0.1.0/starlet/_internal/server/llm/suggestions.py +77 -0
- starlet-0.1.0/starlet/_internal/server/templates/index.html +192 -0
- starlet-0.1.0/starlet/_internal/server/tiler/__init__.py +11 -0
- starlet-0.1.0/starlet/_internal/server/tiler/mvt_encoder.py +41 -0
- starlet-0.1.0/starlet/_internal/server/tiler/parquet_index.py +59 -0
- starlet-0.1.0/starlet/_internal/server/tiler/tile_cache.py +28 -0
- starlet-0.1.0/starlet/_internal/server/tiler/tiler.py +170 -0
- starlet-0.1.0/starlet/_internal/server/tiler/tiler_bounds.py +48 -0
- starlet-0.1.0/starlet/_internal/stats/__init__.py +9 -0
- starlet-0.1.0/starlet/_internal/stats/collector.py +63 -0
- starlet-0.1.0/starlet/_internal/stats/sketches.py +192 -0
- starlet-0.1.0/starlet/_internal/stats/writer.py +11 -0
- starlet-0.1.0/starlet/_internal/tiling/RSGrove.py +681 -0
- starlet-0.1.0/starlet/_internal/tiling/__init__.py +11 -0
- starlet-0.1.0/starlet/_internal/tiling/assigner.py +369 -0
- starlet-0.1.0/starlet/_internal/tiling/datasource.py +371 -0
- starlet-0.1.0/starlet/_internal/tiling/orchestrator.py +290 -0
- starlet-0.1.0/starlet/_internal/tiling/utils_large.py +34 -0
- starlet-0.1.0/starlet/_internal/tiling/writer_pool.py +327 -0
- starlet-0.1.0/starlet/_types.py +101 -0
- starlet-0.1.0/starlet.egg-info/PKG-INFO +196 -0
- starlet-0.1.0/starlet.egg-info/SOURCES.txt +49 -0
- starlet-0.1.0/starlet.egg-info/dependency_links.txt +1 -0
- starlet-0.1.0/starlet.egg-info/entry_points.txt +2 -0
- starlet-0.1.0/starlet.egg-info/requires.txt +22 -0
- starlet-0.1.0/starlet.egg-info/top_level.txt +1 -0
starlet-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: starlet
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Spatial tiling, MVT generation, and tile serving for geospatial data
|
|
5
|
+
License: MIT
|
|
6
|
+
Requires-Python: >=3.10
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
Requires-Dist: pyarrow>=15
|
|
9
|
+
Requires-Dist: pandas>=2
|
|
10
|
+
Requires-Dist: numpy>=1.24
|
|
11
|
+
Requires-Dist: shapely>=2.0
|
|
12
|
+
Requires-Dist: click>=8
|
|
13
|
+
Requires-Dist: ijson>=3.3
|
|
14
|
+
Provides-Extra: mvt
|
|
15
|
+
Requires-Dist: mapbox-vector-tile>=2; extra == "mvt"
|
|
16
|
+
Requires-Dist: pyproj>=3; extra == "mvt"
|
|
17
|
+
Provides-Extra: server
|
|
18
|
+
Requires-Dist: flask>=3; extra == "server"
|
|
19
|
+
Requires-Dist: flask-cors; extra == "server"
|
|
20
|
+
Requires-Dist: geopandas>=0.14; extra == "server"
|
|
21
|
+
Requires-Dist: pyproj>=3; extra == "server"
|
|
22
|
+
Requires-Dist: mapbox-vector-tile>=2; extra == "server"
|
|
23
|
+
Requires-Dist: gunicorn; extra == "server"
|
|
24
|
+
Requires-Dist: datasketch; extra == "server"
|
|
25
|
+
Provides-Extra: all
|
|
26
|
+
Requires-Dist: starlet[mvt,server]; extra == "all"
|
|
27
|
+
|
|
28
|
+
# Starlet
|
|
29
|
+
|
|
30
|
+
Spatial tiling, MVT generation, and tile serving for geospatial data.
|
|
31
|
+
|
|
32
|
+
## Setup
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
python -m venv .venv
|
|
36
|
+
source .venv/bin/activate
|
|
37
|
+
pip install -e .
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
## CLI
|
|
41
|
+
|
|
42
|
+
All commands are available through the `starlet` CLI.
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
starlet --help
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
### `starlet tile` — Partition a dataset
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
starlet tile --input data.parquet --outdir datasets/mydata --num-tiles 40
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
| Flag | Default | Description |
|
|
55
|
+
|------|---------|-------------|
|
|
56
|
+
| `--input` | (required) | Path to GeoParquet or GeoJSON file |
|
|
57
|
+
| `--outdir` | (required) | Output dataset directory |
|
|
58
|
+
| `--num-tiles` | 40 | Target number of spatial partitions |
|
|
59
|
+
| `--partition-size` | 1gb | Target partition size (e.g. 512mb, 1gb) |
|
|
60
|
+
| `--sort` | zorder | Sort order: zorder, hilbert, columns, none |
|
|
61
|
+
| `--sample-cap` | 10000 | Reservoir sampling cap for centroids |
|
|
62
|
+
| `--compression` | zstd | Parquet compression codec |
|
|
63
|
+
|
|
64
|
+
### `starlet mvt` — Generate vector tiles
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
starlet mvt --dir datasets/mydata --zoom 7 --threshold 100000
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
| Flag | Default | Description |
|
|
71
|
+
|------|---------|-------------|
|
|
72
|
+
| `--dir` | (required) | Dataset directory with parquet_tiles/ and histograms/ |
|
|
73
|
+
| `--zoom` | 7 | Maximum zoom level |
|
|
74
|
+
| `--threshold` | 0 | Minimum feature count per tile |
|
|
75
|
+
| `--outdir` | `<dir>/mvt/` | MVT output directory |
|
|
76
|
+
|
|
77
|
+
### `starlet build` — Full pipeline (tile + MVT)
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
starlet build --input data.parquet --outdir datasets/mydata
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
### `starlet serve` — Launch the tile server
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
starlet serve --dir datasets --port 8765
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
| Flag | Default | Description |
|
|
90
|
+
|------|---------|-------------|
|
|
91
|
+
| `--dir` | (required) | Root directory containing dataset subdirectories |
|
|
92
|
+
| `--host` | 0.0.0.0 | Host to bind |
|
|
93
|
+
| `--port` | 8765 | Port to bind |
|
|
94
|
+
| `--cache-size` | 256 | In-memory tile cache size |
|
|
95
|
+
|
|
96
|
+
### `starlet info` — Inspect a dataset
|
|
97
|
+
|
|
98
|
+
```bash
|
|
99
|
+
starlet info --dir datasets/mydata
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
## Make Targets
|
|
103
|
+
|
|
104
|
+
Convenience wrappers around the CLI:
|
|
105
|
+
|
|
106
|
+
```bash
|
|
107
|
+
make tiles INPUT=path/to/data.parquet
|
|
108
|
+
make mvt INPUT=path/to/data.parquet
|
|
109
|
+
make build INPUT=path/to/data.parquet # tiles + mvt
|
|
110
|
+
make server # starts on port 8765
|
|
111
|
+
make clean # removes datasets/*
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
## API Endpoints
|
|
115
|
+
|
|
116
|
+
Once the server is running:
|
|
117
|
+
|
|
118
|
+
| Method | Path | Description |
|
|
119
|
+
|--------|------|-------------|
|
|
120
|
+
| `GET` | `/` | Interactive dataset selector |
|
|
121
|
+
| `GET` | `/api/datasets` | List all datasets |
|
|
122
|
+
| `GET` | `/datasets.json` | Search datasets by name |
|
|
123
|
+
| `GET` | `/datasets/<dataset>.json` | Dataset metadata |
|
|
124
|
+
| `GET` | `/datasets/<dataset>.html` | Dataset detail page |
|
|
125
|
+
| `GET` | `/<dataset>/<z>/<x>/<y>.mvt` | Mapbox Vector Tile |
|
|
126
|
+
| `GET` | `/datasets/<dataset>/features.<fmt>` | Download features (csv/geojson) |
|
|
127
|
+
| `POST` | `/datasets/<dataset>/features.<fmt>` | Download with geometry filter |
|
|
128
|
+
| `GET` | `/datasets/<dataset>/features/sample.json` | Sample attributes |
|
|
129
|
+
| `GET` | `/datasets/<dataset>/features/sample.geojson` | Sample record with geometry |
|
|
130
|
+
| `GET` | `/api/datasets/<dataset>/stats` | Attribute statistics |
|
|
131
|
+
| `POST` | `/datasets/<dataset>/styles.json` | LLM-generated styling suggestions |
|
|
132
|
+
|
|
133
|
+
## LLM Styling Suggestions
|
|
134
|
+
|
|
135
|
+
The `POST /datasets/<dataset>/styles.json` endpoint uses an LLM to generate
|
|
136
|
+
map styling rules from dataset attribute statistics.
|
|
137
|
+
|
|
138
|
+
### Provider Selection
|
|
139
|
+
|
|
140
|
+
Set the `LLM_PROVIDER` environment variable to choose a provider:
|
|
141
|
+
|
|
142
|
+
```bash
|
|
143
|
+
export LLM_PROVIDER=gemini # default
|
|
144
|
+
export LLM_PROVIDER=ollama # local Ollama
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
Falls back to Gemini if the variable is unset or invalid.
|
|
148
|
+
|
|
149
|
+
### Gemini (default)
|
|
150
|
+
|
|
151
|
+
Requires a Google AI Studio API key:
|
|
152
|
+
|
|
153
|
+
```bash
|
|
154
|
+
export GEMINI_API_KEY=your-key-here
|
|
155
|
+
starlet serve --dir datasets
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
### Ollama (local)
|
|
159
|
+
|
|
160
|
+
Requires a running Ollama instance on the default port (11434):
|
|
161
|
+
|
|
162
|
+
```bash
|
|
163
|
+
ollama serve # start Ollama
|
|
164
|
+
ollama pull llama3 # pull a model (once)
|
|
165
|
+
|
|
166
|
+
export LLM_PROVIDER=ollama
|
|
167
|
+
starlet serve --dir datasets
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
To use a different model:
|
|
171
|
+
|
|
172
|
+
```bash
|
|
173
|
+
export OLLAMA_MODEL=mistral
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
See [`starlet/_internal/server/llm/README.md`](starlet/_internal/server/llm/README.md) for full LLM provider documentation.
|
|
177
|
+
|
|
178
|
+
## Example
|
|
179
|
+
|
|
180
|
+
```bash
|
|
181
|
+
# Full pipeline
|
|
182
|
+
starlet build --input ../data/TIGER2018_COUNTY.parquet --outdir datasets/TIGER2018_COUNTY
|
|
183
|
+
|
|
184
|
+
# Or via Make
|
|
185
|
+
make build INPUT=../data/TIGER2018_COUNTY.parquet
|
|
186
|
+
|
|
187
|
+
# Start the server
|
|
188
|
+
make server
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
Then open http://localhost:8765 and select a dataset to visualize.
|
|
192
|
+
|
|
193
|
+
## Prerequisites
|
|
194
|
+
|
|
195
|
+
- Python 3.10+
|
|
196
|
+
- `make` (optional, for convenience targets)
|
starlet-0.1.0/README.md
ADDED
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
# Starlet
|
|
2
|
+
|
|
3
|
+
Spatial tiling, MVT generation, and tile serving for geospatial data.
|
|
4
|
+
|
|
5
|
+
## Setup
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
python -m venv .venv
|
|
9
|
+
source .venv/bin/activate
|
|
10
|
+
pip install -e .
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## CLI
|
|
14
|
+
|
|
15
|
+
All commands are available through the `starlet` CLI.
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
starlet --help
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
### `starlet tile` — Partition a dataset
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
starlet tile --input data.parquet --outdir datasets/mydata --num-tiles 40
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
| Flag | Default | Description |
|
|
28
|
+
|------|---------|-------------|
|
|
29
|
+
| `--input` | (required) | Path to GeoParquet or GeoJSON file |
|
|
30
|
+
| `--outdir` | (required) | Output dataset directory |
|
|
31
|
+
| `--num-tiles` | 40 | Target number of spatial partitions |
|
|
32
|
+
| `--partition-size` | 1gb | Target partition size (e.g. 512mb, 1gb) |
|
|
33
|
+
| `--sort` | zorder | Sort order: zorder, hilbert, columns, none |
|
|
34
|
+
| `--sample-cap` | 10000 | Reservoir sampling cap for centroids |
|
|
35
|
+
| `--compression` | zstd | Parquet compression codec |
|
|
36
|
+
|
|
37
|
+
### `starlet mvt` — Generate vector tiles
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
starlet mvt --dir datasets/mydata --zoom 7 --threshold 100000
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
| Flag | Default | Description |
|
|
44
|
+
|------|---------|-------------|
|
|
45
|
+
| `--dir` | (required) | Dataset directory with parquet_tiles/ and histograms/ |
|
|
46
|
+
| `--zoom` | 7 | Maximum zoom level |
|
|
47
|
+
| `--threshold` | 0 | Minimum feature count per tile |
|
|
48
|
+
| `--outdir` | `<dir>/mvt/` | MVT output directory |
|
|
49
|
+
|
|
50
|
+
### `starlet build` — Full pipeline (tile + MVT)
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
starlet build --input data.parquet --outdir datasets/mydata
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
### `starlet serve` — Launch the tile server
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
starlet serve --dir datasets --port 8765
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
| Flag | Default | Description |
|
|
63
|
+
|------|---------|-------------|
|
|
64
|
+
| `--dir` | (required) | Root directory containing dataset subdirectories |
|
|
65
|
+
| `--host` | 0.0.0.0 | Host to bind |
|
|
66
|
+
| `--port` | 8765 | Port to bind |
|
|
67
|
+
| `--cache-size` | 256 | In-memory tile cache size |
|
|
68
|
+
|
|
69
|
+
### `starlet info` — Inspect a dataset
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
starlet info --dir datasets/mydata
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## Make Targets
|
|
76
|
+
|
|
77
|
+
Convenience wrappers around the CLI:
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
make tiles INPUT=path/to/data.parquet
|
|
81
|
+
make mvt INPUT=path/to/data.parquet
|
|
82
|
+
make build INPUT=path/to/data.parquet # tiles + mvt
|
|
83
|
+
make server # starts on port 8765
|
|
84
|
+
make clean # removes datasets/*
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
## API Endpoints
|
|
88
|
+
|
|
89
|
+
Once the server is running:
|
|
90
|
+
|
|
91
|
+
| Method | Path | Description |
|
|
92
|
+
|--------|------|-------------|
|
|
93
|
+
| `GET` | `/` | Interactive dataset selector |
|
|
94
|
+
| `GET` | `/api/datasets` | List all datasets |
|
|
95
|
+
| `GET` | `/datasets.json` | Search datasets by name |
|
|
96
|
+
| `GET` | `/datasets/<dataset>.json` | Dataset metadata |
|
|
97
|
+
| `GET` | `/datasets/<dataset>.html` | Dataset detail page |
|
|
98
|
+
| `GET` | `/<dataset>/<z>/<x>/<y>.mvt` | Mapbox Vector Tile |
|
|
99
|
+
| `GET` | `/datasets/<dataset>/features.<fmt>` | Download features (csv/geojson) |
|
|
100
|
+
| `POST` | `/datasets/<dataset>/features.<fmt>` | Download with geometry filter |
|
|
101
|
+
| `GET` | `/datasets/<dataset>/features/sample.json` | Sample attributes |
|
|
102
|
+
| `GET` | `/datasets/<dataset>/features/sample.geojson` | Sample record with geometry |
|
|
103
|
+
| `GET` | `/api/datasets/<dataset>/stats` | Attribute statistics |
|
|
104
|
+
| `POST` | `/datasets/<dataset>/styles.json` | LLM-generated styling suggestions |
|
|
105
|
+
|
|
106
|
+
## LLM Styling Suggestions
|
|
107
|
+
|
|
108
|
+
The `POST /datasets/<dataset>/styles.json` endpoint uses an LLM to generate
|
|
109
|
+
map styling rules from dataset attribute statistics.
|
|
110
|
+
|
|
111
|
+
### Provider Selection
|
|
112
|
+
|
|
113
|
+
Set the `LLM_PROVIDER` environment variable to choose a provider:
|
|
114
|
+
|
|
115
|
+
```bash
|
|
116
|
+
export LLM_PROVIDER=gemini # default
|
|
117
|
+
export LLM_PROVIDER=ollama # local Ollama
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
Falls back to Gemini if the variable is unset or invalid.
|
|
121
|
+
|
|
122
|
+
### Gemini (default)
|
|
123
|
+
|
|
124
|
+
Requires a Google AI Studio API key:
|
|
125
|
+
|
|
126
|
+
```bash
|
|
127
|
+
export GEMINI_API_KEY=your-key-here
|
|
128
|
+
starlet serve --dir datasets
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
### Ollama (local)
|
|
132
|
+
|
|
133
|
+
Requires a running Ollama instance on the default port (11434):
|
|
134
|
+
|
|
135
|
+
```bash
|
|
136
|
+
ollama serve # start Ollama
|
|
137
|
+
ollama pull llama3 # pull a model (once)
|
|
138
|
+
|
|
139
|
+
export LLM_PROVIDER=ollama
|
|
140
|
+
starlet serve --dir datasets
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
To use a different model:
|
|
144
|
+
|
|
145
|
+
```bash
|
|
146
|
+
export OLLAMA_MODEL=mistral
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
See [`starlet/_internal/server/llm/README.md`](starlet/_internal/server/llm/README.md) for full LLM provider documentation.
|
|
150
|
+
|
|
151
|
+
## Example
|
|
152
|
+
|
|
153
|
+
```bash
|
|
154
|
+
# Full pipeline
|
|
155
|
+
starlet build --input ../data/TIGER2018_COUNTY.parquet --outdir datasets/TIGER2018_COUNTY
|
|
156
|
+
|
|
157
|
+
# Or via Make
|
|
158
|
+
make build INPUT=../data/TIGER2018_COUNTY.parquet
|
|
159
|
+
|
|
160
|
+
# Start the server
|
|
161
|
+
make server
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
Then open http://localhost:8765 and select a dataset to visualize.
|
|
165
|
+
|
|
166
|
+
## Prerequisites
|
|
167
|
+
|
|
168
|
+
- Python 3.10+
|
|
169
|
+
- `make` (optional, for convenience targets)
|
|
starlet-0.1.0/pyproject.toml
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "starlet"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Spatial tiling, MVT generation, and tile serving for geospatial data"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.10"
|
|
7
|
+
license = {text = "MIT"}
|
|
8
|
+
dependencies = [
|
|
9
|
+
"pyarrow>=15",
|
|
10
|
+
"pandas>=2",
|
|
11
|
+
"numpy>=1.24",
|
|
12
|
+
"shapely>=2.0",
|
|
13
|
+
"click>=8",
|
|
14
|
+
"ijson>=3.3",
|
|
15
|
+
]
|
|
16
|
+
|
|
17
|
+
[project.optional-dependencies]
|
|
18
|
+
mvt = [
|
|
19
|
+
"mapbox-vector-tile>=2",
|
|
20
|
+
"pyproj>=3",
|
|
21
|
+
]
|
|
22
|
+
server = [
|
|
23
|
+
"flask>=3",
|
|
24
|
+
"flask-cors",
|
|
25
|
+
"geopandas>=0.14",
|
|
26
|
+
"pyproj>=3",
|
|
27
|
+
"mapbox-vector-tile>=2",
|
|
28
|
+
"gunicorn",
|
|
29
|
+
"datasketch",
|
|
30
|
+
]
|
|
31
|
+
all = [
|
|
32
|
+
"starlet[mvt,server]",
|
|
33
|
+
]
|
|
34
|
+
|
|
35
|
+
[project.scripts]
|
|
36
|
+
starlet = "starlet._cli:main"
|
|
37
|
+
|
|
38
|
+
[build-system]
|
|
39
|
+
requires = ["setuptools>=68", "wheel"]
|
|
40
|
+
build-backend = "setuptools.build_meta"
|
|
41
|
+
|
|
42
|
+
[tool.setuptools.packages.find]
|
|
43
|
+
include = ["starlet*"]
|
|
44
|
+
|
|
45
|
+
[tool.setuptools.package-data]
|
|
46
|
+
"starlet._internal.server" = ["templates/*.html"]
|
|
47
|
+
"starlet._internal.server.llm" = ["prompt.md"]
|
starlet-0.1.0/setup.cfg
ADDED
|
starlet-0.1.0/starlet/__init__.py
ADDED
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
"""starlet — spatial tiling, MVT generation, and tile serving for geospatial data."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
__version__ = "0.1.0"
|
|
5
|
+
|
|
6
|
+
from starlet._types import TileResult, MVTResult, Dataset
|
|
7
|
+
|
|
8
|
+
__all__ = [
|
|
9
|
+
"tile",
|
|
10
|
+
"generate_mvt",
|
|
11
|
+
"build",
|
|
12
|
+
"create_app",
|
|
13
|
+
"TileResult",
|
|
14
|
+
"MVTResult",
|
|
15
|
+
"Dataset",
|
|
16
|
+
]
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def tile(
    input: str,
    outdir: str,
    *,
    num_tiles: int = 40,
    partition_size: int = 1 << 30,
    sort: str = "zorder",
    compression: str = "zstd",
    sample_cap: int | None = 10_000,
    sample_ratio: float = 1.0,
    seed: int = 42,
    geom_col: str = "geometry",
    sfc_bits: int = 16,
    max_parallel_files: int = 64,
    index: str | None = None,
) -> TileResult:
    """Partition a GeoParquet/GeoJSON dataset into spatially-tiled Parquet files.

    Tiles are written to ``<outdir>/parquet_tiles/`` and, once tiling has
    finished, histograms are built into ``<outdir>/histograms/``.

    Parameters
    ----------
    input : str
        Path to a GeoParquet, GeoJSON, or GeoJSON-Lines file.
    outdir : str
        Output directory. Tiled files go into ``<outdir>/parquet_tiles/``
        and histograms into ``<outdir>/histograms/``.
    num_tiles : int
        Target number of spatial partitions (used when *index* is ``None``).
        Any truthy value takes precedence over *partition_size*.
    partition_size : int
        Target partition size in bytes. Only consulted when *num_tiles* is
        falsy (e.g. ``0``); with the default ``num_tiles=40`` it has no effect.
    sort : str
        Row sort order within each tile: ``"zorder"``, ``"hilbert"``,
        ``"columns"``, or ``"none"`` (case-insensitive, surrounding whitespace
        ignored). An unrecognized value falls back to ``"zorder"`` and a
        warning is logged.
    compression : str
        Parquet compression codec (default ``"zstd"``).
    sample_cap : int | None
        Reservoir sampling cap for centroid sampling.
    sample_ratio : float
        Bernoulli sampling ratio for centroids (0 < r <= 1).
    seed : int
        Random seed for RSGrove partitioner.
    geom_col : str
        Name of the geometry column.
    sfc_bits : int
        Bits per axis for Z-order / Hilbert key.
    max_parallel_files : int
        Maximum concurrent tile files during write.
    index : str | None
        Path to a legacy CSV index file. When provided, *num_tiles* is ignored.

    Returns
    -------
    TileResult
        Carries *outdir*, the number of ``*.parquet`` files found in the tile
        directory, the summed row count from their Parquet metadata, the
        dataset bounding box (``(0.0, 0.0, 0.0, 0.0)`` when ``Dataset.bbox``
        is falsy) and the path ``<outdir>/histograms/global_prefix.npy``.

    Raises
    ------
    OSError
        If *input* cannot be stat'ed (for example, it does not exist).
    """
    # Heavy dependencies are imported lazily so that importing ``starlet``
    # itself stays cheap.
    import logging
    import math
    from pathlib import Path

    from starlet._internal.tiling.datasource import GeoParquetSource, GeoJSONSource, is_geojson_path
    from starlet._internal.tiling.assigner import TileAssignerFromCSV, RSGroveAssigner
    from starlet._internal.tiling.orchestrator import RoundOrchestrator
    from starlet._internal.tiling.writer_pool import SortMode
    from starlet._internal.histogram.hist_pyramid import build_histograms_for_dir

    logger = logging.getLogger("starlet.tile")

    # Parse sort mode
    _sort_map = {
        "none": SortMode.NONE,
        "columns": SortMode.COLUMNS,
        "zorder": SortMode.ZORDER,
        "hilbert": SortMode.HILBERT,
    }
    sort_key = sort.strip().lower()
    if sort_key in _sort_map:
        sort_mode = _sort_map[sort_key]
    else:
        # Keep the historical best-effort fallback, but make it visible
        # instead of silently ignoring a typo in the caller's argument.
        logger.warning("Unknown sort mode %r; falling back to 'zorder'", sort)
        sort_mode = SortMode.ZORDER

    # Build data source
    if is_geojson_path(input):
        source = GeoJSONSource(input)
    else:
        source = GeoParquetSource(input)

    # Determine partition count. The size-based estimate is only evaluated
    # when num_tiles is falsy, so an unused partition_size of 0 cannot raise
    # ZeroDivisionError.
    input_size_bytes = Path(input).stat().st_size
    target_partitions = num_tiles or max(
        1, math.ceil(input_size_bytes / partition_size)
    )
    logger.info("Target partitions: %d (input=%d bytes)", target_partitions, input_size_bytes)

    # Build assigner
    if index:
        assigner = TileAssignerFromCSV(index, geom_col=geom_col)
    else:
        assigner = RSGroveAssigner.from_source(
            tables=source.iter_tables(),
            num_partitions=target_partitions,
            geom_col=geom_col,
            seed=seed,
            sample_ratio=sample_ratio,
            sample_cap=sample_cap,
        )

    tiles_dir = str(Path(outdir) / "parquet_tiles")
    hist_dir = str(Path(outdir) / "histograms")

    orchestrator = RoundOrchestrator(
        source=source,
        assigner=assigner,
        outdir=tiles_dir,
        max_parallel_files=max_parallel_files,
        compression=compression,
        sort_mode=sort_mode,
        sfc_bits=sfc_bits,
    )
    orchestrator.run()

    logger.info("Tiling complete. Building histograms.")
    # Histogram settings are fixed here; they are not exposed through the
    # public signature.
    build_histograms_for_dir(
        tiles_dir=tiles_dir,
        outdir=hist_dir,
        geom_col=geom_col,
        grid_size=4096,
        dtype="float64",
        hist_max_parallel=8,
        hist_rg_parallel=4,
    )

    # Gather result metadata. Only the Parquet footers are read, not the data.
    import pyarrow.parquet as pq

    tile_files = list(Path(tiles_dir).glob("*.parquet"))
    total_rows = sum(pq.read_metadata(str(tf)).num_rows for tf in tile_files)

    ds = Dataset(outdir)
    result_bbox = ds.bbox or (0.0, 0.0, 0.0, 0.0)

    return TileResult(
        outdir=outdir,
        num_files=len(tile_files),
        total_rows=total_rows,
        bbox=result_bbox,
        histogram_path=str(Path(hist_dir) / "global_prefix.npy"),
    )
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def generate_mvt(
    tile_dir: str,
    *,
    zoom: int = 7,
    threshold: float = 0,
    outdir: str | None = None,
) -> MVTResult:
    """Render Mapbox Vector Tiles for an already-tiled dataset.

    The generator reads ``<tile_dir>/parquet_tiles/`` and
    ``<tile_dir>/histograms/global.npy`` and writes into the MVT output
    directory. Afterwards the output directory is scanned to report what
    is on disk.

    Parameters
    ----------
    tile_dir : str
        Dataset directory containing ``parquet_tiles/`` and ``histograms/``.
    zoom : int
        Maximum zoom level (passed to the generator as ``last_zoom``).
    threshold : float
        Minimum feature count per tile.
    outdir : str | None
        MVT output directory. A falsy value selects ``<tile_dir>/mvt/``.

    Returns
    -------
    MVTResult
        The output directory, the sorted numeric sub-directory names found
        directly under it (empty if the directory does not exist), and the
        number of ``*.mvt`` files found recursively beneath it.
    """
    from pathlib import Path
    from starlet._internal.mvt.generator import BucketMVTGenerator

    dataset_root = Path(tile_dir)
    mvt_outdir = outdir or str(dataset_root / "mvt")

    BucketMVTGenerator(
        parquet_dir=str(dataset_root / "parquet_tiles"),
        hist_path=str(dataset_root / "histograms" / "global.npy"),
        outdir=mvt_outdir,
        last_zoom=zoom,
        threshold=threshold,
    ).run()

    # Report what actually landed on disk rather than trusting the generator.
    mvt_root = Path(mvt_outdir)
    tile_count = sum(1 for _ in mvt_root.rglob("*.mvt"))

    zoom_levels = []
    if mvt_root.exists():
        # Zoom levels are the purely numeric directory names under the root.
        zoom_levels = [
            int(entry.name)
            for entry in mvt_root.iterdir()
            if entry.is_dir() and entry.name.isdigit()
        ]
        zoom_levels.sort()

    return MVTResult(
        outdir=mvt_outdir,
        zoom_levels=zoom_levels,
        tile_count=tile_count,
    )
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def build(
    input: str,
    outdir: str,
    *,
    zoom: int = 7,
    num_tiles: int = 40,
    threshold: float = 100_000,
    **tile_kwargs,
) -> tuple[TileResult, MVTResult]:
    """Tile a dataset and then generate its vector tiles in one call.

    Runs :func:`tile` on *input*, then :func:`generate_mvt` on the resulting
    dataset directory. No MVT output directory is passed along, so
    :func:`generate_mvt` uses its own default location.

    Parameters
    ----------
    input : str
        Path to source GeoParquet or GeoJSON file.
    outdir : str
        Output dataset directory.
    zoom : int
        Maximum zoom level for MVT generation.
    num_tiles : int
        Target number of spatial partitions.
    threshold : float
        Minimum feature count per MVT tile.
    **tile_kwargs
        Additional keyword arguments forwarded to :func:`tile`.

    Returns
    -------
    tuple[TileResult, MVTResult]
        The tiling result followed by the MVT generation result.
    """
    # Stage 1: spatial partitioning (also builds the histograms MVT needs).
    tiling_outcome = tile(
        input=input,
        outdir=outdir,
        num_tiles=num_tiles,
        **tile_kwargs,
    )
    # Stage 2: vector tiles from the freshly written dataset directory.
    mvt_outcome = generate_mvt(
        tile_dir=outdir,
        zoom=zoom,
        threshold=threshold,
    )
    return tiling_outcome, mvt_outcome
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
def create_app(data_dir: str, cache_size: int = 256):
    """Build the Flask tile-server application.

    This is a thin public entry point: the server module is imported only
    when this function is called, and both arguments are passed through to
    its ``create_app`` unchanged.

    Parameters
    ----------
    data_dir : str
        Root directory containing dataset subdirectories.
    cache_size : int
        Number of tiles in the in-memory LRU cache.

    Returns
    -------
    Flask
        Configured Flask application.
    """
    # Imported lazily so the server module is only loaded on demand.
    from starlet._internal.server.app import create_app as _build_server_app

    server_app = _build_server_app(data_dir=data_dir, cache_size=cache_size)
    return server_app
|