ukam-os-builder 0.1.0.dev2__tar.gz → 0.1.0.dev3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/.gitignore +1 -0
- {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/AGENTS.md +1 -1
- {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/PKG-INFO +37 -26
- {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/README.md +36 -25
- ukam_os_builder-0.1.0.dev2/config.yaml → ukam_os_builder-0.1.0.dev3/config.example.yaml +0 -9
- {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/pyproject.toml +1 -1
- {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/tests/test_api.py +80 -9
- ukam_os_builder-0.1.0.dev3/tests/test_cli.py +32 -0
- {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/tests/test_settings.py +56 -15
- ukam_os_builder-0.1.0.dev3/tests/test_setup_wizard.py +136 -0
- {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/ukam_os_builder/__init__.py +1 -1
- {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/ukam_os_builder/api/api.py +65 -21
- {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/ukam_os_builder/api/settings.py +51 -23
- {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/ukam_os_builder/cli.py +10 -0
- {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/ukam_os_builder/os_builder/pipeline_factory.py +12 -8
- {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/ukam_os_builder/pipeline.py +4 -2
- {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/ukam_os_builder/setup_wizard.py +56 -29
- {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/uv.lock +860 -858
- ukam_os_builder-0.1.0.dev2/ukam_os_builder/data_sources/abp/to_flatfile.py +0 -677
- {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/.env.example +0 -0
- {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/.github/workflows/ci.yml +0 -0
- {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/.github/workflows/release-pypi.yml +0 -0
- {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/prompt.md +0 -0
- {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/shell/test_release_locally.sh +0 -0
- {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/tests/data/README.md +0 -0
- {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/tests/data/add_gb_builtaddress.csv +0 -0
- {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/tests/data/add_gb_builtaddress_altadd.csv +0 -0
- {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/tests/data/add_gb_historicaddress.csv +0 -0
- {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/tests/data/add_gb_prebuildaddress.csv +0 -0
- {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/tests/data/add_gb_royalmailaddress.csv +0 -0
- {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/tests/test_cli_errors.py +0 -0
- {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/tests/test_extract_source_filtering.py +0 -0
- {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/tests/test_inspect_results.py +0 -0
- {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/tests/test_public_api_integration.py +0 -0
- {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/tests/test_smoke.py +0 -0
- {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/ukam_os_builder/_exceptions.py +0 -0
- {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/ukam_os_builder/api/cli_errors.py +0 -0
- {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/ukam_os_builder/data_sources/abp/schemas/abp_schema.yaml +0 -0
- {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/ukam_os_builder/data_sources/abp/split_raw.py +0 -0
- {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/ukam_os_builder/data_sources/abp/transform/__init__.py +0 -0
- {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/ukam_os_builder/data_sources/abp/transform/common.py +0 -0
- {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/ukam_os_builder/data_sources/abp/transform/runner.py +0 -0
- {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/ukam_os_builder/data_sources/abp/transform/stages/__init__.py +0 -0
- {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/ukam_os_builder/data_sources/abp/transform/stages/business.py +0 -0
- {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/ukam_os_builder/data_sources/abp/transform/stages/combine.py +0 -0
- {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/ukam_os_builder/data_sources/abp/transform/stages/lpi.py +0 -0
- {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/ukam_os_builder/data_sources/abp/transform/stages/misc.py +0 -0
- {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/ukam_os_builder/data_sources/abp/transform/stages/postal.py +0 -0
- {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/ukam_os_builder/data_sources/ngd/to_flatfile.py +0 -0
- {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/ukam_os_builder/os_builder/__init__.py +0 -0
- {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/ukam_os_builder/os_builder/extract.py +0 -0
- {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/ukam_os_builder/os_builder/inspect_results.py +0 -0
- {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/ukam_os_builder/os_builder/os_hub.py +0 -0
|
@@ -7,7 +7,7 @@ This project transforms NGD (National Geographic Database) data into a clean fla
|
|
|
7
7
|
## Repository Structure
|
|
8
8
|
|
|
9
9
|
```
|
|
10
|
-
├── config.yaml
|
|
10
|
+
├── config.example.yaml # Pipeline configuration template (copy to config.yaml)
|
|
11
11
|
├── script.py # Main entry point
|
|
12
12
|
├── pyproject.toml # Project metadata and dependencies
|
|
13
13
|
├── README.md # User documentation
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ukam-os-builder
|
|
3
|
-
Version: 0.1.0.dev2
|
|
3
|
+
Version: 0.1.0.dev3
|
|
4
4
|
Summary: Download, process and transform OS address data (NGD or ABP) for UK address matching
|
|
5
5
|
Project-URL: Homepage, https://github.com/moj-analytical-services/prepare_ngd_for_address_matching
|
|
6
6
|
Project-URL: Repository, https://github.com/moj-analytical-services/prepare_ngd_for_address_matching
|
|
@@ -140,7 +140,13 @@ result = inspect_flatfile_variants(config_path="config.yaml", top_offset=0, show
|
|
|
140
140
|
<summary>Configure manually</summary>
|
|
141
141
|
|
|
142
142
|
If you prefer not to use the setup wizard, edit `config.yaml` directly.
|
|
143
|
-
Set `source.type`, `os_downloads.package_id`, and `os_downloads.version_id`, the […]
|
|
143
|
+
Set `source.type`, `os_downloads.package_id`, and `os_downloads.version_id`.
|
|
144
|
+
|
|
145
|
+
Most users only need one path setting:
|
|
146
|
+
|
|
147
|
+
- `paths.work_dir` (default `./data`, relative to the config file directory)
|
|
148
|
+
|
|
149
|
+
The tool derives all other directories automatically under `work_dir`.
|
|
144
150
|
|
|
145
151
|
</details>
|
|
146
152
|
|
|
@@ -153,7 +159,7 @@ Set `source.type`, `os_downloads.package_id`, and `os_downloads.version_id`, the
|
|
|
153
159
|
|
|
154
160
|
### Command notes
|
|
155
161
|
|
|
156
|
-
-
|
|
162
|
+
- `step` only supports `download` and `all` to simplify usage. Use `--overwrite` to re-run a step with the same parameters.
|
|
157
163
|
- CLI overrides take precedence over values in `config.yaml`.
|
|
158
164
|
- By default, `ukam-os-build` loads `.env` from the same directory as your config, unless `--env-file` is supplied.
|
|
159
165
|
|
|
@@ -294,25 +300,6 @@ When the same UPRN and address combination appears in multiple sources, records
|
|
|
294
300
|
4. Historic
|
|
295
301
|
5. Demolished
|
|
296
302
|
|
|
297
|
-
## Manual Download
|
|
298
|
-
|
|
299
|
-
If you prefer to download manually:
|
|
300
|
-
- Sign in to https://osdatahub.os.uk/
|
|
301
|
-
- Create a datapackage with NGD address features
|
|
302
|
-
- Download the zip file
|
|
303
|
-
|
|
304
|
-
To run the pipeline from a manual download:
|
|
305
|
-
|
|
306
|
-
1. Place the zip in the downloads directory configured in `config.yaml`
|
|
307
|
-
- By default this is `data/downloads/`
|
|
308
|
-
- The extract step looks for `*.zip` files in this folder
|
|
309
|
-
|
|
310
|
-
2. Run the pipeline starting from extract:
|
|
311
|
-
|
|
312
|
-
```bash
|
|
313
|
-
ukam-os-build --config config.yaml --step extract
|
|
314
|
-
ukam-os-build --config config.yaml --step flatfile
|
|
315
|
-
```
|
|
316
303
|
|
|
317
304
|
## OS Downloads API
|
|
318
305
|
|
|
@@ -348,10 +335,6 @@ source:
|
|
|
348
335
|
|
|
349
336
|
paths:
|
|
350
337
|
work_dir: ./data
|
|
351
|
-
downloads_dir: ./data/downloads
|
|
352
|
-
extracted_dir: ./data/extracted
|
|
353
|
-
parquet_dir: ./data/parquet
|
|
354
|
-
output_dir: ./data/output
|
|
355
338
|
|
|
356
339
|
os_downloads:
|
|
357
340
|
package_id: "<your_package_id>"
|
|
@@ -366,6 +349,34 @@ processing:
|
|
|
366
349
|
# duckdb_memory_limit: "8GB"
|
|
367
350
|
```
|
|
368
351
|
|
|
352
|
+
By default, the tool creates these directories under `paths.work_dir`:
|
|
353
|
+
|
|
354
|
+
- downloads: `<work_dir>/downloads`
|
|
355
|
+
- extracted: `<work_dir>/extracted`
|
|
356
|
+
- parquet: `<work_dir>/parquet`
|
|
357
|
+
- output: `<work_dir>/output`
|
|
358
|
+
|
|
359
|
+
<details>
|
|
360
|
+
<summary>Advanced: override default directories</summary>
|
|
361
|
+
|
|
362
|
+
Most users won’t need this.
|
|
363
|
+
|
|
364
|
+
If you need to customize locations, use `paths.overrides`:
|
|
365
|
+
|
|
366
|
+
```yaml
|
|
367
|
+
paths:
|
|
368
|
+
work_dir: ./data
|
|
369
|
+
overrides:
|
|
370
|
+
downloads_dir: ./somewhere/downloads
|
|
371
|
+
extracted_dir: /mnt/fast/extracted
|
|
372
|
+
parquet_dir: ./data/parquet
|
|
373
|
+
output_dir: ./output
|
|
374
|
+
```
|
|
375
|
+
|
|
376
|
+
Override keys replace derived defaults. Relative paths are resolved relative to the directory containing `config.yaml`.
|
|
377
|
+
|
|
378
|
+
</details>
|
|
379
|
+
|
|
369
380
|
## Smoke test
|
|
370
381
|
|
|
371
382
|
```bash
|
|
@@ -114,7 +114,13 @@ result = inspect_flatfile_variants(config_path="config.yaml", top_offset=0, show
|
|
|
114
114
|
<summary>Configure manually</summary>
|
|
115
115
|
|
|
116
116
|
If you prefer not to use the setup wizard, edit `config.yaml` directly.
|
|
117
|
-
Set `source.type`, `os_downloads.package_id`, and `os_downloads.version_id`, the […]
|
|
117
|
+
Set `source.type`, `os_downloads.package_id`, and `os_downloads.version_id`.
|
|
118
|
+
|
|
119
|
+
Most users only need one path setting:
|
|
120
|
+
|
|
121
|
+
- `paths.work_dir` (default `./data`, relative to the config file directory)
|
|
122
|
+
|
|
123
|
+
The tool derives all other directories automatically under `work_dir`.
|
|
118
124
|
|
|
119
125
|
</details>
|
|
120
126
|
|
|
@@ -127,7 +133,7 @@ Set `source.type`, `os_downloads.package_id`, and `os_downloads.version_id`, the
|
|
|
127
133
|
|
|
128
134
|
### Command notes
|
|
129
135
|
|
|
130
|
-
-
|
|
136
|
+
- `step` only supports `download` and `all` to simplify usage. Use `--overwrite` to re-run a step with the same parameters.
|
|
131
137
|
- CLI overrides take precedence over values in `config.yaml`.
|
|
132
138
|
- By default, `ukam-os-build` loads `.env` from the same directory as your config, unless `--env-file` is supplied.
|
|
133
139
|
|
|
@@ -268,25 +274,6 @@ When the same UPRN and address combination appears in multiple sources, records
|
|
|
268
274
|
4. Historic
|
|
269
275
|
5. Demolished
|
|
270
276
|
|
|
271
|
-
## Manual Download
|
|
272
|
-
|
|
273
|
-
If you prefer to download manually:
|
|
274
|
-
- Sign in to https://osdatahub.os.uk/
|
|
275
|
-
- Create a datapackage with NGD address features
|
|
276
|
-
- Download the zip file
|
|
277
|
-
|
|
278
|
-
To run the pipeline from a manual download:
|
|
279
|
-
|
|
280
|
-
1. Place the zip in the downloads directory configured in `config.yaml`
|
|
281
|
-
- By default this is `data/downloads/`
|
|
282
|
-
- The extract step looks for `*.zip` files in this folder
|
|
283
|
-
|
|
284
|
-
2. Run the pipeline starting from extract:
|
|
285
|
-
|
|
286
|
-
```bash
|
|
287
|
-
ukam-os-build --config config.yaml --step extract
|
|
288
|
-
ukam-os-build --config config.yaml --step flatfile
|
|
289
|
-
```
|
|
290
277
|
|
|
291
278
|
## OS Downloads API
|
|
292
279
|
|
|
@@ -322,10 +309,6 @@ source:
|
|
|
322
309
|
|
|
323
310
|
paths:
|
|
324
311
|
work_dir: ./data
|
|
325
|
-
downloads_dir: ./data/downloads
|
|
326
|
-
extracted_dir: ./data/extracted
|
|
327
|
-
parquet_dir: ./data/parquet
|
|
328
|
-
output_dir: ./data/output
|
|
329
312
|
|
|
330
313
|
os_downloads:
|
|
331
314
|
package_id: "<your_package_id>"
|
|
@@ -340,6 +323,34 @@ processing:
|
|
|
340
323
|
# duckdb_memory_limit: "8GB"
|
|
341
324
|
```
|
|
342
325
|
|
|
326
|
+
By default, the tool creates these directories under `paths.work_dir`:
|
|
327
|
+
|
|
328
|
+
- downloads: `<work_dir>/downloads`
|
|
329
|
+
- extracted: `<work_dir>/extracted`
|
|
330
|
+
- parquet: `<work_dir>/parquet`
|
|
331
|
+
- output: `<work_dir>/output`
|
|
332
|
+
|
|
333
|
+
<details>
|
|
334
|
+
<summary>Advanced: override default directories</summary>
|
|
335
|
+
|
|
336
|
+
Most users won’t need this.
|
|
337
|
+
|
|
338
|
+
If you need to customize locations, use `paths.overrides`:
|
|
339
|
+
|
|
340
|
+
```yaml
|
|
341
|
+
paths:
|
|
342
|
+
work_dir: ./data
|
|
343
|
+
overrides:
|
|
344
|
+
downloads_dir: ./somewhere/downloads
|
|
345
|
+
extracted_dir: /mnt/fast/extracted
|
|
346
|
+
parquet_dir: ./data/parquet
|
|
347
|
+
output_dir: ./output
|
|
348
|
+
```
|
|
349
|
+
|
|
350
|
+
Override keys replace derived defaults. Relative paths are resolved relative to the directory containing `config.yaml`.
|
|
351
|
+
|
|
352
|
+
</details>
|
|
353
|
+
|
|
343
354
|
## Smoke test
|
|
344
355
|
|
|
345
356
|
```bash
|
|
@@ -5,15 +5,6 @@ paths:
|
|
|
5
5
|
# Base working directory for all data
|
|
6
6
|
work_dir: ./data
|
|
7
7
|
|
|
8
|
-
# Downloaded zip files from OS
|
|
9
|
-
downloads_dir: ./data/downloads
|
|
10
|
-
|
|
11
|
-
# Extracted CSV files and intermediate parquet
|
|
12
|
-
extracted_dir: ./data/extracted
|
|
13
|
-
|
|
14
|
-
# Final output parquet files
|
|
15
|
-
output_dir: ./data/output
|
|
16
|
-
|
|
17
8
|
# OS Data Hub download settings
|
|
18
9
|
# Given a datapackage at: https://osdatahub.os.uk/data/downloads/data-packages/16331
|
|
19
10
|
# You can get versions from:
|
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import os
|
|
3
4
|
from pathlib import Path
|
|
4
5
|
from textwrap import dedent
|
|
6
|
+
from typing import Literal
|
|
5
7
|
|
|
6
8
|
import pytest
|
|
7
9
|
|
|
@@ -39,6 +41,37 @@ def test_create_config_and_env_writes_expected_files(tmp_path: Path) -> None:
|
|
|
39
41
|
assert "OS_PROJECT_API_SECRET=your_api_secret_here" in env_text
|
|
40
42
|
|
|
41
43
|
|
|
44
|
+
def test_create_config_and_env_writes_supplied_api_credentials(tmp_path: Path) -> None:
|
|
45
|
+
config_path = tmp_path / "config.yaml"
|
|
46
|
+
env_path = tmp_path / ".env"
|
|
47
|
+
|
|
48
|
+
create_config_and_env(
|
|
49
|
+
config_out=config_path,
|
|
50
|
+
env_out=env_path,
|
|
51
|
+
source="ngd",
|
|
52
|
+
package_id="16331",
|
|
53
|
+
version_id="104444",
|
|
54
|
+
api_key="my-key",
|
|
55
|
+
api_secret="my-secret",
|
|
56
|
+
)
|
|
57
|
+
|
|
58
|
+
env_text = env_path.read_text()
|
|
59
|
+
assert "OS_PROJECT_API_KEY=my-key" in env_text
|
|
60
|
+
assert "OS_PROJECT_API_SECRET=my-secret" in env_text
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def test_create_config_and_env_rejects_partial_api_credentials(tmp_path: Path) -> None:
|
|
64
|
+
with pytest.raises(ValueError, match="must be provided together"):
|
|
65
|
+
create_config_and_env(
|
|
66
|
+
config_out=tmp_path / "config.yaml",
|
|
67
|
+
env_out=tmp_path / ".env",
|
|
68
|
+
source="ngd",
|
|
69
|
+
package_id="16331",
|
|
70
|
+
version_id="104444",
|
|
71
|
+
api_key="my-key",
|
|
72
|
+
)
|
|
73
|
+
|
|
74
|
+
|
|
42
75
|
def test_run_from_config_applies_overrides(
|
|
43
76
|
monkeypatch: pytest.MonkeyPatch,
|
|
44
77
|
tmp_path: Path,
|
|
@@ -52,9 +85,6 @@ def test_run_from_config_applies_overrides(
|
|
|
52
85
|
"""
|
|
53
86
|
paths:
|
|
54
87
|
work_dir: ./data
|
|
55
|
-
downloads_dir: ./data/downloads
|
|
56
|
-
extracted_dir: ./data/extracted
|
|
57
|
-
output_dir: ./data/output
|
|
58
88
|
|
|
59
89
|
os_downloads:
|
|
60
90
|
package_id: "16465"
|
|
@@ -70,7 +100,9 @@ def test_run_from_config_applies_overrides(
|
|
|
70
100
|
def fake_check_api(_settings: object) -> None:
|
|
71
101
|
calls["checked_api"] = True
|
|
72
102
|
|
|
73
|
-
def fake_run_pipeline(
|
|
103
|
+
def fake_run_pipeline(
|
|
104
|
+
step: Literal["all", "download"], settings: object, force: bool, list_only: bool
|
|
105
|
+
) -> None:
|
|
74
106
|
calls["step"] = step
|
|
75
107
|
calls["force"] = force
|
|
76
108
|
calls["list_only"] = list_only
|
|
@@ -94,6 +126,47 @@ def test_run_from_config_applies_overrides(
|
|
|
94
126
|
assert calls["num_chunks"] == 5
|
|
95
127
|
|
|
96
128
|
|
|
129
|
+
def test_run_from_config_accepts_api_key_secret_overrides(
|
|
130
|
+
monkeypatch: pytest.MonkeyPatch,
|
|
131
|
+
tmp_path: Path,
|
|
132
|
+
) -> None:
|
|
133
|
+
monkeypatch.delenv("OS_PROJECT_API_KEY", raising=False)
|
|
134
|
+
monkeypatch.delenv("OS_PROJECT_API_SECRET", raising=False)
|
|
135
|
+
|
|
136
|
+
config_path = tmp_path / "config.yaml"
|
|
137
|
+
_write_config(
|
|
138
|
+
config_path,
|
|
139
|
+
"""
|
|
140
|
+
source:
|
|
141
|
+
type: ngd
|
|
142
|
+
|
|
143
|
+
os_downloads:
|
|
144
|
+
package_id: "16465"
|
|
145
|
+
version_id: "104444"
|
|
146
|
+
""",
|
|
147
|
+
)
|
|
148
|
+
|
|
149
|
+
monkeypatch.setattr("ukam_os_builder.api.api.get_package_version", lambda _settings: None)
|
|
150
|
+
monkeypatch.setattr("ukam_os_builder.api.api.run_pipeline", lambda **_kwargs: None)
|
|
151
|
+
|
|
152
|
+
run_from_config(
|
|
153
|
+
config_path=config_path,
|
|
154
|
+
api_key="runtime-key",
|
|
155
|
+
api_secret="runtime-secret",
|
|
156
|
+
)
|
|
157
|
+
|
|
158
|
+
assert os.environ["OS_PROJECT_API_KEY"] == "runtime-key"
|
|
159
|
+
assert os.environ["OS_PROJECT_API_SECRET"] == "runtime-secret"
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def test_run_from_config_rejects_partial_api_credentials(tmp_path: Path) -> None:
|
|
163
|
+
with pytest.raises(ValueError, match="must be provided together"):
|
|
164
|
+
run_from_config(
|
|
165
|
+
config_path=tmp_path / "config.yaml",
|
|
166
|
+
api_key="runtime-key",
|
|
167
|
+
)
|
|
168
|
+
|
|
169
|
+
|
|
97
170
|
def test_run_from_config_validates_list_only_step(tmp_path: Path) -> None:
|
|
98
171
|
with pytest.raises(ValueError, match="--list-only can only be used"):
|
|
99
172
|
run_from_config(config_path=tmp_path / "config.yaml", step="extract", list_only=True)
|
|
@@ -126,7 +199,9 @@ def test_run_from_config_uses_source_override_for_pipeline_validation(
|
|
|
126
199
|
|
|
127
200
|
monkeypatch.setattr("ukam_os_builder.api.api.get_package_version", lambda _settings: None)
|
|
128
201
|
|
|
129
|
-
def fake_run_pipeline(
|
|
202
|
+
def fake_run_pipeline(
|
|
203
|
+
step: Literal["all", "download"], settings: object, force: bool, list_only: bool
|
|
204
|
+
) -> None:
|
|
130
205
|
calls["step"] = step
|
|
131
206
|
calls["source"] = settings.source.type
|
|
132
207
|
calls["force"] = force
|
|
@@ -195,10 +270,6 @@ def test_run_from_config_applies_schema_path_override(
|
|
|
195
270
|
|
|
196
271
|
paths:
|
|
197
272
|
work_dir: ./data
|
|
198
|
-
downloads_dir: ./data/downloads
|
|
199
|
-
extracted_dir: ./data/extracted
|
|
200
|
-
output_dir: ./data/output
|
|
201
|
-
parquet_dir: ./data/parquet
|
|
202
273
|
|
|
203
274
|
os_downloads:
|
|
204
275
|
package_id: "16465"
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from ukam_os_builder import cli
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_build_cli_passes_api_credentials_to_run_from_config(monkeypatch) -> None:
|
|
7
|
+
captured: dict[str, object] = {}
|
|
8
|
+
|
|
9
|
+
def fake_run_from_config(**kwargs):
|
|
10
|
+
captured.update(kwargs)
|
|
11
|
+
return None
|
|
12
|
+
|
|
13
|
+
monkeypatch.setattr(cli, "run_from_config", fake_run_from_config)
|
|
14
|
+
monkeypatch.setattr(cli, "_configure_logging", lambda _verbose: None)
|
|
15
|
+
|
|
16
|
+
exit_code = cli.main(
|
|
17
|
+
[
|
|
18
|
+
"--config",
|
|
19
|
+
"config.yaml",
|
|
20
|
+
"--step",
|
|
21
|
+
"download",
|
|
22
|
+
"--list-only",
|
|
23
|
+
"--api-key",
|
|
24
|
+
"runtime-key",
|
|
25
|
+
"--api-secret",
|
|
26
|
+
"runtime-secret",
|
|
27
|
+
]
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
assert exit_code == 0
|
|
31
|
+
assert captured["api_key"] == "runtime-key"
|
|
32
|
+
assert captured["api_secret"] == "runtime-secret"
|
|
@@ -25,9 +25,6 @@ def test_load_settings_resolves_paths_relative_to_config(
|
|
|
25
25
|
"""
|
|
26
26
|
paths:
|
|
27
27
|
work_dir: ./data
|
|
28
|
-
downloads_dir: ./data/downloads
|
|
29
|
-
extracted_dir: ./data/extracted
|
|
30
|
-
output_dir: ./data/output
|
|
31
28
|
|
|
32
29
|
os_downloads:
|
|
33
30
|
package_id: "16465"
|
|
@@ -57,9 +54,6 @@ def test_load_settings_rejects_unknown_config_key(
|
|
|
57
54
|
"""
|
|
58
55
|
paths:
|
|
59
56
|
work_dir: ./data
|
|
60
|
-
downloads_dir: ./data/downloads
|
|
61
|
-
extracted_dir: ./data/extracted
|
|
62
|
-
output_dir: ./data/output
|
|
63
57
|
|
|
64
58
|
os_downloads:
|
|
65
59
|
package_id: "16465"
|
|
@@ -87,9 +81,6 @@ def test_load_settings_missing_package_id_has_clear_message(
|
|
|
87
81
|
"""
|
|
88
82
|
paths:
|
|
89
83
|
work_dir: ./data
|
|
90
|
-
downloads_dir: ./data/downloads
|
|
91
|
-
extracted_dir: ./data/extracted
|
|
92
|
-
output_dir: ./data/output
|
|
93
84
|
|
|
94
85
|
os_downloads:
|
|
95
86
|
version_id: "104444"
|
|
@@ -141,9 +132,6 @@ def test_load_settings_requires_env_vars(tmp_path: Path, monkeypatch: pytest.Mon
|
|
|
141
132
|
"""
|
|
142
133
|
paths:
|
|
143
134
|
work_dir: ./data
|
|
144
|
-
downloads_dir: ./data/downloads
|
|
145
|
-
extracted_dir: ./data/extracted
|
|
146
|
-
output_dir: ./data/output
|
|
147
135
|
|
|
148
136
|
os_downloads:
|
|
149
137
|
package_id: "16465"
|
|
@@ -167,9 +155,6 @@ def test_load_settings_validates_positive_read_timeout(
|
|
|
167
155
|
"""
|
|
168
156
|
paths:
|
|
169
157
|
work_dir: ./data
|
|
170
|
-
downloads_dir: ./data/downloads
|
|
171
|
-
extracted_dir: ./data/extracted
|
|
172
|
-
output_dir: ./data/output
|
|
173
158
|
|
|
174
159
|
os_downloads:
|
|
175
160
|
package_id: "16465"
|
|
@@ -205,3 +190,59 @@ def test_load_settings_defaults_source_and_num_chunks(
|
|
|
205
190
|
|
|
206
191
|
assert settings.source.type == "ngd"
|
|
207
192
|
assert settings.processing.num_chunks == 20
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def test_load_settings_applies_path_overrides(
|
|
196
|
+
monkeypatch: pytest.MonkeyPatch, tmp_path: Path
|
|
197
|
+
) -> None:
|
|
198
|
+
monkeypatch.setenv("OS_PROJECT_API_KEY", "key")
|
|
199
|
+
monkeypatch.setenv("OS_PROJECT_API_SECRET", "secret")
|
|
200
|
+
|
|
201
|
+
config_path = tmp_path / "config.yaml"
|
|
202
|
+
_write_config(
|
|
203
|
+
config_path,
|
|
204
|
+
"""
|
|
205
|
+
paths:
|
|
206
|
+
work_dir: ./data
|
|
207
|
+
overrides:
|
|
208
|
+
downloads_dir: ./custom/downloads
|
|
209
|
+
extracted_dir: /tmp/extracted
|
|
210
|
+
|
|
211
|
+
os_downloads:
|
|
212
|
+
package_id: "16465"
|
|
213
|
+
version_id: "104444"
|
|
214
|
+
""",
|
|
215
|
+
)
|
|
216
|
+
|
|
217
|
+
settings = load_settings(config_path, load_env=False)
|
|
218
|
+
|
|
219
|
+
assert settings.paths.work_dir == (tmp_path / "data").resolve()
|
|
220
|
+
assert settings.paths.downloads_dir == (tmp_path / "custom/downloads").resolve()
|
|
221
|
+
assert str(settings.paths.extracted_dir).endswith("/tmp/extracted")
|
|
222
|
+
assert settings.paths.parquet_dir == (tmp_path / "data/parquet").resolve()
|
|
223
|
+
assert settings.paths.output_dir == (tmp_path / "data/output").resolve()
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def test_load_settings_rejects_legacy_path_keys(
|
|
227
|
+
monkeypatch: pytest.MonkeyPatch,
|
|
228
|
+
tmp_path: Path,
|
|
229
|
+
) -> None:
|
|
230
|
+
monkeypatch.setenv("OS_PROJECT_API_KEY", "key")
|
|
231
|
+
monkeypatch.setenv("OS_PROJECT_API_SECRET", "secret")
|
|
232
|
+
|
|
233
|
+
config_path = tmp_path / "config.yaml"
|
|
234
|
+
_write_config(
|
|
235
|
+
config_path,
|
|
236
|
+
"""
|
|
237
|
+
paths:
|
|
238
|
+
work_dir: ./data
|
|
239
|
+
downloads_dir: ./legacy/downloads
|
|
240
|
+
|
|
241
|
+
os_downloads:
|
|
242
|
+
package_id: "16465"
|
|
243
|
+
version_id: "104444"
|
|
244
|
+
""",
|
|
245
|
+
)
|
|
246
|
+
|
|
247
|
+
with pytest.raises(SettingsError, match="no longer supported"):
|
|
248
|
+
load_settings(config_path, load_env=False)
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
import pytest
|
|
6
|
+
|
|
7
|
+
from ukam_os_builder import setup_wizard
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _input_feeder(values: list[str]):
|
|
11
|
+
iterator = iter(values)
|
|
12
|
+
|
|
13
|
+
def _fake_input(_prompt: str, markup: bool = False) -> str: # noqa: ARG001
|
|
14
|
+
return next(iterator)
|
|
15
|
+
|
|
16
|
+
return _fake_input
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def test_setup_wizard_prompts_for_env_credentials_and_overwrites_existing(
|
|
20
|
+
monkeypatch: pytest.MonkeyPatch,
|
|
21
|
+
tmp_path: Path,
|
|
22
|
+
) -> None:
|
|
23
|
+
config_path = tmp_path / "config.yaml"
|
|
24
|
+
env_path = tmp_path / ".env"
|
|
25
|
+
env_path.write_text("OS_PROJECT_API_KEY=old\nOS_PROJECT_API_SECRET=old\n", encoding="utf-8")
|
|
26
|
+
|
|
27
|
+
monkeypatch.setattr(
|
|
28
|
+
setup_wizard.console,
|
|
29
|
+
"input",
|
|
30
|
+
_input_feeder(
|
|
31
|
+
[
|
|
32
|
+
"", # source (default)
|
|
33
|
+
"pkg-1",
|
|
34
|
+
"ver-1",
|
|
35
|
+
"",
|
|
36
|
+
"n", # advanced settings
|
|
37
|
+
"y", # setup .env now
|
|
38
|
+
"y", # overwrite existing .env
|
|
39
|
+
"new-key",
|
|
40
|
+
"new-secret",
|
|
41
|
+
]
|
|
42
|
+
),
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
captured: dict[str, object] = {}
|
|
46
|
+
|
|
47
|
+
def fake_write_config_and_env(**kwargs):
|
|
48
|
+
captured.update(kwargs)
|
|
49
|
+
return Path(kwargs["config_out"]).resolve(), Path(kwargs["env_out"]).resolve(), True
|
|
50
|
+
|
|
51
|
+
monkeypatch.setattr(setup_wizard, "write_config_and_env", fake_write_config_and_env)
|
|
52
|
+
|
|
53
|
+
exit_code = setup_wizard.main(["--config-out", str(config_path), "--env-out", str(env_path)])
|
|
54
|
+
|
|
55
|
+
assert exit_code == 0
|
|
56
|
+
assert captured["write_env"] is True
|
|
57
|
+
assert captured["overwrite_env"] is True
|
|
58
|
+
assert captured["api_key"] == "new-key"
|
|
59
|
+
assert captured["api_secret"] == "new-secret"
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def test_setup_wizard_skips_env_update_when_user_declines(
|
|
63
|
+
monkeypatch: pytest.MonkeyPatch,
|
|
64
|
+
tmp_path: Path,
|
|
65
|
+
) -> None:
|
|
66
|
+
config_path = tmp_path / "config.yaml"
|
|
67
|
+
env_path = tmp_path / ".env"
|
|
68
|
+
|
|
69
|
+
monkeypatch.setattr(
|
|
70
|
+
setup_wizard.console,
|
|
71
|
+
"input",
|
|
72
|
+
_input_feeder(
|
|
73
|
+
[
|
|
74
|
+
"", # source (default)
|
|
75
|
+
"pkg-1",
|
|
76
|
+
"ver-1",
|
|
77
|
+
"",
|
|
78
|
+
"n", # advanced settings
|
|
79
|
+
"n", # setup .env now
|
|
80
|
+
]
|
|
81
|
+
),
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
captured: dict[str, object] = {}
|
|
85
|
+
|
|
86
|
+
def fake_write_config_and_env(**kwargs):
|
|
87
|
+
captured.update(kwargs)
|
|
88
|
+
return Path(kwargs["config_out"]).resolve(), Path(kwargs["env_out"]).resolve(), False
|
|
89
|
+
|
|
90
|
+
monkeypatch.setattr(setup_wizard, "write_config_and_env", fake_write_config_and_env)
|
|
91
|
+
|
|
92
|
+
exit_code = setup_wizard.main(["--config-out", str(config_path), "--env-out", str(env_path)])
|
|
93
|
+
|
|
94
|
+
assert exit_code == 0
|
|
95
|
+
assert captured["write_env"] is False
|
|
96
|
+
assert captured["api_key"] is None
|
|
97
|
+
assert captured["api_secret"] is None
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def test_setup_wizard_decline_overwrite_keeps_existing_env(
|
|
101
|
+
monkeypatch: pytest.MonkeyPatch,
|
|
102
|
+
tmp_path: Path,
|
|
103
|
+
) -> None:
|
|
104
|
+
config_path = tmp_path / "config.yaml"
|
|
105
|
+
env_path = tmp_path / ".env"
|
|
106
|
+
env_path.write_text("OS_PROJECT_API_KEY=old\nOS_PROJECT_API_SECRET=old\n", encoding="utf-8")
|
|
107
|
+
|
|
108
|
+
monkeypatch.setattr(
|
|
109
|
+
setup_wizard.console,
|
|
110
|
+
"input",
|
|
111
|
+
_input_feeder(
|
|
112
|
+
[
|
|
113
|
+
"", # source (default)
|
|
114
|
+
"pkg-1",
|
|
115
|
+
"ver-1",
|
|
116
|
+
"",
|
|
117
|
+
"n", # advanced settings
|
|
118
|
+
"y", # setup .env now
|
|
119
|
+
"n", # do not overwrite existing .env
|
|
120
|
+
]
|
|
121
|
+
),
|
|
122
|
+
)
|
|
123
|
+
|
|
124
|
+
captured: dict[str, object] = {}
|
|
125
|
+
|
|
126
|
+
def fake_write_config_and_env(**kwargs):
|
|
127
|
+
captured.update(kwargs)
|
|
128
|
+
return Path(kwargs["config_out"]).resolve(), Path(kwargs["env_out"]).resolve(), False
|
|
129
|
+
|
|
130
|
+
monkeypatch.setattr(setup_wizard, "write_config_and_env", fake_write_config_and_env)
|
|
131
|
+
|
|
132
|
+
exit_code = setup_wizard.main(["--config-out", str(config_path), "--env-out", str(env_path)])
|
|
133
|
+
|
|
134
|
+
assert exit_code == 0
|
|
135
|
+
assert captured["write_env"] is False
|
|
136
|
+
assert captured["overwrite_env"] is False
|