ukam-os-builder 0.1.0.dev2__tar.gz → 0.1.0.dev3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/.gitignore +1 -0
  2. {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/AGENTS.md +1 -1
  3. {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/PKG-INFO +37 -26
  4. {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/README.md +36 -25
  5. ukam_os_builder-0.1.0.dev2/config.yaml → ukam_os_builder-0.1.0.dev3/config.example.yaml +0 -9
  6. {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/pyproject.toml +1 -1
  7. {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/tests/test_api.py +80 -9
  8. ukam_os_builder-0.1.0.dev3/tests/test_cli.py +32 -0
  9. {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/tests/test_settings.py +56 -15
  10. ukam_os_builder-0.1.0.dev3/tests/test_setup_wizard.py +136 -0
  11. {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/ukam_os_builder/__init__.py +1 -1
  12. {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/ukam_os_builder/api/api.py +65 -21
  13. {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/ukam_os_builder/api/settings.py +51 -23
  14. {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/ukam_os_builder/cli.py +10 -0
  15. {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/ukam_os_builder/os_builder/pipeline_factory.py +12 -8
  16. {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/ukam_os_builder/pipeline.py +4 -2
  17. {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/ukam_os_builder/setup_wizard.py +56 -29
  18. {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/uv.lock +860 -858
  19. ukam_os_builder-0.1.0.dev2/ukam_os_builder/data_sources/abp/to_flatfile.py +0 -677
  20. {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/.env.example +0 -0
  21. {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/.github/workflows/ci.yml +0 -0
  22. {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/.github/workflows/release-pypi.yml +0 -0
  23. {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/prompt.md +0 -0
  24. {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/shell/test_release_locally.sh +0 -0
  25. {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/tests/data/README.md +0 -0
  26. {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/tests/data/add_gb_builtaddress.csv +0 -0
  27. {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/tests/data/add_gb_builtaddress_altadd.csv +0 -0
  28. {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/tests/data/add_gb_historicaddress.csv +0 -0
  29. {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/tests/data/add_gb_prebuildaddress.csv +0 -0
  30. {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/tests/data/add_gb_royalmailaddress.csv +0 -0
  31. {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/tests/test_cli_errors.py +0 -0
  32. {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/tests/test_extract_source_filtering.py +0 -0
  33. {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/tests/test_inspect_results.py +0 -0
  34. {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/tests/test_public_api_integration.py +0 -0
  35. {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/tests/test_smoke.py +0 -0
  36. {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/ukam_os_builder/_exceptions.py +0 -0
  37. {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/ukam_os_builder/api/cli_errors.py +0 -0
  38. {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/ukam_os_builder/data_sources/abp/schemas/abp_schema.yaml +0 -0
  39. {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/ukam_os_builder/data_sources/abp/split_raw.py +0 -0
  40. {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/ukam_os_builder/data_sources/abp/transform/__init__.py +0 -0
  41. {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/ukam_os_builder/data_sources/abp/transform/common.py +0 -0
  42. {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/ukam_os_builder/data_sources/abp/transform/runner.py +0 -0
  43. {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/ukam_os_builder/data_sources/abp/transform/stages/__init__.py +0 -0
  44. {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/ukam_os_builder/data_sources/abp/transform/stages/business.py +0 -0
  45. {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/ukam_os_builder/data_sources/abp/transform/stages/combine.py +0 -0
  46. {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/ukam_os_builder/data_sources/abp/transform/stages/lpi.py +0 -0
  47. {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/ukam_os_builder/data_sources/abp/transform/stages/misc.py +0 -0
  48. {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/ukam_os_builder/data_sources/abp/transform/stages/postal.py +0 -0
  49. {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/ukam_os_builder/data_sources/ngd/to_flatfile.py +0 -0
  50. {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/ukam_os_builder/os_builder/__init__.py +0 -0
  51. {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/ukam_os_builder/os_builder/extract.py +0 -0
  52. {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/ukam_os_builder/os_builder/inspect_results.py +0 -0
  53. {ukam_os_builder-0.1.0.dev2 → ukam_os_builder-0.1.0.dev3}/ukam_os_builder/os_builder/os_hub.py +0 -0
@@ -5,6 +5,7 @@ data/
5
5
  !tests/data/**
6
6
  scripts/os_docs.md
7
7
  .env
8
+ config.yaml
8
9
  # Byte-compiled / optimized / DLL files
9
10
  __pycache__/
10
11
  *.py[codz]
@@ -7,7 +7,7 @@ This project transforms NGD (National Geographic Database) data into a clean fla
7
7
  ## Repository Structure
8
8
 
9
9
  ```
10
- ├── config.yaml # Pipeline configuration
10
+ ├── config.example.yaml # Pipeline configuration template (copy to config.yaml)
11
11
  ├── script.py # Main entry point
12
12
  ├── pyproject.toml # Project metadata and dependencies
13
13
  ├── README.md # User documentation
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ukam-os-builder
3
- Version: 0.1.0.dev2
3
+ Version: 0.1.0.dev3
4
4
  Summary: Download, process and transform OS address data (NGD or ABP) for UK address matching
5
5
  Project-URL: Homepage, https://github.com/moj-analytical-services/prepare_ngd_for_address_matching
6
6
  Project-URL: Repository, https://github.com/moj-analytical-services/prepare_ngd_for_address_matching
@@ -140,7 +140,13 @@ result = inspect_flatfile_variants(config_path="config.yaml", top_offset=0, show
140
140
  <summary>Configure manually</summary>
141
141
 
142
142
  If you prefer not to use the setup wizard, edit `config.yaml` directly.
143
- Set `source.type`, `os_downloads.package_id`, and `os_downloads.version_id`, then adjust `paths` and `processing` as needed.
143
+ Set `source.type`, `os_downloads.package_id`, and `os_downloads.version_id`.
144
+
145
+ Most users only need one path setting:
146
+
147
+ - `paths.work_dir` (default `./data`, relative to the config file directory)
148
+
149
+ The tool derives all other directories automatically under `work_dir`.
144
150
 
145
151
  </details>
146
152
 
@@ -153,7 +159,7 @@ Set `source.type`, `os_downloads.package_id`, and `os_downloads.version_id`, the
153
159
 
154
160
  ### Command notes
155
161
 
156
- - `--list-only` is only valid with `--step download` or `--step all`.
162
+ - `step` only supports `download` and `all` to simplify usage. Use `--overwrite` to re-run a step with the same parameters.
157
163
  - CLI overrides take precedence over values in `config.yaml`.
158
164
  - By default, `ukam-os-build` loads `.env` from the same directory as your config, unless `--env-file` is supplied.
159
165
 
@@ -294,25 +300,6 @@ When the same UPRN and address combination appears in multiple sources, records
294
300
  4. Historic
295
301
  5. Demolished
296
302
 
297
- ## Manual Download
298
-
299
- If you prefer to download manually:
300
- - Sign in to https://osdatahub.os.uk/
301
- - Create a datapackage with NGD address features
302
- - Download the zip file
303
-
304
- To run the pipeline from a manual download:
305
-
306
- 1. Place the zip in the downloads directory configured in `config.yaml`
307
- - By default this is `data/downloads/`
308
- - The extract step looks for `*.zip` files in this folder
309
-
310
- 2. Run the pipeline starting from extract:
311
-
312
- ```bash
313
- ukam-os-build --config config.yaml --step extract
314
- ukam-os-build --config config.yaml --step flatfile
315
- ```
316
303
 
317
304
  ## OS Downloads API
318
305
 
@@ -348,10 +335,6 @@ source:
348
335
 
349
336
  paths:
350
337
  work_dir: ./data
351
- downloads_dir: ./data/downloads
352
- extracted_dir: ./data/extracted
353
- parquet_dir: ./data/parquet
354
- output_dir: ./data/output
355
338
 
356
339
  os_downloads:
357
340
  package_id: "<your_package_id>"
@@ -366,6 +349,34 @@ processing:
366
349
  # duckdb_memory_limit: "8GB"
367
350
  ```
368
351
 
352
+ By default, the tool creates these directories under `paths.work_dir`:
353
+
354
+ - downloads: `<work_dir>/downloads`
355
+ - extracted: `<work_dir>/extracted`
356
+ - parquet: `<work_dir>/parquet`
357
+ - output: `<work_dir>/output`
358
+
359
+ <details>
360
+ <summary>Advanced: override default directories</summary>
361
+
362
+ Most users won’t need this.
363
+
364
+ If you need to customize locations, use `paths.overrides`:
365
+
366
+ ```yaml
367
+ paths:
368
+ work_dir: ./data
369
+ overrides:
370
+ downloads_dir: ./somewhere/downloads
371
+ extracted_dir: /mnt/fast/extracted
372
+ parquet_dir: ./data/parquet
373
+ output_dir: ./output
374
+ ```
375
+
376
+ Override keys replace derived defaults. Relative paths are resolved relative to the directory containing `config.yaml`.
377
+
378
+ </details>
379
+
369
380
  ## Smoke test
370
381
 
371
382
  ```bash
@@ -114,7 +114,13 @@ result = inspect_flatfile_variants(config_path="config.yaml", top_offset=0, show
114
114
  <summary>Configure manually</summary>
115
115
 
116
116
  If you prefer not to use the setup wizard, edit `config.yaml` directly.
117
- Set `source.type`, `os_downloads.package_id`, and `os_downloads.version_id`, then adjust `paths` and `processing` as needed.
117
+ Set `source.type`, `os_downloads.package_id`, and `os_downloads.version_id`.
118
+
119
+ Most users only need one path setting:
120
+
121
+ - `paths.work_dir` (default `./data`, relative to the config file directory)
122
+
123
+ The tool derives all other directories automatically under `work_dir`.
118
124
 
119
125
  </details>
120
126
 
@@ -127,7 +133,7 @@ Set `source.type`, `os_downloads.package_id`, and `os_downloads.version_id`, the
127
133
 
128
134
  ### Command notes
129
135
 
130
- - `--list-only` is only valid with `--step download` or `--step all`.
136
+ - `step` only supports `download` and `all` to simplify usage. Use `--overwrite` to re-run a step with the same parameters.
131
137
  - CLI overrides take precedence over values in `config.yaml`.
132
138
  - By default, `ukam-os-build` loads `.env` from the same directory as your config, unless `--env-file` is supplied.
133
139
 
@@ -268,25 +274,6 @@ When the same UPRN and address combination appears in multiple sources, records
268
274
  4. Historic
269
275
  5. Demolished
270
276
 
271
- ## Manual Download
272
-
273
- If you prefer to download manually:
274
- - Sign in to https://osdatahub.os.uk/
275
- - Create a datapackage with NGD address features
276
- - Download the zip file
277
-
278
- To run the pipeline from a manual download:
279
-
280
- 1. Place the zip in the downloads directory configured in `config.yaml`
281
- - By default this is `data/downloads/`
282
- - The extract step looks for `*.zip` files in this folder
283
-
284
- 2. Run the pipeline starting from extract:
285
-
286
- ```bash
287
- ukam-os-build --config config.yaml --step extract
288
- ukam-os-build --config config.yaml --step flatfile
289
- ```
290
277
 
291
278
  ## OS Downloads API
292
279
 
@@ -322,10 +309,6 @@ source:
322
309
 
323
310
  paths:
324
311
  work_dir: ./data
325
- downloads_dir: ./data/downloads
326
- extracted_dir: ./data/extracted
327
- parquet_dir: ./data/parquet
328
- output_dir: ./data/output
329
312
 
330
313
  os_downloads:
331
314
  package_id: "<your_package_id>"
@@ -340,6 +323,34 @@ processing:
340
323
  # duckdb_memory_limit: "8GB"
341
324
  ```
342
325
 
326
+ By default, the tool creates these directories under `paths.work_dir`:
327
+
328
+ - downloads: `<work_dir>/downloads`
329
+ - extracted: `<work_dir>/extracted`
330
+ - parquet: `<work_dir>/parquet`
331
+ - output: `<work_dir>/output`
332
+
333
+ <details>
334
+ <summary>Advanced: override default directories</summary>
335
+
336
+ Most users won’t need this.
337
+
338
+ If you need to customize locations, use `paths.overrides`:
339
+
340
+ ```yaml
341
+ paths:
342
+ work_dir: ./data
343
+ overrides:
344
+ downloads_dir: ./somewhere/downloads
345
+ extracted_dir: /mnt/fast/extracted
346
+ parquet_dir: ./data/parquet
347
+ output_dir: ./output
348
+ ```
349
+
350
+ Override keys replace derived defaults. Relative paths are resolved relative to the directory containing `config.yaml`.
351
+
352
+ </details>
353
+
343
354
  ## Smoke test
344
355
 
345
356
  ```bash
@@ -5,15 +5,6 @@ paths:
5
5
  # Base working directory for all data
6
6
  work_dir: ./data
7
7
 
8
- # Downloaded zip files from OS
9
- downloads_dir: ./data/downloads
10
-
11
- # Extracted CSV files and intermediate parquet
12
- extracted_dir: ./data/extracted
13
-
14
- # Final output parquet files
15
- output_dir: ./data/output
16
-
17
8
  # OS Data Hub download settings
18
9
  # Given a datapackage at: https://osdatahub.os.uk/data/downloads/data-packages/16331
19
10
  # You can get versions from:
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "ukam-os-builder"
3
- version = "0.1.0.dev2"
3
+ version = "0.1.0.dev3"
4
4
  description = "Download, process and transform OS address data (NGD or ABP) for UK address matching"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.10"
@@ -1,7 +1,9 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import os
3
4
  from pathlib import Path
4
5
  from textwrap import dedent
6
+ from typing import Literal
5
7
 
6
8
  import pytest
7
9
 
@@ -39,6 +41,37 @@ def test_create_config_and_env_writes_expected_files(tmp_path: Path) -> None:
39
41
  assert "OS_PROJECT_API_SECRET=your_api_secret_here" in env_text
40
42
 
41
43
 
44
+ def test_create_config_and_env_writes_supplied_api_credentials(tmp_path: Path) -> None:
45
+ config_path = tmp_path / "config.yaml"
46
+ env_path = tmp_path / ".env"
47
+
48
+ create_config_and_env(
49
+ config_out=config_path,
50
+ env_out=env_path,
51
+ source="ngd",
52
+ package_id="16331",
53
+ version_id="104444",
54
+ api_key="my-key",
55
+ api_secret="my-secret",
56
+ )
57
+
58
+ env_text = env_path.read_text()
59
+ assert "OS_PROJECT_API_KEY=my-key" in env_text
60
+ assert "OS_PROJECT_API_SECRET=my-secret" in env_text
61
+
62
+
63
+ def test_create_config_and_env_rejects_partial_api_credentials(tmp_path: Path) -> None:
64
+ with pytest.raises(ValueError, match="must be provided together"):
65
+ create_config_and_env(
66
+ config_out=tmp_path / "config.yaml",
67
+ env_out=tmp_path / ".env",
68
+ source="ngd",
69
+ package_id="16331",
70
+ version_id="104444",
71
+ api_key="my-key",
72
+ )
73
+
74
+
42
75
  def test_run_from_config_applies_overrides(
43
76
  monkeypatch: pytest.MonkeyPatch,
44
77
  tmp_path: Path,
@@ -52,9 +85,6 @@ def test_run_from_config_applies_overrides(
52
85
  """
53
86
  paths:
54
87
  work_dir: ./data
55
- downloads_dir: ./data/downloads
56
- extracted_dir: ./data/extracted
57
- output_dir: ./data/output
58
88
 
59
89
  os_downloads:
60
90
  package_id: "16465"
@@ -70,7 +100,9 @@ def test_run_from_config_applies_overrides(
70
100
  def fake_check_api(_settings: object) -> None:
71
101
  calls["checked_api"] = True
72
102
 
73
- def fake_run_pipeline(step: str, settings: object, force: bool, list_only: bool) -> None:
103
+ def fake_run_pipeline(
104
+ step: Literal["all", "download"], settings: object, force: bool, list_only: bool
105
+ ) -> None:
74
106
  calls["step"] = step
75
107
  calls["force"] = force
76
108
  calls["list_only"] = list_only
@@ -94,6 +126,47 @@ def test_run_from_config_applies_overrides(
94
126
  assert calls["num_chunks"] == 5
95
127
 
96
128
 
129
+ def test_run_from_config_accepts_api_key_secret_overrides(
130
+ monkeypatch: pytest.MonkeyPatch,
131
+ tmp_path: Path,
132
+ ) -> None:
133
+ monkeypatch.delenv("OS_PROJECT_API_KEY", raising=False)
134
+ monkeypatch.delenv("OS_PROJECT_API_SECRET", raising=False)
135
+
136
+ config_path = tmp_path / "config.yaml"
137
+ _write_config(
138
+ config_path,
139
+ """
140
+ source:
141
+ type: ngd
142
+
143
+ os_downloads:
144
+ package_id: "16465"
145
+ version_id: "104444"
146
+ """,
147
+ )
148
+
149
+ monkeypatch.setattr("ukam_os_builder.api.api.get_package_version", lambda _settings: None)
150
+ monkeypatch.setattr("ukam_os_builder.api.api.run_pipeline", lambda **_kwargs: None)
151
+
152
+ run_from_config(
153
+ config_path=config_path,
154
+ api_key="runtime-key",
155
+ api_secret="runtime-secret",
156
+ )
157
+
158
+ assert os.environ["OS_PROJECT_API_KEY"] == "runtime-key"
159
+ assert os.environ["OS_PROJECT_API_SECRET"] == "runtime-secret"
160
+
161
+
162
+ def test_run_from_config_rejects_partial_api_credentials(tmp_path: Path) -> None:
163
+ with pytest.raises(ValueError, match="must be provided together"):
164
+ run_from_config(
165
+ config_path=tmp_path / "config.yaml",
166
+ api_key="runtime-key",
167
+ )
168
+
169
+
97
170
  def test_run_from_config_validates_list_only_step(tmp_path: Path) -> None:
98
171
  with pytest.raises(ValueError, match="--list-only can only be used"):
99
172
  run_from_config(config_path=tmp_path / "config.yaml", step="extract", list_only=True)
@@ -126,7 +199,9 @@ def test_run_from_config_uses_source_override_for_pipeline_validation(
126
199
 
127
200
  monkeypatch.setattr("ukam_os_builder.api.api.get_package_version", lambda _settings: None)
128
201
 
129
- def fake_run_pipeline(step: str, settings: object, force: bool, list_only: bool) -> None:
202
+ def fake_run_pipeline(
203
+ step: Literal["all", "download"], settings: object, force: bool, list_only: bool
204
+ ) -> None:
130
205
  calls["step"] = step
131
206
  calls["source"] = settings.source.type
132
207
  calls["force"] = force
@@ -195,10 +270,6 @@ def test_run_from_config_applies_schema_path_override(
195
270
 
196
271
  paths:
197
272
  work_dir: ./data
198
- downloads_dir: ./data/downloads
199
- extracted_dir: ./data/extracted
200
- output_dir: ./data/output
201
- parquet_dir: ./data/parquet
202
273
 
203
274
  os_downloads:
204
275
  package_id: "16465"
@@ -0,0 +1,32 @@
1
+ from __future__ import annotations
2
+
3
+ from ukam_os_builder import cli
4
+
5
+
6
+ def test_build_cli_passes_api_credentials_to_run_from_config(monkeypatch) -> None:
7
+ captured: dict[str, object] = {}
8
+
9
+ def fake_run_from_config(**kwargs):
10
+ captured.update(kwargs)
11
+ return None
12
+
13
+ monkeypatch.setattr(cli, "run_from_config", fake_run_from_config)
14
+ monkeypatch.setattr(cli, "_configure_logging", lambda _verbose: None)
15
+
16
+ exit_code = cli.main(
17
+ [
18
+ "--config",
19
+ "config.yaml",
20
+ "--step",
21
+ "download",
22
+ "--list-only",
23
+ "--api-key",
24
+ "runtime-key",
25
+ "--api-secret",
26
+ "runtime-secret",
27
+ ]
28
+ )
29
+
30
+ assert exit_code == 0
31
+ assert captured["api_key"] == "runtime-key"
32
+ assert captured["api_secret"] == "runtime-secret"
@@ -25,9 +25,6 @@ def test_load_settings_resolves_paths_relative_to_config(
25
25
  """
26
26
  paths:
27
27
  work_dir: ./data
28
- downloads_dir: ./data/downloads
29
- extracted_dir: ./data/extracted
30
- output_dir: ./data/output
31
28
 
32
29
  os_downloads:
33
30
  package_id: "16465"
@@ -57,9 +54,6 @@ def test_load_settings_rejects_unknown_config_key(
57
54
  """
58
55
  paths:
59
56
  work_dir: ./data
60
- downloads_dir: ./data/downloads
61
- extracted_dir: ./data/extracted
62
- output_dir: ./data/output
63
57
 
64
58
  os_downloads:
65
59
  package_id: "16465"
@@ -87,9 +81,6 @@ def test_load_settings_missing_package_id_has_clear_message(
87
81
  """
88
82
  paths:
89
83
  work_dir: ./data
90
- downloads_dir: ./data/downloads
91
- extracted_dir: ./data/extracted
92
- output_dir: ./data/output
93
84
 
94
85
  os_downloads:
95
86
  version_id: "104444"
@@ -141,9 +132,6 @@ def test_load_settings_requires_env_vars(tmp_path: Path, monkeypatch: pytest.Mon
141
132
  """
142
133
  paths:
143
134
  work_dir: ./data
144
- downloads_dir: ./data/downloads
145
- extracted_dir: ./data/extracted
146
- output_dir: ./data/output
147
135
 
148
136
  os_downloads:
149
137
  package_id: "16465"
@@ -167,9 +155,6 @@ def test_load_settings_validates_positive_read_timeout(
167
155
  """
168
156
  paths:
169
157
  work_dir: ./data
170
- downloads_dir: ./data/downloads
171
- extracted_dir: ./data/extracted
172
- output_dir: ./data/output
173
158
 
174
159
  os_downloads:
175
160
  package_id: "16465"
@@ -205,3 +190,59 @@ def test_load_settings_defaults_source_and_num_chunks(
205
190
 
206
191
  assert settings.source.type == "ngd"
207
192
  assert settings.processing.num_chunks == 20
193
+
194
+
195
+ def test_load_settings_applies_path_overrides(
196
+ monkeypatch: pytest.MonkeyPatch, tmp_path: Path
197
+ ) -> None:
198
+ monkeypatch.setenv("OS_PROJECT_API_KEY", "key")
199
+ monkeypatch.setenv("OS_PROJECT_API_SECRET", "secret")
200
+
201
+ config_path = tmp_path / "config.yaml"
202
+ _write_config(
203
+ config_path,
204
+ """
205
+ paths:
206
+ work_dir: ./data
207
+ overrides:
208
+ downloads_dir: ./custom/downloads
209
+ extracted_dir: /tmp/extracted
210
+
211
+ os_downloads:
212
+ package_id: "16465"
213
+ version_id: "104444"
214
+ """,
215
+ )
216
+
217
+ settings = load_settings(config_path, load_env=False)
218
+
219
+ assert settings.paths.work_dir == (tmp_path / "data").resolve()
220
+ assert settings.paths.downloads_dir == (tmp_path / "custom/downloads").resolve()
221
+ assert str(settings.paths.extracted_dir).endswith("/tmp/extracted")
222
+ assert settings.paths.parquet_dir == (tmp_path / "data/parquet").resolve()
223
+ assert settings.paths.output_dir == (tmp_path / "data/output").resolve()
224
+
225
+
226
+ def test_load_settings_rejects_legacy_path_keys(
227
+ monkeypatch: pytest.MonkeyPatch,
228
+ tmp_path: Path,
229
+ ) -> None:
230
+ monkeypatch.setenv("OS_PROJECT_API_KEY", "key")
231
+ monkeypatch.setenv("OS_PROJECT_API_SECRET", "secret")
232
+
233
+ config_path = tmp_path / "config.yaml"
234
+ _write_config(
235
+ config_path,
236
+ """
237
+ paths:
238
+ work_dir: ./data
239
+ downloads_dir: ./legacy/downloads
240
+
241
+ os_downloads:
242
+ package_id: "16465"
243
+ version_id: "104444"
244
+ """,
245
+ )
246
+
247
+ with pytest.raises(SettingsError, match="no longer supported"):
248
+ load_settings(config_path, load_env=False)
@@ -0,0 +1,136 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+
5
+ import pytest
6
+
7
+ from ukam_os_builder import setup_wizard
8
+
9
+
10
+ def _input_feeder(values: list[str]):
11
+ iterator = iter(values)
12
+
13
+ def _fake_input(_prompt: str, markup: bool = False) -> str: # noqa: ARG001
14
+ return next(iterator)
15
+
16
+ return _fake_input
17
+
18
+
19
+ def test_setup_wizard_prompts_for_env_credentials_and_overwrites_existing(
20
+ monkeypatch: pytest.MonkeyPatch,
21
+ tmp_path: Path,
22
+ ) -> None:
23
+ config_path = tmp_path / "config.yaml"
24
+ env_path = tmp_path / ".env"
25
+ env_path.write_text("OS_PROJECT_API_KEY=old\nOS_PROJECT_API_SECRET=old\n", encoding="utf-8")
26
+
27
+ monkeypatch.setattr(
28
+ setup_wizard.console,
29
+ "input",
30
+ _input_feeder(
31
+ [
32
+ "", # source (default)
33
+ "pkg-1",
34
+ "ver-1",
35
+ "",
36
+ "n", # advanced settings
37
+ "y", # setup .env now
38
+ "y", # overwrite existing .env
39
+ "new-key",
40
+ "new-secret",
41
+ ]
42
+ ),
43
+ )
44
+
45
+ captured: dict[str, object] = {}
46
+
47
+ def fake_write_config_and_env(**kwargs):
48
+ captured.update(kwargs)
49
+ return Path(kwargs["config_out"]).resolve(), Path(kwargs["env_out"]).resolve(), True
50
+
51
+ monkeypatch.setattr(setup_wizard, "write_config_and_env", fake_write_config_and_env)
52
+
53
+ exit_code = setup_wizard.main(["--config-out", str(config_path), "--env-out", str(env_path)])
54
+
55
+ assert exit_code == 0
56
+ assert captured["write_env"] is True
57
+ assert captured["overwrite_env"] is True
58
+ assert captured["api_key"] == "new-key"
59
+ assert captured["api_secret"] == "new-secret"
60
+
61
+
62
+ def test_setup_wizard_skips_env_update_when_user_declines(
63
+ monkeypatch: pytest.MonkeyPatch,
64
+ tmp_path: Path,
65
+ ) -> None:
66
+ config_path = tmp_path / "config.yaml"
67
+ env_path = tmp_path / ".env"
68
+
69
+ monkeypatch.setattr(
70
+ setup_wizard.console,
71
+ "input",
72
+ _input_feeder(
73
+ [
74
+ "", # source (default)
75
+ "pkg-1",
76
+ "ver-1",
77
+ "",
78
+ "n", # advanced settings
79
+ "n", # setup .env now
80
+ ]
81
+ ),
82
+ )
83
+
84
+ captured: dict[str, object] = {}
85
+
86
+ def fake_write_config_and_env(**kwargs):
87
+ captured.update(kwargs)
88
+ return Path(kwargs["config_out"]).resolve(), Path(kwargs["env_out"]).resolve(), False
89
+
90
+ monkeypatch.setattr(setup_wizard, "write_config_and_env", fake_write_config_and_env)
91
+
92
+ exit_code = setup_wizard.main(["--config-out", str(config_path), "--env-out", str(env_path)])
93
+
94
+ assert exit_code == 0
95
+ assert captured["write_env"] is False
96
+ assert captured["api_key"] is None
97
+ assert captured["api_secret"] is None
98
+
99
+
100
+ def test_setup_wizard_decline_overwrite_keeps_existing_env(
101
+ monkeypatch: pytest.MonkeyPatch,
102
+ tmp_path: Path,
103
+ ) -> None:
104
+ config_path = tmp_path / "config.yaml"
105
+ env_path = tmp_path / ".env"
106
+ env_path.write_text("OS_PROJECT_API_KEY=old\nOS_PROJECT_API_SECRET=old\n", encoding="utf-8")
107
+
108
+ monkeypatch.setattr(
109
+ setup_wizard.console,
110
+ "input",
111
+ _input_feeder(
112
+ [
113
+ "", # source (default)
114
+ "pkg-1",
115
+ "ver-1",
116
+ "",
117
+ "n", # advanced settings
118
+ "y", # setup .env now
119
+ "n", # do not overwrite existing .env
120
+ ]
121
+ ),
122
+ )
123
+
124
+ captured: dict[str, object] = {}
125
+
126
+ def fake_write_config_and_env(**kwargs):
127
+ captured.update(kwargs)
128
+ return Path(kwargs["config_out"]).resolve(), Path(kwargs["env_out"]).resolve(), False
129
+
130
+ monkeypatch.setattr(setup_wizard, "write_config_and_env", fake_write_config_and_env)
131
+
132
+ exit_code = setup_wizard.main(["--config-out", str(config_path), "--env-out", str(env_path)])
133
+
134
+ assert exit_code == 0
135
+ assert captured["write_env"] is False
136
+ assert captured["overwrite_env"] is False
@@ -8,7 +8,7 @@ from ukam_os_builder.os_builder.inspect_results import (
8
8
  inspect_flatfile_variants,
9
9
  )
10
10
 
11
- __version__ = "0.1.0.dev2"
11
+ __version__ = "0.1.0.dev3"
12
12
 
13
13
  __all__ = [
14
14
  "create_config_and_env",