ukam-os-builder 0.1.0.dev6__tar.gz → 0.1.0.dev8__tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Files changed (53)
  1. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/.github/workflows/ci.yml +3 -3
  2. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/PKG-INFO +5 -2
  3. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/README.md +4 -1
  4. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/pyproject.toml +1 -1
  5. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/tests/test_api.py +72 -0
  6. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/ukam_os_builder/__init__.py +1 -1
  7. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/ukam_os_builder/api/api.py +11 -1
  8. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/ukam_os_builder/os_builder/os_hub.py +38 -14
  9. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/uv.lock +1 -1
  10. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/.env.example +0 -0
  11. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/.github/workflows/e2e.yml +0 -0
  12. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/.github/workflows/release-pypi.yml +0 -0
  13. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/.gitignore +0 -0
  14. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/AGENTS.md +0 -0
  15. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/config.example.yaml +0 -0
  16. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/prompt.md +0 -0
  17. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/shell/test_release_locally.sh +0 -0
  18. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/tests/data/README.md +0 -0
  19. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/tests/data/add_gb_builtaddress.csv +0 -0
  20. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/tests/data/add_gb_builtaddress_altadd.csv +0 -0
  21. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/tests/data/add_gb_historicaddress.csv +0 -0
  22. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/tests/data/add_gb_prebuildaddress.csv +0 -0
  23. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/tests/data/add_gb_royalmailaddress.csv +0 -0
  24. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/tests/test_cli.py +0 -0
  25. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/tests/test_cli_errors.py +0 -0
  26. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/tests/test_extract_source_filtering.py +0 -0
  27. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/tests/test_inspect_results.py +0 -0
  28. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/tests/test_public_api_integration.py +0 -0
  29. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/tests/test_settings.py +0 -0
  30. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/tests/test_setup_wizard.py +0 -0
  31. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/tests/test_smoke.py +0 -0
  32. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/ukam_os_builder/_exceptions.py +0 -0
  33. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/ukam_os_builder/api/cli_errors.py +0 -0
  34. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/ukam_os_builder/api/settings.py +0 -0
  35. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/ukam_os_builder/cli.py +0 -0
  36. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/ukam_os_builder/data_sources/abp/schemas/abp_schema.yaml +0 -0
  37. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/ukam_os_builder/data_sources/abp/split_raw.py +0 -0
  38. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/ukam_os_builder/data_sources/abp/transform/__init__.py +0 -0
  39. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/ukam_os_builder/data_sources/abp/transform/common.py +0 -0
  40. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/ukam_os_builder/data_sources/abp/transform/runner.py +0 -0
  41. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/ukam_os_builder/data_sources/abp/transform/stages/__init__.py +0 -0
  42. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/ukam_os_builder/data_sources/abp/transform/stages/business.py +0 -0
  43. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/ukam_os_builder/data_sources/abp/transform/stages/combine.py +0 -0
  44. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/ukam_os_builder/data_sources/abp/transform/stages/lpi.py +0 -0
  45. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/ukam_os_builder/data_sources/abp/transform/stages/misc.py +0 -0
  46. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/ukam_os_builder/data_sources/abp/transform/stages/postal.py +0 -0
  47. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/ukam_os_builder/data_sources/ngd/to_flatfile.py +0 -0
  48. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/ukam_os_builder/os_builder/__init__.py +0 -0
  49. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/ukam_os_builder/os_builder/extract.py +0 -0
  50. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/ukam_os_builder/os_builder/inspect_results.py +0 -0
  51. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/ukam_os_builder/os_builder/pipeline_factory.py +0 -0
  52. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/ukam_os_builder/pipeline.py +0 -0
  53. {ukam_os_builder-0.1.0.dev6 → ukam_os_builder-0.1.0.dev8}/ukam_os_builder/setup_wizard.py +0 -0
@@ -3,9 +3,9 @@ name: Build & package
3
3
  on:
4
4
  workflow_dispatch:
5
5
  push:
6
- branches: [main]
6
+ branches: [ main ]
7
7
  paths:
8
- - "src/**"
8
+ - "ukam_os_builder/**"
9
9
  - "tests/**"
10
10
  - "pyproject.toml"
11
11
  - "uv.lock"
@@ -65,4 +65,4 @@ jobs:
65
65
  uses: actions/upload-artifact@v4
66
66
  with:
67
67
  name: dist
68
- path: dist/*
68
+ path: dist/*
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ukam-os-builder
3
- Version: 0.1.0.dev6
3
+ Version: 0.1.0.dev8
4
4
  Summary: Download, process and transform OS address data (NGD or ABP) for UK address matching
5
5
  Project-URL: Homepage, https://github.com/moj-analytical-services/prepare_ngd_for_address_matching
6
6
  Project-URL: Repository, https://github.com/moj-analytical-services/prepare_ngd_for_address_matching
@@ -32,11 +32,14 @@ Build OS address data for `uk_address_matcher` from either NGD (National Geograp
32
32
 
33
33
  - Python `3.10+`
34
34
  - OS Data Hub package and version IDs
35
- - Network access to OS Downloads API
35
+ - Network access to OS Downloads API for downloads or remote listing
36
+ - Existing downloaded archives if you want to run offline without re-downloading
36
37
  - Credentials in `.env`:
37
38
  - `OS_PROJECT_API_KEY`
38
39
  - `OS_PROJECT_API_SECRET`
39
40
 
41
+ If the required zip files already exist in your downloads directory, the build can now continue offline without contacting OS Data Hub. `--list-only` still requires network access because it queries remote package metadata.
42
+
40
43
  ## Install from PyPI
41
44
 
42
45
  ```bash
@@ -6,11 +6,14 @@ Build OS address data for `uk_address_matcher` from either NGD (National Geograp
6
6
 
7
7
  - Python `3.10+`
8
8
  - OS Data Hub package and version IDs
9
- - Network access to OS Downloads API
9
+ - Network access to OS Downloads API for downloads or remote listing
10
+ - Existing downloaded archives if you want to run offline without re-downloading
10
11
  - Credentials in `.env`:
11
12
  - `OS_PROJECT_API_KEY`
12
13
  - `OS_PROJECT_API_SECRET`
13
14
 
15
+ If the required zip files already exist in your downloads directory, the build can now continue offline without contacting OS Data Hub. `--list-only` still requires network access because it queries remote package metadata.
16
+
14
17
  ## Install from PyPI
15
18
 
16
19
  ```bash
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "ukam-os-builder"
3
- version = "0.1.0.dev6"
3
+ version = "0.1.0.dev8"
4
4
  description = "Download, process and transform OS address data (NGD or ABP) for UK address matching"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.10"
@@ -6,6 +6,7 @@ from textwrap import dedent
6
6
  from typing import Literal
7
7
 
8
8
  import pytest
9
+ import requests
9
10
 
10
11
  from ukam_os_builder.api.api import create_config_and_env, run_from_config
11
12
 
@@ -295,3 +296,74 @@ def test_run_from_config_applies_schema_path_override(
295
296
 
296
297
  assert calls["step"] == "split"
297
298
  assert calls["schema_path"] == custom_schema.resolve()
299
+
300
+
301
+ def test_run_from_config_continues_when_api_preflight_is_offline(
302
+ monkeypatch: pytest.MonkeyPatch,
303
+ tmp_path: Path,
304
+ caplog: pytest.LogCaptureFixture,
305
+ ) -> None:
306
+ monkeypatch.setenv("OS_PROJECT_API_KEY", "key")
307
+ monkeypatch.setenv("OS_PROJECT_API_SECRET", "secret")
308
+
309
+ config_path = tmp_path / "config.yaml"
310
+ _write_config(
311
+ config_path,
312
+ """
313
+ source:
314
+ type: ngd
315
+
316
+ os_downloads:
317
+ package_id: "16465"
318
+ version_id: "104444"
319
+ """,
320
+ )
321
+
322
+ calls: dict[str, object] = {}
323
+
324
+ def fake_check_api(_settings: object) -> None:
325
+ raise requests.exceptions.ConnectionError("offline")
326
+
327
+ def fake_run_pipeline(step: str, settings: object, force: bool, list_only: bool) -> None:
328
+ calls["step"] = step
329
+ calls["list_only"] = list_only
330
+
331
+ monkeypatch.setattr("ukam_os_builder.api.api.get_package_version", fake_check_api)
332
+ monkeypatch.setattr("ukam_os_builder.api.api.run_pipeline", fake_run_pipeline)
333
+
334
+ with caplog.at_level("WARNING"):
335
+ run_from_config(config_path=config_path, step="all")
336
+
337
+ assert calls["step"] == "all"
338
+ assert calls["list_only"] is False
339
+ assert "Could not reach OS Data Hub during API preflight" in caplog.text
340
+
341
+
342
+ def test_run_from_config_raises_when_list_only_api_preflight_is_offline(
343
+ monkeypatch: pytest.MonkeyPatch,
344
+ tmp_path: Path,
345
+ ) -> None:
346
+ monkeypatch.setenv("OS_PROJECT_API_KEY", "key")
347
+ monkeypatch.setenv("OS_PROJECT_API_SECRET", "secret")
348
+
349
+ config_path = tmp_path / "config.yaml"
350
+ _write_config(
351
+ config_path,
352
+ """
353
+ source:
354
+ type: ngd
355
+
356
+ os_downloads:
357
+ package_id: "16465"
358
+ version_id: "104444"
359
+ """,
360
+ )
361
+
362
+ def fake_check_api(_settings: object) -> None:
363
+ raise requests.exceptions.ConnectionError("offline")
364
+
365
+ monkeypatch.setattr("ukam_os_builder.api.api.get_package_version", fake_check_api)
366
+ monkeypatch.setattr("ukam_os_builder.api.api.run_pipeline", lambda **_kwargs: None)
367
+
368
+ with pytest.raises(requests.exceptions.ConnectionError, match="offline"):
369
+ run_from_config(config_path=config_path, step="download", list_only=True)
@@ -8,7 +8,7 @@ from ukam_os_builder.os_builder.inspect_results import (
8
8
  inspect_flatfile_variants,
9
9
  )
10
10
 
11
- __version__ = "0.1.0.dev6"
11
+ __version__ = "0.1.0.dev8"
12
12
 
13
13
  __all__ = [
14
14
  "create_config_and_env",
@@ -5,6 +5,7 @@ import os
5
5
  from pathlib import Path
6
6
  from typing import Any, Literal
7
7
 
8
+ import requests
8
9
  import yaml
9
10
 
10
11
  from ukam_os_builder.api.settings import Settings, SettingsError, load_settings
@@ -344,7 +345,16 @@ def run_from_config(
344
345
 
345
346
  has_api_key = bool(os.environ.get("OS_PROJECT_API_KEY"))
346
347
  if check_api and has_api_key:
347
- get_package_version(settings)
348
+ try:
349
+ get_package_version(settings)
350
+ except requests.exceptions.RequestException as exc:
351
+ if list_only:
352
+ raise
353
+ logger.warning(
354
+ "Could not reach OS Data Hub during API preflight (%s). "
355
+ "Continuing so local downloads can be used if available.",
356
+ exc.__class__.__name__,
357
+ )
348
358
 
349
359
  overwrite_effective = overwrite if overwrite is not None else bool(force)
350
360
  run_pipeline(step=step, settings=settings, force=overwrite_effective, list_only=list_only)
@@ -253,6 +253,29 @@ def download_file(
253
253
  return True
254
254
 
255
255
 
256
+ def _use_existing_archives_or_raise(
257
+ downloads_dir: Path,
258
+ reason: str,
259
+ original_exc: Exception,
260
+ ) -> list[Path]:
261
+ """Fall back to existing local archives, or re-raise with a helpful message."""
262
+ existing_archives = _find_existing_download_archives(downloads_dir)
263
+ if existing_archives:
264
+ logger.warning(
265
+ "%s; using %d existing archive(s) in %s and skipping download "
266
+ "(MD5 verification against the OS Data Hub will be skipped).",
267
+ reason,
268
+ len(existing_archives),
269
+ downloads_dir,
270
+ )
271
+ return existing_archives
272
+
273
+ raise ValueError(
274
+ f"{reason}. No local zip files were found in {downloads_dir}, "
275
+ "so download cannot be skipped."
276
+ ) from original_exc
277
+
278
+
256
279
  def run_download_step(
257
280
  settings: Any,
258
281
  force: bool = False,
@@ -266,22 +289,23 @@ def run_download_step(
266
289
  except ValueError as exc:
267
290
  if list_only:
268
291
  raise
269
-
270
- existing_archives = _find_existing_download_archives(downloads_dir)
271
- if existing_archives:
272
- logger.warning(
273
- "No API key found; using %d existing archive(s) in %s and skipping download.",
274
- len(existing_archives),
275
- downloads_dir,
276
- )
277
- return existing_archives
278
-
279
- raise ValueError(
280
- f"{exc} No local zip files were found in {downloads_dir}, so download cannot be skipped."
281
- ) from exc
292
+ return _use_existing_archives_or_raise(
293
+ downloads_dir,
294
+ reason="No API key found",
295
+ original_exc=exc,
296
+ )
282
297
 
283
298
  logger.info("Fetching package metadata...")
284
- metadata = get_package_version(settings)
299
+ try:
300
+ metadata = get_package_version(settings)
301
+ except (requests.exceptions.RequestException, OSError) as exc:
302
+ if list_only:
303
+ raise
304
+ return _use_existing_archives_or_raise(
305
+ downloads_dir,
306
+ reason=f"Could not reach OS Data Hub ({exc.__class__.__name__})",
307
+ original_exc=exc,
308
+ )
285
309
  items = list_downloads(metadata)
286
310
 
287
311
  if list_only:
@@ -1421,7 +1421,7 @@ wheels = [
1421
1421
 
1422
1422
  [[package]]
1423
1423
  name = "ukam-os-builder"
1424
- version = "0.1.0.dev6"
1424
+ version = "0.1.0.dev8"
1425
1425
  source = { editable = "." }
1426
1426
  dependencies = [
1427
1427
  { name = "duckdb" },