xfmr-zem 0.2.7__py3-none-any.whl → 0.2.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
xfmr_zem/client.py CHANGED
@@ -51,22 +51,48 @@ class PipelineClient:
51
51
  items.append((new_key, v))
52
52
  return dict(items)
53
53
 
54
+ def _unflatten_params(self, flat_dict: Dict[str, Any]) -> Dict[str, Any]:
55
+ """Expand dot-notation keys into nested dictionaries."""
56
+ nested = {}
57
+ for key, value in flat_dict.items():
58
+ if "." in key:
59
+ parts = key.split(".")
60
+ d = nested
61
+ for part in parts[:-1]:
62
+ if part not in d or not isinstance(d[part], dict):
63
+ d[part] = {}
64
+ d = d[part]
65
+ d[parts[-1]] = value
66
+ else:
67
+ if isinstance(value, dict) and key in nested and isinstance(nested[key], dict):
68
+ nested[key].update(value)
69
+ else:
70
+ nested[key] = value
71
+ return nested
72
+
54
73
  def _load_config_dict(self, path: Path) -> Dict[str, Any]:
55
74
  """Load YAML config and perform substitution."""
56
75
  with open(path, "r") as f:
57
76
  raw_content = f.read()
58
77
 
59
- self.params = self._load_params(None)
78
+ # 1. Load parameters from file
79
+ base_params = self._load_params(None)
80
+
81
+ # 2. Add custom parameters file if provided
82
+ if self.params_path:
83
+ custom_params = self._load_params(self.params_path)
84
+ base_params.update(custom_params)
85
+
86
+ # 3. Load internal parameters from the config file itself
60
87
  preliminary_dict = yaml.safe_load(raw_content) or {}
61
88
  internal_params = preliminary_dict.get("parameters", {})
62
89
  if internal_params:
63
- self.params.update(internal_params)
64
-
65
- if self.params_path:
66
- custom_params = self._load_params(self.params_path)
67
- self.params.update(custom_params)
90
+ base_params.update(internal_params)
68
91
 
69
- # Flatten params for template substitution
92
+ # Store unflattened parameters for hierarchical lookup
93
+ self.params = self._unflatten_params(base_params)
94
+
95
+ # 4. Flatten all params for template substitution ({{ key }})
70
96
  flat_params = self._flatten_params(self.params)
71
97
 
72
98
  content = raw_content
@@ -105,11 +131,12 @@ class PipelineClient:
105
131
  env["PYTHONPATH"] = f"{src_path}:{current_pythonpath}" if current_pythonpath else src_path
106
132
 
107
133
  server_specific_params = {}
108
- prefix = f"{name}."
109
134
  for key, value in self.params.items():
110
- if key.startswith(prefix):
111
- server_specific_params[key[len(prefix):]] = value
112
- else:
135
+ if key == name and isinstance(value, dict):
136
+ # Direct match: ocr -> { ... }
137
+ server_specific_params.update(value)
138
+ elif not isinstance(value, dict):
139
+ # Global scalars
113
140
  server_specific_params[key] = value
114
141
 
115
142
  env["ZEM_PARAMETERS"] = yaml.dump(server_specific_params)
xfmr_zem/servers/ocr/server.py CHANGED
@@ -43,6 +43,7 @@ def extract_pdf_pages(
43
43
  # Temporary save for engine compatibility (engines expect path)
44
44
  temp_path = os.path.join(temp_dir, f"ocr_page_{os.getpid()}_{page_num}.png")
45
45
  img.save(temp_path)
46
+ logger.debug(f"Saved temporary page image to: {temp_path}")
46
47
 
47
48
  try:
48
49
  ocr_result = ocr_engine.process(temp_path)
{xfmr_zem-0.2.7.dist-info → xfmr_zem-0.2.8.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: xfmr-zem
3
- Version: 0.2.7
3
+ Version: 0.2.8
4
4
  Summary: Zem: Unified Data Pipeline Framework (ZenML + NeMo Curator + DataJuicer) for multi-domain processing
5
5
  Project-URL: Homepage, https://github.com/OAI-Labs/xfmr-zem
6
6
  Project-URL: Repository, https://github.com/OAI-Labs/xfmr-zem
{xfmr_zem-0.2.7.dist-info → xfmr_zem-0.2.8.dist-info}/RECORD CHANGED
@@ -1,6 +1,6 @@
1
1
  xfmr_zem/__init__.py,sha256=Abx2BepsZu-e7E93N2lOgu9w0b4TBZLN6MEzCzDCn_A,1138
2
2
  xfmr_zem/cli.py,sha256=5oz4qxXthU4mXu7bSbfKreVkAvCqrieXpGoKhJBXBvk,12538
3
- xfmr_zem/client.py,sha256=wf9N_fILDBvWd-08TnNq3B1PqKQPhR0pvVuJq0vidk0,11435
3
+ xfmr_zem/client.py,sha256=2PkJavZ8kMVq0dXoeZvpRODO96tWiXyT1alZLcw5RH0,12601
4
4
  xfmr_zem/schemas.py,sha256=0tHM0ftOWTWxNiqmAZn_MyIYJwF2p9brHK0MHlOMlKY,494
5
5
  xfmr_zem/server.py,sha256=EeohfqhUiCm0cGnV85H2ODZ4FLXjcTjbkdHrHuGHW4I,8363
6
6
  xfmr_zem/zenml_wrapper.py,sha256=LHgDewuPBjCl4EiU6JZVU-_lyEi-ATURDSG9Vf7PbEY,6739
@@ -18,7 +18,7 @@ xfmr_zem/servers/nemo_curator/server.py,sha256=zcHoSwxxoK_rMaDIAbEy1s8qfdp68Ue4B
18
18
  xfmr_zem/servers/ocr/engines.py,sha256=zScn4Qjxbpl2nB8UXEf3kd9l8z84TEwGs6bV5ka8Lks,10295
19
19
  xfmr_zem/servers/ocr/install_models.py,sha256=t02zpoy8djVhITOLEaRJ2mjiMrFfA9H6fpeHD3hXuio,2135
20
20
  xfmr_zem/servers/ocr/parameters.yml,sha256=UTMwtTu0Eeit0tFkYcZOxpuzD78UBlpONXZIx6STYwc,144
21
- xfmr_zem/servers/ocr/server.py,sha256=eJtQnMVBFX6PLZMxZITNlNEXGarjsvkz003-uT1iIo0,4369
21
+ xfmr_zem/servers/ocr/server.py,sha256=wfk9L1776TOpFNlmc73jknEMDDobfcFgqBUhcVX2elc,4441
22
22
  xfmr_zem/servers/ocr/deepdoc_vietocr/__init__.py,sha256=XJE7RnOu5oo5p902HPWPDBd7FhVQXetmnr2-kWEG0nI,2419
23
23
  xfmr_zem/servers/ocr/deepdoc_vietocr/implementations.py,sha256=79fYr76fx8yZda3HaFcK1d5G-4sDVf1JFHNW_OBQAk8,47348
24
24
  xfmr_zem/servers/ocr/deepdoc_vietocr/layout_recognizer.py,sha256=7BeLHzf9FQUkkHMb5jDpggruJmfXVMU78MF_EeZ9PG4,10462
@@ -51,8 +51,8 @@ xfmr_zem/servers/sinks/parameters.yml,sha256=9HAnv84Utw2qWsVZH8uOjVE62lnAKBkzv4P
51
51
  xfmr_zem/servers/sinks/server.py,sha256=jI_r4sq_U_avNwF1PiE0alpaDrYpzOI-qPeLU7hgHP0,1589
52
52
  xfmr_zem/servers/unstructured/parameters.yml,sha256=N31cmc56GTr3rkVhbni4yOpbnHISReN8f-KnRZTDbBc,118
53
53
  xfmr_zem/servers/unstructured/server.py,sha256=0XmXWMAUNEJboX-J4bn_8EBUfMHIqu_ylNC_s9YOZdk,1996
54
- xfmr_zem-0.2.7.dist-info/METADATA,sha256=Iv77eb-eHw6rdJhG1LfoNY4Hf9I7oFlIsx1K3K7_sH0,6379
55
- xfmr_zem-0.2.7.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
56
- xfmr_zem-0.2.7.dist-info/entry_points.txt,sha256=uxs-IXFxpSakHivpFN3mEr13cz-z-0vkeSF_4dEBMa4,65
57
- xfmr_zem-0.2.7.dist-info/licenses/LICENSE,sha256=kf_ILr0zLkSy5-EBu0VF2PGaOykYo83z3UijI-bZeAE,11342
58
- xfmr_zem-0.2.7.dist-info/RECORD,,
54
+ xfmr_zem-0.2.8.dist-info/METADATA,sha256=sv4boGlSzTYgE1MlKIZieIVvRoioVKoWwTOPXhrqKeE,6379
55
+ xfmr_zem-0.2.8.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
56
+ xfmr_zem-0.2.8.dist-info/entry_points.txt,sha256=uxs-IXFxpSakHivpFN3mEr13cz-z-0vkeSF_4dEBMa4,65
57
+ xfmr_zem-0.2.8.dist-info/licenses/LICENSE,sha256=kf_ILr0zLkSy5-EBu0VF2PGaOykYo83z3UijI-bZeAE,11342
58
+ xfmr_zem-0.2.8.dist-info/RECORD,,