uqa 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106) hide show
  1. uqa/__init__.py +34 -0
  2. uqa/api/__init__.py +6 -0
  3. uqa/api/query_builder.py +436 -0
  4. uqa/core/__init__.py +6 -0
  5. uqa/core/hierarchical.py +62 -0
  6. uqa/core/posting_list.py +208 -0
  7. uqa/core/types.py +234 -0
  8. uqa/engine.py +474 -0
  9. uqa/execution/__init__.py +5 -0
  10. uqa/execution/batch.py +300 -0
  11. uqa/execution/physical.py +43 -0
  12. uqa/execution/relational.py +955 -0
  13. uqa/execution/scan.py +149 -0
  14. uqa/execution/spill.py +173 -0
  15. uqa/fusion/__init__.py +6 -0
  16. uqa/fusion/boolean.py +45 -0
  17. uqa/fusion/log_odds.py +85 -0
  18. uqa/graph/__init__.py +6 -0
  19. uqa/graph/cross_paradigm.py +336 -0
  20. uqa/graph/operators.py +388 -0
  21. uqa/graph/pattern.py +179 -0
  22. uqa/graph/posting_list.py +97 -0
  23. uqa/graph/store.py +77 -0
  24. uqa/joins/__init__.py +6 -0
  25. uqa/joins/base.py +37 -0
  26. uqa/joins/cross_paradigm.py +361 -0
  27. uqa/joins/index.py +78 -0
  28. uqa/joins/inner.py +69 -0
  29. uqa/joins/outer.py +84 -0
  30. uqa/joins/sort_merge.py +109 -0
  31. uqa/operators/__init__.py +6 -0
  32. uqa/operators/aggregation.py +192 -0
  33. uqa/operators/base.py +67 -0
  34. uqa/operators/boolean.py +75 -0
  35. uqa/operators/hierarchical.py +111 -0
  36. uqa/operators/hybrid.py +214 -0
  37. uqa/operators/primitive.py +257 -0
  38. uqa/planner/__init__.py +6 -0
  39. uqa/planner/cardinality.py +326 -0
  40. uqa/planner/cost_model.py +93 -0
  41. uqa/planner/executor.py +198 -0
  42. uqa/planner/optimizer.py +311 -0
  43. uqa/planner/parallel.py +87 -0
  44. uqa/scoring/__init__.py +6 -0
  45. uqa/scoring/bayesian_bm25.py +119 -0
  46. uqa/scoring/bm25.py +67 -0
  47. uqa/scoring/vector.py +34 -0
  48. uqa/scoring/wand.py +220 -0
  49. uqa/sql/__init__.py +6 -0
  50. uqa/sql/compiler.py +2062 -0
  51. uqa/sql/expr_evaluator.py +520 -0
  52. uqa/sql/table.py +327 -0
  53. uqa/storage/__init__.py +6 -0
  54. uqa/storage/block_max_index.py +172 -0
  55. uqa/storage/btree_index.py +120 -0
  56. uqa/storage/catalog.py +600 -0
  57. uqa/storage/document_store.py +48 -0
  58. uqa/storage/index_abc.py +56 -0
  59. uqa/storage/index_manager.py +122 -0
  60. uqa/storage/index_types.py +31 -0
  61. uqa/storage/inverted_index.py +195 -0
  62. uqa/storage/managed_connection.py +105 -0
  63. uqa/storage/sqlite_document_store.py +182 -0
  64. uqa/storage/sqlite_graph_store.py +153 -0
  65. uqa/storage/sqlite_inverted_index.py +607 -0
  66. uqa/storage/sqlite_vector_index.py +130 -0
  67. uqa/storage/transaction.py +94 -0
  68. uqa/storage/vector_index.py +83 -0
  69. uqa/tests/__init__.py +6 -0
  70. uqa/tests/conftest.py +139 -0
  71. uqa/tests/test_catalog.py +868 -0
  72. uqa/tests/test_correlated_subquery.py +228 -0
  73. uqa/tests/test_cost_optimizer.py +287 -0
  74. uqa/tests/test_cross_paradigm_optimizer.py +610 -0
  75. uqa/tests/test_cte.py +177 -0
  76. uqa/tests/test_execution.py +883 -0
  77. uqa/tests/test_expr_evaluator.py +623 -0
  78. uqa/tests/test_fusion.py +168 -0
  79. uqa/tests/test_graph.py +547 -0
  80. uqa/tests/test_index.py +496 -0
  81. uqa/tests/test_integration.py +276 -0
  82. uqa/tests/test_joins.py +631 -0
  83. uqa/tests/test_offset_like.py +234 -0
  84. uqa/tests/test_operators.py +514 -0
  85. uqa/tests/test_parallel.py +321 -0
  86. uqa/tests/test_posting_list.py +249 -0
  87. uqa/tests/test_prepared.py +269 -0
  88. uqa/tests/test_scoring.py +193 -0
  89. uqa/tests/test_skip_blockmax.py +495 -0
  90. uqa/tests/test_spill.py +414 -0
  91. uqa/tests/test_sql.py +816 -0
  92. uqa/tests/test_sqlite_document_store.py +407 -0
  93. uqa/tests/test_sqlite_graph_store.py +450 -0
  94. uqa/tests/test_sqlite_inverted_index.py +501 -0
  95. uqa/tests/test_sqlite_vector_index.py +309 -0
  96. uqa/tests/test_subquery.py +231 -0
  97. uqa/tests/test_transaction.py +513 -0
  98. uqa/tests/test_update_delete.py +402 -0
  99. uqa/tests/test_views.py +253 -0
  100. uqa/tests/test_window.py +290 -0
  101. uqa-0.3.0.dist-info/METADATA +17 -0
  102. uqa-0.3.0.dist-info/RECORD +106 -0
  103. uqa-0.3.0.dist-info/WHEEL +5 -0
  104. uqa-0.3.0.dist-info/entry_points.txt +2 -0
  105. uqa-0.3.0.dist-info/licenses/LICENSE +661 -0
  106. uqa-0.3.0.dist-info/top_level.txt +1 -0
uqa/__init__.py ADDED
@@ -0,0 +1,34 @@
1
+ #
2
+ # Unified Query Algebra
3
+ #
4
+ # Copyright (c) 2023-2026 Cognica, Inc.
5
+ #
6
+
7
def __getattr__(name: str):
    """Resolve the package's public names lazily (module-level ``__getattr__``).

    The submodule that defines the requested name is imported only when the
    name is first looked up on the package, so importing ``uqa`` itself does
    not pull in every submodule.

    Raises:
        AttributeError: if ``name`` is not one of the lazily exported names.
    """
    lazy_exports = {
        "Engine": ("uqa.engine", "Engine"),
        "QueryBuilder": ("uqa.api.query_builder", "QueryBuilder"),
        "PostingList": ("uqa.core.posting_list", "PostingList"),
        "Vertex": ("uqa.core.types", "Vertex"),
        "Edge": ("uqa.core.types", "Edge"),
        "GraphPattern": ("uqa.graph.pattern", "GraphPattern"),
    }

    # Guard clause: unknown names fail before any import machinery is touched.
    if name not in lazy_exports:
        raise AttributeError(f"module 'uqa' has no attribute {name!r}")

    import importlib

    module_path, attr = lazy_exports[name]
    return getattr(importlib.import_module(module_path), attr)
23
+
24
+
25
# Version of the uqa package.
__version__ = "0.3.0"

# Public API of the package.  Every name listed here is resolved on first
# access by the module-level __getattr__ defined in this file.
__all__ = [
    "Engine",
    "QueryBuilder",
    "PostingList",
    "Vertex",
    "Edge",
    "GraphPattern",
]
uqa/api/__init__.py ADDED
@@ -0,0 +1,6 @@
1
+ #
2
+ # Unified Query Algebra
3
+ #
4
+ # Copyright (c) 2023-2026 Cognica, Inc.
5
+ #
6
+
uqa/api/query_builder.py ADDED
@@ -0,0 +1,436 @@
1
+ #
2
+ # Unified Query Algebra
3
+ #
4
+ # Copyright (c) 2023-2026 Cognica, Inc.
5
+ #
6
+
7
+ from __future__ import annotations
8
+
9
+ from typing import TYPE_CHECKING, Any
10
+
11
+ import numpy as np
12
+ from numpy.typing import NDArray
13
+
14
+ from uqa.core.types import PathExpr, Predicate
15
+
16
+ if TYPE_CHECKING:
17
+ from uqa.core.posting_list import GeneralizedPostingList, PostingList
18
+ from uqa.engine import Engine
19
+
20
+
21
class AggregateResult:
    """Result of an aggregation query.

    Thin wrapper that exposes the aggregated value as ``.value``.
    """

    def __init__(self, value: Any) -> None:
        self.value = value

    def __repr__(self) -> str:
        return "AggregateResult(%r)" % (self.value,)
29
+
30
+
31
class FacetResult:
    """Result of a facet query: mapping of value -> count.

    The mapping is stored as given (not copied) and exposed as ``.counts``.
    """

    def __init__(self, counts: dict[Any, int]) -> None:
        self.counts = counts

    def __repr__(self) -> str:
        return "FacetResult(%r)" % (self.counts,)
39
+
40
+
41
class QueryBuilder:
    """Fluent API for constructing queries over the unified algebra (Section 13, Design Doc).

    A builder holds a reference to the engine and a single root operator
    (``_root``; ``None`` means an empty query).  Builder methods do not mutate
    the receiver: each one returns a ``QueryBuilder`` whose root wraps or
    combines the previous root.  ``aggregate`` and ``facet`` are terminal:
    they execute immediately and return result objects instead of a builder.

    Operator classes are imported inside the methods that use them, so that
    importing this module does not import every operator module.
    """

    def __init__(self, engine: Engine):
        self._engine = engine
        self._root: Any = None

    # -- Term retrieval (Definition 3.1.1) --

    def term(self, term: str, field: str | None = None) -> QueryBuilder:
        """Add a ``TermOperator`` for ``term`` (optionally restricted to ``field``)."""
        from uqa.operators.primitive import TermOperator

        return self._chain(TermOperator(term, field))

    # -- Vector search (Definitions 3.1.2, 3.1.3) --

    def vector(
        self, query: NDArray, threshold: float, field: str = "embedding"
    ) -> QueryBuilder:
        """Add a ``VectorSimilarityOperator`` built from ``query``, ``threshold`` and ``field``."""
        from uqa.operators.primitive import VectorSimilarityOperator

        return self._chain(VectorSimilarityOperator(query, threshold, field))

    def knn(self, query: NDArray, k: int, field: str = "embedding") -> QueryBuilder:
        """Add a ``KNNOperator`` built from ``query``, ``k`` and ``field``."""
        from uqa.operators.primitive import KNNOperator

        return self._chain(KNNOperator(query, k, field))

    # -- Boolean algebra --

    def and_(self, other: QueryBuilder) -> QueryBuilder:
        """Return a builder whose root intersects this root with ``other``'s root.

        Raises:
            ValueError: if either builder is empty.
        """
        self._require_roots(other, "Both builders must have operators before combining")
        from uqa.operators.boolean import IntersectOperator

        return self._wrap(IntersectOperator([self._root, other._root]))

    def or_(self, other: QueryBuilder) -> QueryBuilder:
        """Return a builder whose root is the union of this root and ``other``'s root.

        Raises:
            ValueError: if either builder is empty.
        """
        self._require_roots(other, "Both builders must have operators before combining")
        from uqa.operators.boolean import UnionOperator

        return self._wrap(UnionOperator([self._root, other._root]))

    def not_(self) -> QueryBuilder:
        """Return a builder whose root is the complement of this root.

        Raises:
            ValueError: if this builder is empty.
        """
        if self._root is None:
            raise ValueError("Builder must have an operator before negation")
        from uqa.operators.boolean import ComplementOperator

        return self._wrap(ComplementOperator(self._root))

    # -- Filter (Definition 3.1.4) --

    def filter(self, field: str, predicate: Predicate) -> QueryBuilder:
        """Wrap the current root in a ``FilterOperator`` on ``field``.

        The current root is passed through unchecked, so it may be ``None``.
        NOTE(review): presumably ``FilterOperator`` treats a ``None`` source as
        "filter over everything" -- confirm against the operator.
        """
        from uqa.operators.primitive import FilterOperator

        return self._wrap(FilterOperator(field, predicate, self._root))

    # -- Joins (Section 4, Paper 1) --

    def join(
        self,
        other: QueryBuilder,
        left_field: str,
        right_field: str,
    ) -> QueryBuilder:
        """Inner-join this root with ``other``'s root on ``left_field`` / ``right_field``.

        Raises:
            ValueError: if either builder is empty.
        """
        self._require_roots(other, "Both builders must have operators before joining")
        from uqa.joins.base import JoinCondition
        from uqa.joins.inner import InnerJoinOperator

        condition = JoinCondition(left_field, right_field)
        return self._wrap(InnerJoinOperator(self._root, other._root, condition))

    def vector_join(
        self,
        other: QueryBuilder,
        left_field: str,
        right_field: str,
        threshold: float,
    ) -> QueryBuilder:
        """Join this root with ``other``'s root via ``VectorSimilarityJoinOperator``.

        Raises:
            ValueError: if either builder is empty.
        """
        self._require_roots(other, "Both builders must have operators before joining")
        from uqa.joins.cross_paradigm import VectorSimilarityJoinOperator

        op = VectorSimilarityJoinOperator(
            self._root, other._root, left_field, right_field, threshold
        )
        return self._wrap(op)

    # -- Graph operations (Paper 2) --

    def traverse(
        self, start: int, label: str | None = None, max_hops: int = 1
    ) -> QueryBuilder:
        """Add a ``TraverseOperator`` built from ``start``, ``label`` and ``max_hops``."""
        from uqa.graph.operators import TraverseOperator

        return self._chain(TraverseOperator(start, label, max_hops))

    def match_pattern(self, pattern: Any) -> QueryBuilder:
        """Add a ``PatternMatchOperator`` for ``pattern``."""
        from uqa.graph.operators import PatternMatchOperator

        return self._chain(PatternMatchOperator(pattern))

    def rpq(self, expr: str, start: int | None = None) -> QueryBuilder:
        """Parse ``expr`` with ``parse_rpq`` and add a ``RegularPathQueryOperator``."""
        from uqa.graph.pattern import parse_rpq
        from uqa.graph.operators import RegularPathQueryOperator

        path_expr = parse_rpq(expr)
        return self._chain(RegularPathQueryOperator(path_expr, start_vertex=start))

    # -- Aggregation (Section 5.1, Paper 1) --

    def aggregate(self, field: str, agg: str) -> AggregateResult:
        """Execute the query and aggregate ``field`` with the named monoid.

        Args:
            field: field to aggregate.
            agg: one of ``count``, ``sum``, ``avg``, ``min``, ``max``
                (matched case-insensitively).

        Returns:
            An ``AggregateResult`` holding the ``_aggregate`` field of the
            first result entry, or the monoid's finalized identity when the
            result is empty.

        Raises:
            ValueError: if ``agg`` is not a known aggregation name.
        """
        from uqa.operators.aggregation import (
            AggregateOperator,
            AvgMonoid,
            CountMonoid,
            MaxMonoid,
            MinMonoid,
            SumMonoid,
        )

        monoid_map = {
            "count": CountMonoid,
            "sum": SumMonoid,
            "avg": AvgMonoid,
            "min": MinMonoid,
            "max": MaxMonoid,
        }
        monoid_cls = monoid_map.get(agg.lower())
        if monoid_cls is None:
            raise ValueError(f"Unknown aggregation: {agg}")

        monoid = monoid_cls()
        agg_op = AggregateOperator(self._root, field, monoid)
        ctx = self._engine._build_context()
        result_pl = agg_op.execute(ctx)

        if result_pl and len(result_pl) > 0:
            entry = next(iter(result_pl))
            return AggregateResult(entry.payload.fields.get("_aggregate"))
        return AggregateResult(monoid.finalize(monoid.identity()))

    def facet(self, field: str) -> FacetResult:
        """Execute the query and count occurrences of each value of ``field``.

        Reads ``_facet_value`` / ``_facet_count`` from every result entry;
        entries whose facet value is ``None`` are skipped.
        """
        from uqa.operators.primitive import FacetOperator

        op = FacetOperator(field, self._root)
        ctx = self._engine._build_context()
        result_pl = op.execute(ctx)

        counts: dict[Any, int] = {}
        for entry in result_pl:
            val = entry.payload.fields.get("_facet_value")
            count = int(entry.payload.fields.get("_facet_count", 0))
            if val is not None:
                counts[val] = count
        return FacetResult(counts)

    # -- Hierarchical (Section 5.2-5.3, Paper 1) --

    def path_filter(self, path: PathExpr, predicate: Predicate) -> QueryBuilder:
        """Wrap the current root in a ``PathFilterOperator`` for ``path`` / ``predicate``."""
        from uqa.operators.hierarchical import PathFilterOperator

        return self._wrap(PathFilterOperator(path, predicate, self._root))

    def path_project(self, *paths: PathExpr) -> QueryBuilder:
        """Wrap the current root in a ``PathProjectOperator`` over ``paths``."""
        from uqa.operators.hierarchical import PathProjectOperator

        return self._wrap(PathProjectOperator(list(paths), self._root))

    def unnest(self, path: PathExpr) -> QueryBuilder:
        """Wrap the current root in a ``PathUnnestOperator`` for ``path``."""
        from uqa.operators.hierarchical import PathUnnestOperator

        return self._wrap(PathUnnestOperator(path, self._root))

    # -- Scoring --

    def score_bm25(self, query: str) -> QueryBuilder:
        """Wrap the current root in a BM25 ``ScoreOperator``.

        ``query`` is lower-cased and split on whitespace to obtain the terms;
        the scorer uses default ``BM25Params`` and the current index stats.
        """
        from uqa.operators.primitive import ScoreOperator
        from uqa.scoring.bm25 import BM25Params, BM25Scorer

        terms = query.lower().split()
        ctx = self._engine._build_context()
        scorer = BM25Scorer(BM25Params(), ctx.inverted_index.stats)
        return self._wrap(ScoreOperator(scorer, self._root, terms))

    def score_bayesian_bm25(self, query: str) -> QueryBuilder:
        """Wrap the current root in a Bayesian-BM25 ``ScoreOperator``.

        ``query`` is lower-cased and split on whitespace to obtain the terms;
        the scorer uses default ``BayesianBM25Params`` and the current index stats.
        """
        from uqa.operators.primitive import ScoreOperator
        from uqa.scoring.bayesian_bm25 import BayesianBM25Params, BayesianBM25Scorer

        terms = query.lower().split()
        ctx = self._engine._build_context()
        scorer = BayesianBM25Scorer(BayesianBM25Params(), ctx.inverted_index.stats)
        return self._wrap(ScoreOperator(scorer, self._root, terms))

    # -- Fusion (Paper 4) --

    def fuse_log_odds(
        self, *builders: QueryBuilder, alpha: float = 0.5
    ) -> QueryBuilder:
        """Fuse the roots of ``builders`` with ``LogOddsFusion``.

        Only the builders passed as arguments contribute; the receiver's own
        root is not part of the fusion.  Empty builders are ignored, and when
        no non-empty builder is given the receiver is returned unchanged.

        Args:
            builders: sub-queries to fuse.
            alpha: forwarded to ``LogOddsFusion`` as ``confidence_alpha``.
        """
        roots = [b._root for b in builders if b._root is not None]
        if not roots:
            return self

        # Imported only once there is something to fuse; no execution context
        # is needed here because fusion happens when the operator executes.
        from uqa.fusion.log_odds import LogOddsFusion

        fusion = LogOddsFusion(confidence_alpha=alpha)
        return self._wrap(_FusionOperator(fusion, roots))

    def fuse_prob_and(self, *builders: QueryBuilder) -> QueryBuilder:
        """Fuse the roots of ``builders`` with probabilistic AND.

        Empty builders are ignored.  Roots are selected with ``is not None``
        so an operator that happens to be falsy is never dropped.
        """
        roots = [b._root for b in builders if b._root is not None]
        return self._wrap(_ProbBooleanOperator("and", roots))

    def fuse_prob_or(self, *builders: QueryBuilder) -> QueryBuilder:
        """Fuse the roots of ``builders`` with probabilistic OR.

        Empty builders are ignored.  Roots are selected with ``is not None``
        so an operator that happens to be falsy is never dropped.
        """
        roots = [b._root for b in builders if b._root is not None]
        return self._wrap(_ProbBooleanOperator("or", roots))

    # -- Execution --

    def execute(self) -> PostingList:
        """Optimize and execute the query; an empty query yields an empty ``PostingList``."""
        if self._root is None:
            from uqa.core.posting_list import PostingList

            return PostingList()

        executor, optimized = self._optimized_plan()
        return executor.execute(optimized)

    def explain(self) -> str:
        """Return the executor's explanation of the optimized plan.

        An empty query returns the literal ``"(empty query)"`` without
        touching the planner.
        """
        if self._root is None:
            return "(empty query)"

        executor, optimized = self._optimized_plan()
        return executor.explain(optimized)

    # -- Internal --

    def _chain(self, op: Any) -> QueryBuilder:
        """Return a new builder for ``op``, intersected with the current root if any."""
        if self._root is not None:
            from uqa.operators.boolean import IntersectOperator

            op = IntersectOperator([self._root, op])
        return self._wrap(op)

    def _wrap(self, op: Any) -> QueryBuilder:
        """Return a new builder on the same engine whose root is ``op``."""
        qb = QueryBuilder(self._engine)
        qb._root = op
        return qb

    def _require_roots(self, other: QueryBuilder, message: str) -> None:
        """Raise ``ValueError(message)`` unless both builders have a root operator."""
        if self._root is None or other._root is None:
            raise ValueError(message)

    def _optimized_plan(self) -> tuple[Any, Any]:
        """Build a context, optimize the root, and return ``(executor, optimized_root)``."""
        from uqa.planner.executor import PlanExecutor
        from uqa.planner.optimizer import QueryOptimizer

        ctx = self._engine._build_context()
        optimized = QueryOptimizer(ctx.inverted_index.stats).optimize(self._root)
        return PlanExecutor(ctx), optimized
353
+
354
+
355
class _FusionOperator:
    """Internal operator for log-odds fusion across multiple sub-queries.

    Executes every source operator, gathers the scores each document received,
    and replaces them with a single score produced by ``fusion.fuse``.
    """

    def __init__(self, fusion: Any, sources: list[Any]):
        self.fusion = fusion
        self.sources = sources

    def execute(self, context: Any) -> Any:
        """Run all sources and return a posting list of fused scores per document."""
        from uqa.core.types import Payload, PostingEntry
        from uqa.core.posting_list import PostingList

        per_source = [source.execute(context) for source in self.sources]
        source_count = len(per_source)

        # doc_id -> scores observed for that document, in source order.
        observed: dict[int, list[float]] = {}
        for postings in per_source:
            for posting in postings:
                observed.setdefault(posting.doc_id, []).append(posting.payload.score)

        fused_entries = []
        for doc_id, doc_scores in observed.items():
            # A document missing from some source is padded with 0.5 for each
            # absent source before fusing.
            doc_scores.extend([0.5] * (source_count - len(doc_scores)))
            combined = self.fusion.fuse(doc_scores)
            fused_entries.append(PostingEntry(doc_id, Payload(score=combined)))

        return PostingList(fused_entries)

    def compose(self, other: Any) -> Any:
        """Return a ``ComposedOperator`` of this operator followed by ``other``."""
        from uqa.operators.base import ComposedOperator

        return ComposedOperator([self, other])

    def cost_estimate(self, stats: Any) -> float:
        """Sum the sources' cost estimates; a source without one counts as 100.0."""

        def fallback(_: Any) -> float:
            return 100.0

        total = 0
        for source in self.sources:
            total += getattr(source, "cost_estimate", fallback)(stats)
        return total
393
+
394
+
395
class _ProbBooleanOperator:
    """Internal operator for probabilistic boolean fusion.

    ``mode`` selects the combination: ``"and"`` uses
    ``ProbabilisticBoolean.prob_and``; any other value uses
    ``ProbabilisticBoolean.prob_or``.
    """

    def __init__(self, mode: str, sources: list[Any]):
        self.mode = mode
        self.sources = sources

    def execute(self, context: Any) -> Any:
        """Run all sources and return a posting list of combined scores per document."""
        from uqa.core.types import Payload, PostingEntry
        from uqa.core.posting_list import PostingList
        from uqa.fusion.boolean import ProbabilisticBoolean

        per_source = [source.execute(context) for source in self.sources]
        source_count = len(per_source)

        # doc_id -> scores observed for that document, in source order.
        observed: dict[int, list[float]] = {}
        for postings in per_source:
            for posting in postings:
                observed.setdefault(posting.doc_id, []).append(posting.payload.score)

        combined_entries = []
        for doc_id, doc_scores in observed.items():
            if self.mode != "and":
                combined = ProbabilisticBoolean.prob_or(doc_scores)
            else:
                # In AND mode a document missing from some source is padded
                # with 0.0 for each absent source before combining.
                doc_scores.extend([0.0] * (source_count - len(doc_scores)))
                combined = ProbabilisticBoolean.prob_and(doc_scores)
            combined_entries.append(PostingEntry(doc_id, Payload(score=combined)))

        return PostingList(combined_entries)

    def compose(self, other: Any) -> Any:
        """Return a ``ComposedOperator`` of this operator followed by ``other``."""
        from uqa.operators.base import ComposedOperator

        return ComposedOperator([self, other])

    def cost_estimate(self, stats: Any) -> float:
        """Sum the sources' cost estimates; a source without one counts as 100.0."""

        def fallback(_: Any) -> float:
            return 100.0

        total = 0
        for source in self.sources:
            total += getattr(source, "cost_estimate", fallback)(stats)
        return total
uqa/core/__init__.py ADDED
@@ -0,0 +1,6 @@
1
+ #
2
+ # Unified Query Algebra
3
+ #
4
+ # Copyright (c) 2023-2026 Cognica, Inc.
5
+ #
6
+
uqa/core/hierarchical.py ADDED
@@ -0,0 +1,62 @@
1
+ #
2
+ # Unified Query Algebra
3
+ #
4
+ # Copyright (c) 2023-2026 Cognica, Inc.
5
+ #
6
+
7
+ from __future__ import annotations
8
+
9
+ from typing import Any
10
+
11
+ from uqa.core.types import DocId, PathExpr, Payload, PostingEntry
12
+ from uqa.core.posting_list import PostingList
13
+
14
+
15
class HierarchicalDocument:
    """Recursive document structure (Definition 5.2.1, Paper 1).

    Pairs a document id with arbitrarily nested data (dicts, lists, scalars).
    """

    def __init__(self, doc_id: DocId, data: dict | list | Any):
        self.doc_id = doc_id
        self.data = data

    def eval_path(self, path: PathExpr) -> Any:
        """Path evaluation (Definition 5.2.3).

        Walks ``path`` one component at a time: a ``str`` component looks up a
        key in a dict, an ``int`` component indexes a list (negative indexes
        count from the end, as in normal Python indexing).

        Returns:
            The value at ``path``, or ``None`` when the path cannot be
            resolved: missing key, index out of range in either direction,
            a component whose type does not match the current container, or
            an intermediate value that is ``None``.
        """
        current = self.data
        for component in path:
            if isinstance(current, dict) and isinstance(component, str):
                current = current.get(component)
            elif isinstance(current, list) and isinstance(component, int):
                # Check both bounds: an index below -len(current) would raise
                # IndexError instead of yielding None like any other miss.
                if not -len(current) <= component < len(current):
                    return None
                current = current[component]
            else:
                return None
            if current is None:
                return None
        return current
35
+
36
+
37
def project_paths(
    doc: HierarchicalDocument, paths: list[PathExpr]
) -> dict[str, Any]:
    """Project a document to a subset of paths (Definition 5.3.2).

    Each path becomes one key of the result, formed by joining its components
    with ``"."``; the value is whatever ``doc.eval_path`` returns for that
    path.  If two paths produce the same key, the later one wins.
    """
    return {".".join(map(str, path)): doc.eval_path(path) for path in paths}
46
+
47
+
48
def unnest_array(
    doc: HierarchicalDocument, path: PathExpr
) -> list[HierarchicalDocument]:
    """Unnest an array at a path into separate documents (Definition 5.3.4).

    Produces one document per array element, all sharing ``doc.doc_id``.
    Each output starts from a shallow copy of ``doc.data`` (or an empty dict
    when the data is not a dict) and adds the element under
    ``"<dotted path>._unnested"`` and its position under ``"_unnest_index"``.
    Returns an empty list when the path does not resolve to a list.
    """
    elements = doc.eval_path(path)
    if not isinstance(elements, list):
        return []

    # The key depends only on the path, so it is built once for all elements.
    element_key = ".".join(map(str, path)) + "._unnested"

    unnested: list[HierarchicalDocument] = []
    for position, element in enumerate(elements):
        if isinstance(doc.data, dict):
            fields = dict(doc.data)
        else:
            fields = {}
        fields[element_key] = element
        fields["_unnest_index"] = position
        unnested.append(HierarchicalDocument(doc.doc_id, fields))
    return unnested