uqa 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- uqa/__init__.py +34 -0
- uqa/api/__init__.py +6 -0
- uqa/api/query_builder.py +436 -0
- uqa/core/__init__.py +6 -0
- uqa/core/hierarchical.py +62 -0
- uqa/core/posting_list.py +208 -0
- uqa/core/types.py +234 -0
- uqa/engine.py +474 -0
- uqa/execution/__init__.py +5 -0
- uqa/execution/batch.py +300 -0
- uqa/execution/physical.py +43 -0
- uqa/execution/relational.py +955 -0
- uqa/execution/scan.py +149 -0
- uqa/execution/spill.py +173 -0
- uqa/fusion/__init__.py +6 -0
- uqa/fusion/boolean.py +45 -0
- uqa/fusion/log_odds.py +85 -0
- uqa/graph/__init__.py +6 -0
- uqa/graph/cross_paradigm.py +336 -0
- uqa/graph/operators.py +388 -0
- uqa/graph/pattern.py +179 -0
- uqa/graph/posting_list.py +97 -0
- uqa/graph/store.py +77 -0
- uqa/joins/__init__.py +6 -0
- uqa/joins/base.py +37 -0
- uqa/joins/cross_paradigm.py +361 -0
- uqa/joins/index.py +78 -0
- uqa/joins/inner.py +69 -0
- uqa/joins/outer.py +84 -0
- uqa/joins/sort_merge.py +109 -0
- uqa/operators/__init__.py +6 -0
- uqa/operators/aggregation.py +192 -0
- uqa/operators/base.py +67 -0
- uqa/operators/boolean.py +75 -0
- uqa/operators/hierarchical.py +111 -0
- uqa/operators/hybrid.py +214 -0
- uqa/operators/primitive.py +257 -0
- uqa/planner/__init__.py +6 -0
- uqa/planner/cardinality.py +326 -0
- uqa/planner/cost_model.py +93 -0
- uqa/planner/executor.py +198 -0
- uqa/planner/optimizer.py +311 -0
- uqa/planner/parallel.py +87 -0
- uqa/scoring/__init__.py +6 -0
- uqa/scoring/bayesian_bm25.py +119 -0
- uqa/scoring/bm25.py +67 -0
- uqa/scoring/vector.py +34 -0
- uqa/scoring/wand.py +220 -0
- uqa/sql/__init__.py +6 -0
- uqa/sql/compiler.py +2062 -0
- uqa/sql/expr_evaluator.py +520 -0
- uqa/sql/table.py +327 -0
- uqa/storage/__init__.py +6 -0
- uqa/storage/block_max_index.py +172 -0
- uqa/storage/btree_index.py +120 -0
- uqa/storage/catalog.py +600 -0
- uqa/storage/document_store.py +48 -0
- uqa/storage/index_abc.py +56 -0
- uqa/storage/index_manager.py +122 -0
- uqa/storage/index_types.py +31 -0
- uqa/storage/inverted_index.py +195 -0
- uqa/storage/managed_connection.py +105 -0
- uqa/storage/sqlite_document_store.py +182 -0
- uqa/storage/sqlite_graph_store.py +153 -0
- uqa/storage/sqlite_inverted_index.py +607 -0
- uqa/storage/sqlite_vector_index.py +130 -0
- uqa/storage/transaction.py +94 -0
- uqa/storage/vector_index.py +83 -0
- uqa/tests/__init__.py +6 -0
- uqa/tests/conftest.py +139 -0
- uqa/tests/test_catalog.py +868 -0
- uqa/tests/test_correlated_subquery.py +228 -0
- uqa/tests/test_cost_optimizer.py +287 -0
- uqa/tests/test_cross_paradigm_optimizer.py +610 -0
- uqa/tests/test_cte.py +177 -0
- uqa/tests/test_execution.py +883 -0
- uqa/tests/test_expr_evaluator.py +623 -0
- uqa/tests/test_fusion.py +168 -0
- uqa/tests/test_graph.py +547 -0
- uqa/tests/test_index.py +496 -0
- uqa/tests/test_integration.py +276 -0
- uqa/tests/test_joins.py +631 -0
- uqa/tests/test_offset_like.py +234 -0
- uqa/tests/test_operators.py +514 -0
- uqa/tests/test_parallel.py +321 -0
- uqa/tests/test_posting_list.py +249 -0
- uqa/tests/test_prepared.py +269 -0
- uqa/tests/test_scoring.py +193 -0
- uqa/tests/test_skip_blockmax.py +495 -0
- uqa/tests/test_spill.py +414 -0
- uqa/tests/test_sql.py +816 -0
- uqa/tests/test_sqlite_document_store.py +407 -0
- uqa/tests/test_sqlite_graph_store.py +450 -0
- uqa/tests/test_sqlite_inverted_index.py +501 -0
- uqa/tests/test_sqlite_vector_index.py +309 -0
- uqa/tests/test_subquery.py +231 -0
- uqa/tests/test_transaction.py +513 -0
- uqa/tests/test_update_delete.py +402 -0
- uqa/tests/test_views.py +253 -0
- uqa/tests/test_window.py +290 -0
- uqa-0.3.0.dist-info/METADATA +17 -0
- uqa-0.3.0.dist-info/RECORD +106 -0
- uqa-0.3.0.dist-info/WHEEL +5 -0
- uqa-0.3.0.dist-info/entry_points.txt +2 -0
- uqa-0.3.0.dist-info/licenses/LICENSE +661 -0
- uqa-0.3.0.dist-info/top_level.txt +1 -0
uqa/__init__.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Unified Query Algebra
|
|
3
|
+
#
|
|
4
|
+
# Copyright (c) 2023-2026 Cognica, Inc.
|
|
5
|
+
#
|
|
6
|
+
|
|
7
|
+
# Public attribute name -> (module path, attribute name inside that module).
# Kept at module level so the table is built once rather than on every lookup.
_LAZY_IMPORTS = {
    "Engine": ("uqa.engine", "Engine"),
    "QueryBuilder": ("uqa.api.query_builder", "QueryBuilder"),
    "PostingList": ("uqa.core.posting_list", "PostingList"),
    "Vertex": ("uqa.core.types", "Vertex"),
    "Edge": ("uqa.core.types", "Edge"),
    "GraphPattern": ("uqa.graph.pattern", "GraphPattern"),
}


def __getattr__(name: str):
    """Lazy imports to avoid circular / missing-module errors during parallel development.

    Called by Python only when normal module attribute lookup fails.  If
    *name* is one of the lazily exported names, the defining module is
    imported, the attribute is fetched from it, and the result is stored in
    this module's globals so later accesses bypass this hook.

    Raises:
        AttributeError: if *name* is not one of the lazily exported names.
    """
    try:
        module_path, attr = _LAZY_IMPORTS[name]
    except KeyError:
        raise AttributeError(f"module 'uqa' has no attribute {name!r}") from None

    import importlib

    value = getattr(importlib.import_module(module_path), attr)
    # Cache the resolved object: without this every access re-enters the
    # import machinery because the name never lands in the module namespace.
    globals()[name] = value
    return value
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# Version string of the package.
__version__ = "0.3.0"

# Names exported by ``from uqa import *``.
__all__ = ["Engine", "QueryBuilder", "PostingList", "Vertex", "Edge", "GraphPattern"]
|
uqa/api/__init__.py
ADDED
uqa/api/query_builder.py
ADDED
|
@@ -0,0 +1,436 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Unified Query Algebra
|
|
3
|
+
#
|
|
4
|
+
# Copyright (c) 2023-2026 Cognica, Inc.
|
|
5
|
+
#
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from typing import TYPE_CHECKING, Any
|
|
10
|
+
|
|
11
|
+
import numpy as np
|
|
12
|
+
from numpy.typing import NDArray
|
|
13
|
+
|
|
14
|
+
from uqa.core.types import PathExpr, Predicate
|
|
15
|
+
|
|
16
|
+
if TYPE_CHECKING:
|
|
17
|
+
from uqa.core.posting_list import GeneralizedPostingList, PostingList
|
|
18
|
+
from uqa.engine import Engine
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class AggregateResult:
    """Holder for the value produced by an aggregation query."""

    def __init__(self, value: Any):
        # Stored unchanged; no conversion or validation is applied.
        self.value = value

    def __repr__(self) -> str:
        """Return ``AggregateResult(<repr of value>)``."""
        return "AggregateResult({!r})".format(self.value)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class FacetResult:
    """Holder for a facet query result: a mapping of value -> count."""

    def __init__(self, counts: dict[Any, int]):
        # The mapping is kept by reference, not copied.
        self.counts = counts

    def __repr__(self) -> str:
        """Return ``FacetResult(<repr of counts>)``."""
        return "FacetResult(" + repr(self.counts) + ")"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class QueryBuilder:
    """Fluent API for constructing queries over the unified algebra (Section 13, Design Doc).

    A builder holds an engine reference and the root of the operator tree
    built so far (``None`` while the query is empty).  Builder methods never
    mutate the receiver: they return a builder that wraps the extended tree.
    Operator classes are imported lazily inside each method.
    """

    def __init__(self, engine: Engine):
        self._engine = engine
        # Root of the operator tree; None means no operator has been added yet.
        self._root: Any = None

    # -- Term retrieval (Definition 3.1.1) --

    def term(self, term: str, field: str | None = None) -> QueryBuilder:
        """Chain a ``TermOperator(term, field)`` onto the query."""
        from uqa.operators.primitive import TermOperator

        return self._chain(TermOperator(term, field))

    # -- Vector search (Definitions 3.1.2, 3.1.3) --

    def vector(
        self, query: NDArray, threshold: float, field: str = "embedding"
    ) -> QueryBuilder:
        """Chain a ``VectorSimilarityOperator(query, threshold, field)`` onto the query."""
        from uqa.operators.primitive import VectorSimilarityOperator

        return self._chain(VectorSimilarityOperator(query, threshold, field))

    def knn(self, query: NDArray, k: int, field: str = "embedding") -> QueryBuilder:
        """Chain a ``KNNOperator(query, k, field)`` onto the query."""
        from uqa.operators.primitive import KNNOperator

        return self._chain(KNNOperator(query, k, field))

    # -- Boolean algebra --

    def and_(self, other: QueryBuilder) -> QueryBuilder:
        """Return a builder whose root intersects this query with *other*.

        Raises:
            ValueError: if either builder has no operators yet.
        """
        # Validate before importing so misuse fails without loading operators.
        if self._root is None or other._root is None:
            raise ValueError("Both builders must have operators before combining")

        from uqa.operators.boolean import IntersectOperator

        return self._with_root(IntersectOperator([self._root, other._root]))

    def or_(self, other: QueryBuilder) -> QueryBuilder:
        """Return a builder whose root is the union of this query and *other*.

        Raises:
            ValueError: if either builder has no operators yet.
        """
        if self._root is None or other._root is None:
            raise ValueError("Both builders must have operators before combining")

        from uqa.operators.boolean import UnionOperator

        return self._with_root(UnionOperator([self._root, other._root]))

    def not_(self) -> QueryBuilder:
        """Return a builder whose root is the complement of this query.

        Raises:
            ValueError: if this builder has no operators yet.
        """
        if self._root is None:
            raise ValueError("Builder must have an operator before negation")

        from uqa.operators.boolean import ComplementOperator

        return self._with_root(ComplementOperator(self._root))

    # -- Filter (Definition 3.1.4) --

    def filter(self, field: str, predicate: Predicate) -> QueryBuilder:
        """Wrap the current root (possibly ``None``) in a ``FilterOperator``."""
        from uqa.operators.primitive import FilterOperator

        return self._with_root(FilterOperator(field, predicate, self._root))

    # -- Joins (Section 4, Paper 1) --

    def join(
        self,
        other: QueryBuilder,
        left_field: str,
        right_field: str,
    ) -> QueryBuilder:
        """Inner-join this query with *other* on ``left_field`` / ``right_field``.

        Raises:
            ValueError: if either builder has no operators yet.
        """
        if self._root is None or other._root is None:
            raise ValueError("Both builders must have operators before joining")

        from uqa.joins.base import JoinCondition
        from uqa.joins.inner import InnerJoinOperator

        condition = JoinCondition(left_field, right_field)
        return self._with_root(
            InnerJoinOperator(self._root, other._root, condition)
        )

    def vector_join(
        self,
        other: QueryBuilder,
        left_field: str,
        right_field: str,
        threshold: float,
    ) -> QueryBuilder:
        """Join this query with *other* through a ``VectorSimilarityJoinOperator``.

        Raises:
            ValueError: if either builder has no operators yet.
        """
        if self._root is None or other._root is None:
            raise ValueError("Both builders must have operators before joining")

        from uqa.joins.cross_paradigm import VectorSimilarityJoinOperator

        return self._with_root(
            VectorSimilarityJoinOperator(
                self._root, other._root, left_field, right_field, threshold
            )
        )

    # -- Graph operations (Paper 2) --

    def traverse(
        self, start: int, label: str | None = None, max_hops: int = 1
    ) -> QueryBuilder:
        """Chain a ``TraverseOperator(start, label, max_hops)`` onto the query."""
        from uqa.graph.operators import TraverseOperator

        return self._chain(TraverseOperator(start, label, max_hops))

    def match_pattern(self, pattern: Any) -> QueryBuilder:
        """Chain a ``PatternMatchOperator(pattern)`` onto the query."""
        from uqa.graph.operators import PatternMatchOperator

        return self._chain(PatternMatchOperator(pattern))

    def rpq(self, expr: str, start: int | None = None) -> QueryBuilder:
        """Parse *expr* with ``parse_rpq`` and chain a ``RegularPathQueryOperator``."""
        from uqa.graph.pattern import parse_rpq
        from uqa.graph.operators import RegularPathQueryOperator

        path_expr = parse_rpq(expr)
        return self._chain(RegularPathQueryOperator(path_expr, start_vertex=start))

    # -- Aggregation (Section 5.1, Paper 1) --

    def aggregate(self, field: str, agg: str) -> AggregateResult:
        """Execute the query and aggregate *field* with the named aggregation.

        *agg* is matched case-insensitively against ``count``, ``sum``,
        ``avg``, ``min`` and ``max``.  The result wraps the ``_aggregate``
        field of the first result entry, or the monoid's finalized identity
        when the aggregation produced no entries.

        Raises:
            ValueError: if *agg* is not one of the supported names.
        """
        from uqa.operators.aggregation import (
            AggregateOperator,
            AvgMonoid,
            CountMonoid,
            MaxMonoid,
            MinMonoid,
            SumMonoid,
        )

        monoid_map = {
            "count": CountMonoid,
            "sum": SumMonoid,
            "avg": AvgMonoid,
            "min": MinMonoid,
            "max": MaxMonoid,
        }
        monoid_cls = monoid_map.get(agg.lower())
        if monoid_cls is None:
            raise ValueError(f"Unknown aggregation: {agg}")

        monoid = monoid_cls()
        agg_op = AggregateOperator(self._root, field, monoid)
        ctx = self._engine._build_context()
        result_pl = agg_op.execute(ctx)

        if result_pl and len(result_pl) > 0:
            entry = next(iter(result_pl))
            return AggregateResult(entry.payload.fields.get("_aggregate"))
        return AggregateResult(monoid.finalize(monoid.identity()))

    def facet(self, field: str) -> FacetResult:
        """Execute a ``FacetOperator`` over *field* and collect value -> count.

        Each result entry contributes its ``_facet_value`` / ``_facet_count``
        payload fields; entries whose value is ``None`` are skipped and a
        missing count is treated as 0.
        """
        from uqa.operators.primitive import FacetOperator

        op = FacetOperator(field, self._root)
        ctx = self._engine._build_context()
        result_pl = op.execute(ctx)

        counts: dict[Any, int] = {}
        for entry in result_pl:
            val = entry.payload.fields.get("_facet_value")
            count = int(entry.payload.fields.get("_facet_count", 0))
            if val is not None:
                counts[val] = count
        return FacetResult(counts)

    # -- Hierarchical (Section 5.2-5.3, Paper 1) --

    def path_filter(self, path: PathExpr, predicate: Predicate) -> QueryBuilder:
        """Wrap the current root in a ``PathFilterOperator(path, predicate, root)``."""
        from uqa.operators.hierarchical import PathFilterOperator

        return self._with_root(PathFilterOperator(path, predicate, self._root))

    def path_project(self, *paths: PathExpr) -> QueryBuilder:
        """Wrap the current root in a ``PathProjectOperator`` over *paths*."""
        from uqa.operators.hierarchical import PathProjectOperator

        return self._with_root(PathProjectOperator(list(paths), self._root))

    def unnest(self, path: PathExpr) -> QueryBuilder:
        """Wrap the current root in a ``PathUnnestOperator(path, root)``."""
        from uqa.operators.hierarchical import PathUnnestOperator

        return self._with_root(PathUnnestOperator(path, self._root))

    # -- Scoring --

    def score_bm25(self, query: str) -> QueryBuilder:
        """Wrap the current root in a ``ScoreOperator`` using a ``BM25Scorer``.

        *query* is lower-cased and split on whitespace to obtain the terms.
        The scorer uses default ``BM25Params`` and the inverted-index stats
        of a context built from the engine.
        """
        from uqa.operators.primitive import ScoreOperator
        from uqa.scoring.bm25 import BM25Params, BM25Scorer

        terms = query.lower().split()
        ctx = self._engine._build_context()
        scorer = BM25Scorer(BM25Params(), ctx.inverted_index.stats)
        return self._with_root(ScoreOperator(scorer, self._root, terms))

    def score_bayesian_bm25(self, query: str) -> QueryBuilder:
        """Wrap the current root in a ``ScoreOperator`` using a ``BayesianBM25Scorer``.

        *query* is lower-cased and split on whitespace to obtain the terms.
        The scorer uses default ``BayesianBM25Params`` and the inverted-index
        stats of a context built from the engine.
        """
        from uqa.operators.primitive import ScoreOperator
        from uqa.scoring.bayesian_bm25 import BayesianBM25Params, BayesianBM25Scorer

        terms = query.lower().split()
        ctx = self._engine._build_context()
        scorer = BayesianBM25Scorer(BayesianBM25Params(), ctx.inverted_index.stats)
        return self._with_root(ScoreOperator(scorer, self._root, terms))

    # -- Fusion (Paper 4) --

    def fuse_log_odds(
        self, *builders: QueryBuilder, alpha: float = 0.5
    ) -> QueryBuilder:
        """Fuse the queries of *builders* with ``LogOddsFusion(confidence_alpha=alpha)``.

        Builders without operators are ignored.  When none of the given
        builders has an operator, this builder is returned unchanged.  The
        receiver's own root is not one of the fused sources.
        """
        sources = [b._root for b in builders if b._root is not None]
        if not sources:
            return self

        from uqa.fusion.log_odds import LogOddsFusion

        fusion = LogOddsFusion(confidence_alpha=alpha)
        return self._with_root(_FusionOperator(fusion, sources))

    def fuse_prob_and(self, *builders: QueryBuilder) -> QueryBuilder:
        """Fuse the queries of *builders* with a probabilistic AND.

        Builders without operators are ignored.
        """
        sources = [b._root for b in builders if b._root is not None]
        return self._with_root(_ProbBooleanOperator("and", sources))

    def fuse_prob_or(self, *builders: QueryBuilder) -> QueryBuilder:
        """Fuse the queries of *builders* with a probabilistic OR.

        Builders without operators are ignored.
        """
        sources = [b._root for b in builders if b._root is not None]
        return self._with_root(_ProbBooleanOperator("or", sources))

    # -- Execution --

    def execute(self) -> PostingList:
        """Optimize and run the query, returning the resulting posting list.

        An empty builder yields an empty ``PostingList`` without touching the
        engine.
        """
        from uqa.planner.executor import PlanExecutor
        from uqa.planner.optimizer import QueryOptimizer
        from uqa.core.posting_list import PostingList

        if self._root is None:
            return PostingList()

        ctx = self._engine._build_context()
        optimizer = QueryOptimizer(ctx.inverted_index.stats)
        optimized = optimizer.optimize(self._root)

        executor = PlanExecutor(ctx)
        return executor.execute(optimized)

    def explain(self) -> str:
        """Return the executor's explanation of the optimized plan.

        An empty builder yields the literal ``"(empty query)"`` without
        loading the planner or touching the engine.
        """
        if self._root is None:
            return "(empty query)"

        from uqa.planner.executor import PlanExecutor
        from uqa.planner.optimizer import QueryOptimizer

        ctx = self._engine._build_context()
        optimizer = QueryOptimizer(ctx.inverted_index.stats)
        optimized = optimizer.optimize(self._root)

        executor = PlanExecutor(ctx)
        return executor.explain(optimized)

    # -- Internal --

    def _with_root(self, op: Any) -> QueryBuilder:
        """Return a fresh builder on the same engine whose root is *op*."""
        qb = QueryBuilder(self._engine)
        qb._root = op
        return qb

    def _chain(self, op: Any) -> QueryBuilder:
        """Return a builder for *op*, intersected with the current root if any."""
        if self._root is not None:
            from uqa.operators.boolean import IntersectOperator

            op = IntersectOperator([self._root, op])
        return self._with_root(op)
|
|
353
|
+
|
|
354
|
+
|
|
355
|
+
class _FusionOperator:
|
|
356
|
+
"""Internal operator for log-odds fusion across multiple sub-queries."""
|
|
357
|
+
|
|
358
|
+
def __init__(self, fusion: Any, sources: list[Any]):
|
|
359
|
+
self.fusion = fusion
|
|
360
|
+
self.sources = sources
|
|
361
|
+
|
|
362
|
+
def execute(self, context: Any) -> Any:
|
|
363
|
+
from uqa.core.types import Payload, PostingEntry
|
|
364
|
+
from uqa.core.posting_list import PostingList
|
|
365
|
+
|
|
366
|
+
results = [src.execute(context) for src in self.sources]
|
|
367
|
+
|
|
368
|
+
doc_scores: dict[int, list[float]] = {}
|
|
369
|
+
for result in results:
|
|
370
|
+
for entry in result:
|
|
371
|
+
if entry.doc_id not in doc_scores:
|
|
372
|
+
doc_scores[entry.doc_id] = []
|
|
373
|
+
doc_scores[entry.doc_id].append(entry.payload.score)
|
|
374
|
+
|
|
375
|
+
entries = []
|
|
376
|
+
for doc_id, scores in doc_scores.items():
|
|
377
|
+
while len(scores) < len(results):
|
|
378
|
+
scores.append(0.5)
|
|
379
|
+
fused_score = self.fusion.fuse(scores)
|
|
380
|
+
entries.append(PostingEntry(doc_id, Payload(score=fused_score)))
|
|
381
|
+
|
|
382
|
+
return PostingList(entries)
|
|
383
|
+
|
|
384
|
+
def compose(self, other: Any) -> Any:
|
|
385
|
+
from uqa.operators.base import ComposedOperator
|
|
386
|
+
return ComposedOperator([self, other])
|
|
387
|
+
|
|
388
|
+
def cost_estimate(self, stats: Any) -> float:
|
|
389
|
+
return sum(
|
|
390
|
+
getattr(s, "cost_estimate", lambda _: 100.0)(stats)
|
|
391
|
+
for s in self.sources
|
|
392
|
+
)
|
|
393
|
+
|
|
394
|
+
|
|
395
|
+
class _ProbBooleanOperator:
|
|
396
|
+
"""Internal operator for probabilistic boolean fusion."""
|
|
397
|
+
|
|
398
|
+
def __init__(self, mode: str, sources: list[Any]):
|
|
399
|
+
self.mode = mode
|
|
400
|
+
self.sources = sources
|
|
401
|
+
|
|
402
|
+
def execute(self, context: Any) -> Any:
|
|
403
|
+
from uqa.core.types import Payload, PostingEntry
|
|
404
|
+
from uqa.core.posting_list import PostingList
|
|
405
|
+
from uqa.fusion.boolean import ProbabilisticBoolean
|
|
406
|
+
|
|
407
|
+
results = [src.execute(context) for src in self.sources]
|
|
408
|
+
|
|
409
|
+
doc_scores: dict[int, list[float]] = {}
|
|
410
|
+
for result in results:
|
|
411
|
+
for entry in result:
|
|
412
|
+
if entry.doc_id not in doc_scores:
|
|
413
|
+
doc_scores[entry.doc_id] = []
|
|
414
|
+
doc_scores[entry.doc_id].append(entry.payload.score)
|
|
415
|
+
|
|
416
|
+
entries = []
|
|
417
|
+
for doc_id, scores in doc_scores.items():
|
|
418
|
+
if self.mode == "and":
|
|
419
|
+
while len(scores) < len(results):
|
|
420
|
+
scores.append(0.0)
|
|
421
|
+
fused = ProbabilisticBoolean.prob_and(scores)
|
|
422
|
+
else:
|
|
423
|
+
fused = ProbabilisticBoolean.prob_or(scores)
|
|
424
|
+
entries.append(PostingEntry(doc_id, Payload(score=fused)))
|
|
425
|
+
|
|
426
|
+
return PostingList(entries)
|
|
427
|
+
|
|
428
|
+
def compose(self, other: Any) -> Any:
|
|
429
|
+
from uqa.operators.base import ComposedOperator
|
|
430
|
+
return ComposedOperator([self, other])
|
|
431
|
+
|
|
432
|
+
def cost_estimate(self, stats: Any) -> float:
|
|
433
|
+
return sum(
|
|
434
|
+
getattr(s, "cost_estimate", lambda _: 100.0)(stats)
|
|
435
|
+
for s in self.sources
|
|
436
|
+
)
|
uqa/core/__init__.py
ADDED
uqa/core/hierarchical.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Unified Query Algebra
|
|
3
|
+
#
|
|
4
|
+
# Copyright (c) 2023-2026 Cognica, Inc.
|
|
5
|
+
#
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
from uqa.core.types import DocId, PathExpr, Payload, PostingEntry
|
|
12
|
+
from uqa.core.posting_list import PostingList
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class HierarchicalDocument:
    """Recursive document structure (Definition 5.2.1, Paper 1).

    Wraps a document id together with arbitrarily nested ``dict`` / ``list``
    data and supports path-based navigation into that data.
    """

    def __init__(self, doc_id: DocId, data: dict | list | Any):
        self.doc_id = doc_id
        self.data = data

    def eval_path(self, path: PathExpr) -> Any:
        """Path evaluation (Definition 5.2.3).

        Walks *path* component by component starting at ``self.data``: a
        ``str`` component looks up a key in a ``dict``, an ``int`` component
        indexes a ``list`` (negative indices count from the end).

        Returns:
            The value found at *path*, or ``None`` when a key is missing, an
            index is out of range in either direction, a component's type
            does not match the container, or an intermediate value is
            ``None``.  An empty path returns ``self.data`` itself.
        """
        current = self.data
        for component in path:
            if isinstance(current, dict) and isinstance(component, str):
                current = current.get(component)
            elif isinstance(current, list) and isinstance(component, int):
                # Check both bounds: a negative index below -len(current)
                # would raise IndexError instead of reporting "not found".
                if not -len(current) <= component < len(current):
                    return None
                current = current[component]
            else:
                return None
            if current is None:
                return None
        return current
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def project_paths(
    doc: HierarchicalDocument, paths: list[PathExpr]
) -> dict[str, Any]:
    """Project a document to a subset of paths (Definition 5.3.2).

    Each path becomes one entry in the returned mapping: the key is the path
    components joined with ".", the value is ``doc.eval_path(path)``.  Paths
    that render to the same key overwrite earlier ones.
    """
    return {
        ".".join(map(str, path)): doc.eval_path(path)
        for path in paths
    }
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def unnest_array(
    doc: HierarchicalDocument, path: PathExpr
) -> list[HierarchicalDocument]:
    """Unnest an array at a path into separate documents (Definition 5.3.4).

    Returns an empty list when the value at *path* is not a list.  Otherwise
    one document is produced per array element, sharing ``doc.doc_id``.  Its
    data is a shallow copy of ``doc.data`` (or an empty dict when that is not
    a dict) with two extra keys: ``"<dotted path>._unnested"`` holding the
    element and ``"_unnest_index"`` holding its position.
    """
    elements = doc.eval_path(path)
    if not isinstance(elements, list):
        return []

    base_fields = doc.data if isinstance(doc.data, dict) else {}
    element_key = ".".join(map(str, path)) + "._unnested"

    return [
        HierarchicalDocument(
            doc.doc_id,
            {**base_fields, element_key: element, "_unnest_index": position},
        )
        for position, element in enumerate(elements)
    ]
|