vgi-python 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. vgi/__init__.py +152 -0
  2. vgi/_duckdb.py +62 -0
  3. vgi/_storage_profile.py +132 -0
  4. vgi/_test_fixtures/__init__.py +20 -0
  5. vgi/_test_fixtures/accumulate/__init__.py +19 -0
  6. vgi/_test_fixtures/accumulate/worker.py +762 -0
  7. vgi/_test_fixtures/aggregate/__init__.py +62 -0
  8. vgi/_test_fixtures/aggregate/_common.py +21 -0
  9. vgi/_test_fixtures/aggregate/basic.py +232 -0
  10. vgi/_test_fixtures/aggregate/dynamic.py +409 -0
  11. vgi/_test_fixtures/aggregate/generic.py +86 -0
  12. vgi/_test_fixtures/aggregate/listagg.py +71 -0
  13. vgi/_test_fixtures/aggregate/percentile.py +107 -0
  14. vgi/_test_fixtures/aggregate/streaming.py +192 -0
  15. vgi/_test_fixtures/aggregate/varargs.py +75 -0
  16. vgi/_test_fixtures/aggregate/window.py +380 -0
  17. vgi/_test_fixtures/attach_options.py +308 -0
  18. vgi/_test_fixtures/bad_protocol.py +62 -0
  19. vgi/_test_fixtures/cancellable.py +336 -0
  20. vgi/_test_fixtures/catalog.py +813 -0
  21. vgi/_test_fixtures/http_server.py +394 -0
  22. vgi/_test_fixtures/nest_tensor.py +614 -0
  23. vgi/_test_fixtures/orchard_catalog.py +47 -0
  24. vgi/_test_fixtures/projection_repro/__init__.py +6 -0
  25. vgi/_test_fixtures/projection_repro/worker.py +454 -0
  26. vgi/_test_fixtures/scalar/__init__.py +116 -0
  27. vgi/_test_fixtures/scalar/_common.py +69 -0
  28. vgi/_test_fixtures/scalar/arithmetic.py +321 -0
  29. vgi/_test_fixtures/scalar/binary.py +120 -0
  30. vgi/_test_fixtures/scalar/formatting.py +176 -0
  31. vgi/_test_fixtures/scalar/geo.py +300 -0
  32. vgi/_test_fixtures/scalar/null_handling.py +107 -0
  33. vgi/_test_fixtures/scalar/random_demo.py +171 -0
  34. vgi/_test_fixtures/scalar/settings_secrets.py +102 -0
  35. vgi/_test_fixtures/scalar/type_info.py +219 -0
  36. vgi/_test_fixtures/schema_reconcile/__init__.py +29 -0
  37. vgi/_test_fixtures/schema_reconcile/worker.py +653 -0
  38. vgi/_test_fixtures/simple_writable.py +793 -0
  39. vgi/_test_fixtures/table/__init__.py +221 -0
  40. vgi/_test_fixtures/table/_common.py +162 -0
  41. vgi/_test_fixtures/table/batch_index.py +283 -0
  42. vgi/_test_fixtures/table/batch_index_broken.py +200 -0
  43. vgi/_test_fixtures/table/catalog_scans.py +162 -0
  44. vgi/_test_fixtures/table/filters.py +1005 -0
  45. vgi/_test_fixtures/table/late_materialization.py +249 -0
  46. vgi/_test_fixtures/table/make_series.py +273 -0
  47. vgi/_test_fixtures/table/misc.py +499 -0
  48. vgi/_test_fixtures/table/order_modes.py +164 -0
  49. vgi/_test_fixtures/table/pairs.py +437 -0
  50. vgi/_test_fixtures/table/partition_columns.py +472 -0
  51. vgi/_test_fixtures/table/partition_columns_broken.py +304 -0
  52. vgi/_test_fixtures/table/profiling_example.py +195 -0
  53. vgi/_test_fixtures/table/required_filters.py +234 -0
  54. vgi/_test_fixtures/table/sequence.py +710 -0
  55. vgi/_test_fixtures/table/settings.py +426 -0
  56. vgi/_test_fixtures/table/transaction_storage.py +162 -0
  57. vgi/_test_fixtures/table/tt_pushdown.py +191 -0
  58. vgi/_test_fixtures/table/versioned.py +230 -0
  59. vgi/_test_fixtures/table_in_out.py +1392 -0
  60. vgi/_test_fixtures/versioned.py +155 -0
  61. vgi/_test_fixtures/versioned_tables.py +595 -0
  62. vgi/_test_fixtures/worker.py +1631 -0
  63. vgi/_test_fixtures/writable/__init__.py +8 -0
  64. vgi/_test_fixtures/writable/generic.py +236 -0
  65. vgi/_test_fixtures/writable/table.py +149 -0
  66. vgi/_test_fixtures/writable/worker.py +1148 -0
  67. vgi/aggregate_function.py +607 -0
  68. vgi/argument_spec.py +472 -0
  69. vgi/arguments.py +1747 -0
  70. vgi/auth.py +55 -0
  71. vgi/catalog/__init__.py +88 -0
  72. vgi/catalog/attach_option.py +206 -0
  73. vgi/catalog/catalog_interface.py +2767 -0
  74. vgi/catalog/descriptors.py +870 -0
  75. vgi/catalog/duckdb_statistics.py +377 -0
  76. vgi/catalog/secret_type.py +96 -0
  77. vgi/catalog/setting.py +253 -0
  78. vgi/catalog/storage.py +372 -0
  79. vgi/client/__init__.py +67 -0
  80. vgi/client/catalog_mixin.py +1251 -0
  81. vgi/client/cli.py +582 -0
  82. vgi/client/cli_catalog.py +182 -0
  83. vgi/client/cli_schema.py +270 -0
  84. vgi/client/cli_table.py +907 -0
  85. vgi/client/cli_transaction.py +97 -0
  86. vgi/client/cli_utils.py +441 -0
  87. vgi/client/cli_view.py +303 -0
  88. vgi/client/client.py +2183 -0
  89. vgi/exceptions.py +205 -0
  90. vgi/function.py +245 -0
  91. vgi/function_storage.py +1636 -0
  92. vgi/function_storage_azure_sql.py +922 -0
  93. vgi/function_storage_cf_do.py +740 -0
  94. vgi/http/__init__.py +25 -0
  95. vgi/http/demo_storage.py +212 -0
  96. vgi/http/worker_page.py +1252 -0
  97. vgi/invocation.py +154 -0
  98. vgi/logging_config.py +93 -0
  99. vgi/meta_worker.py +661 -0
  100. vgi/metadata.py +1403 -0
  101. vgi/otel.py +406 -0
  102. vgi/protocol.py +2418 -0
  103. vgi/protocol_version.txt +1 -0
  104. vgi/py.typed +0 -0
  105. vgi/scalar_function.py +1211 -0
  106. vgi/schema_utils.py +234 -0
  107. vgi/secret_protocol.py +124 -0
  108. vgi/secret_service.py +238 -0
  109. vgi/serve.py +769 -0
  110. vgi/table_buffering_function.py +443 -0
  111. vgi/table_filter_pushdown.py +1528 -0
  112. vgi/table_function.py +1130 -0
  113. vgi/table_in_out_function.py +383 -0
  114. vgi/transactor/__init__.py +24 -0
  115. vgi/transactor/_duckdb_compat.py +27 -0
  116. vgi/transactor/client.py +137 -0
  117. vgi/transactor/protocol.py +149 -0
  118. vgi/transactor/server.py +740 -0
  119. vgi/worker.py +4761 -0
  120. vgi_python-0.8.0.dist-info/METADATA +735 -0
  121. vgi_python-0.8.0.dist-info/RECORD +124 -0
  122. vgi_python-0.8.0.dist-info/WHEEL +4 -0
  123. vgi_python-0.8.0.dist-info/entry_points.txt +5 -0
  124. vgi_python-0.8.0.dist-info/licenses/LICENSE +134 -0
vgi/otel.py ADDED
@@ -0,0 +1,406 @@
1
+ # Copyright 2025, 2026 Query Farm LLC - https://query.farm
2
+
3
+ """VGI application-level OpenTelemetry and Sentry instrumentation.
4
+
5
+ Provides ``VgiTracer`` — a thin wrapper that enriches both OTel spans and
6
+ Sentry scopes with VGI-level attributes (function name, attach_opaque_data, etc.)
7
+ and creates ``vgi.execute.*`` per-batch records (OTel spans + Sentry
8
+ spans + Sentry breadcrumbs).
9
+
10
+ All OTel and Sentry imports are deferred to ``VgiTracer.create()`` so that
11
+ ``import vgi.otel`` works even when neither dependency is installed. When
12
+ both backends are disabled, all operations are zero-cost no-ops.
13
+
14
+ Despite the module name, this is the central instrumentation hook for both
15
+ backends. vgi-rpc's own Sentry auto-attach handles RPC-layer fields
16
+ (method name, server id, auth principal); the helpers here add VGI-layer
17
+ fields (function name, function type, attach id, transaction id, per-batch
18
+ row counts) on top.
19
+
20
+ Sentry spans: every exchange creates a child span ``op=vgi.execute``,
21
+ ``name=<function_name>`` under the active RPC transaction. Row counts and
22
+ byte sizes land as ``vgi.execute.*`` span attributes — searchable in Trace
23
+ Explorer (e.g. ``span.op:vgi.execute vgi.function.name:scan_orders ->
24
+ p99(span.duration) GROUP BY vgi.execute.input_rows``). Breadcrumbs are
25
+ still emitted for the Issues-side chronological view of recent batches
26
+ when an exception fires later. High-volume streams may hit Sentry's
27
+ per-transaction span cap (~1000); use ``traces_sample_rate`` to scale.
28
+ """
29
+
30
+ from __future__ import annotations
31
+
32
+ import sys
33
+ import time
34
+ from typing import TYPE_CHECKING, Any
35
+
36
+ if TYPE_CHECKING:
37
+ from vgi_rpc.otel import OtelConfig
38
+
39
+ __all__ = [
40
+ "VgiTracer",
41
+ "get_noop_tracer",
42
+ ]
43
+
44
+
45
def _sentry_active() -> bool:
    """Report whether Sentry is both loaded and initialised in this process.

    ``sentry_sdk`` is only imported here when it already appears in
    ``sys.modules``, so a worker that never imported Sentry is never made to
    load the optional dependency.

    Returns:
        ``True`` when ``sentry_sdk`` is present in ``sys.modules`` and
        ``sentry_sdk.is_initialized()`` is truthy; ``False`` otherwise.
    """
    if "sentry_sdk" in sys.modules:
        # Already loaded elsewhere, so this import is just a cache lookup.
        import sentry_sdk

        return bool(sentry_sdk.is_initialized())
    return False
56
+
57
+
58
class _NoopSpan:
    """Inert stand-in that works as both a context manager and a span.

    Entering yields the instance itself, exiting does nothing, and
    ``set_attribute`` discards its arguments. Requires no imports.
    """

    def __enter__(self) -> _NoopSpan:
        # Hand back the same object so ``with ... as span`` yields a span-like value.
        return self

    def __exit__(self, *exc_info: object) -> None:
        # Returning ``None`` means an in-flight exception is never suppressed.
        return None

    def set_attribute(self, k: str, v: Any) -> None:
        # The key/value pair is intentionally dropped.
        return None
70
+
71
+ _NOOP_SPAN = _NoopSpan()
72
+
73
+ _VGI_SCOPE = "vgi"
74
+
75
+
76
class VgiTracer:
    """Facade over an OTel tracer/meter pair plus Sentry scope enrichment.

    Build instances with ``VgiTracer.create(otel_config)``:

    * no OTel config and Sentry not initialised: the module-level
      ``_NOOP_TRACER`` singleton is returned and every method returns
      immediately;
    * no OTel config but Sentry initialised: a Sentry-only tracer;
    * an OTel config: a tracer with OTel instruments, with Sentry enrichment
      switched on as well when Sentry is initialised.
    """

    __slots__ = (
        "_enabled",
        "_sentry_enabled",
        "_tracer",
        "_meter",
        "_duration_histogram",
        "_input_rows_counter",
        "_output_rows_counter",
        "_input_bytes_counter",
        "_output_bytes_counter",
    )

    def __init__(self, *, enabled: bool = False, sentry_enabled: bool = False) -> None:
        """Store the backend flags; OTel handles stay ``None`` until ``create`` fills them."""
        # Backend switches.
        self._enabled = enabled
        self._sentry_enabled = sentry_enabled
        # OTel tracer and meter handles.
        self._tracer: Any = None
        self._meter: Any = None
        # OTel metric instruments.
        self._duration_histogram: Any = None
        self._input_rows_counter: Any = None
        self._output_rows_counter: Any = None
        self._input_bytes_counter: Any = None
        self._output_bytes_counter: Any = None

    @staticmethod
    def create(otel_config: OtelConfig | None) -> VgiTracer:
        """Build the tracer appropriate for the active backends.

        Args:
            otel_config: OTel configuration, or ``None`` when OTel is disabled.
                Only its presence is inspected here.

        Returns:
            ``_NOOP_TRACER`` when *otel_config* is ``None`` and Sentry is not
            initialised; a Sentry-only tracer when *otel_config* is ``None``
            but Sentry is initialised; otherwise a tracer carrying an OTel
            tracer, meter, duration histogram and four counters.
        """
        sentry_on = _sentry_active()
        if otel_config is None:
            if sentry_on:
                # Sentry-only tracer: no OTel state needed.
                return VgiTracer(enabled=False, sentry_enabled=True)
            return _NOOP_TRACER

        # Deferred so that importing this module never requires OTel.
        from opentelemetry import metrics, trace

        built = VgiTracer(enabled=True, sentry_enabled=sentry_on)
        built._tracer = trace.get_tracer(_VGI_SCOPE)
        meter = metrics.get_meter(_VGI_SCOPE)
        built._meter = meter

        built._duration_histogram = meter.create_histogram(
            name="vgi.function.duration",
            description="User code processing time per batch",
            unit="s",
        )
        # (slot, metric name, description) for each counter, in creation order.
        counter_specs = (
            ("_input_rows_counter", "vgi.function.input_rows", "Total rows consumed"),
            ("_output_rows_counter", "vgi.function.output_rows", "Total rows produced"),
            ("_input_bytes_counter", "vgi.function.input_bytes", "Total logical bytes received"),
            ("_output_bytes_counter", "vgi.function.output_bytes", "Total logical bytes sent"),
        )
        for slot, metric_name, summary in counter_specs:
            setattr(built, slot, meter.create_counter(name=metric_name, description=summary))
        return built

    @property
    def enabled(self) -> bool:
        """Whether OTel instrumentation is active."""
        return self._enabled

    @property
    def sentry_enabled(self) -> bool:
        """Whether Sentry enrichment is active."""
        return self._sentry_enabled

    def start_span(self, name: str, attributes: dict[str, Any] | None = None) -> Any:
        """Start a child span as the current span.

        Returns the tracer's ``start_as_current_span`` result when OTel is
        enabled, and ``_NOOP_SPAN`` otherwise.
        """
        if self._enabled:
            return self._tracer.start_as_current_span(name, attributes=attributes)
        return _NOOP_SPAN

    def set_current_span_attributes(self, attributes: dict[str, Any]) -> None:
        """Copy VGI attributes onto the active OTel span and the Sentry scope.

        Entries whose value is ``None`` are skipped. Every remaining entry is
        set as an OTel span attribute (when OTel is enabled) and as a Sentry
        tag on the current scope (when Sentry is enabled). Repeated calls add
        to the tags already on the scope.
        """
        if not (self._enabled or self._sentry_enabled):
            return
        present = [(key, value) for key, value in attributes.items() if value is not None]
        if self._enabled:
            from opentelemetry import trace

            active_span = trace.get_current_span()
            for key, value in present:
                active_span.set_attribute(key, value)
        if self._sentry_enabled:
            import sentry_sdk

            current_scope = sentry_sdk.get_current_scope()
            for key, value in present:
                # Sentry tag values must be strings; bools render as
                # ``"True"``/``"False"`` which is fine for searchable filters.
                current_scope.set_tag(key, str(value))

    def record_execute_metrics(
        self,
        *,
        function_name: str,
        function_type: str,
        duration_s: float,
        input_rows: int | None = None,
        output_rows: int | None = None,
        input_bytes: int | None = None,
        output_bytes: int | None = None,
    ) -> None:
        """Feed one batch's measurements into the OTel instruments.

        The duration is always recorded; each row/byte count is added to its
        counter only when it is not ``None``. Every data point is labelled
        with the function name and type. Does nothing when OTel is disabled.
        """
        if not self._enabled:
            return
        labels = {"vgi.function.name": function_name, "vgi.function.type": function_type}
        self._duration_histogram.record(duration_s, labels)
        optional_counts = (
            (self._input_rows_counter, input_rows),
            (self._output_rows_counter, output_rows),
            (self._input_bytes_counter, input_bytes),
            (self._output_bytes_counter, output_bytes),
        )
        for counter, amount in optional_counts:
            if amount is not None:
                counter.add(amount, labels)
221
+
222
+
223
+ _NOOP_TRACER = VgiTracer(enabled=False)
224
+
225
+
226
def get_noop_tracer() -> VgiTracer:
    """Expose the shared disabled tracer held in the module-level ``_NOOP_TRACER``.

    Every call hands back that same object; no new tracer is constructed.
    """
    shared_noop = _NOOP_TRACER
    return shared_noop
229
+
230
+
231
def _batch_bytes(batch: Any) -> int:
    """Best-effort byte size of *batch*, taken from ``get_total_buffer_size()``.

    Any ``Exception`` raised while calling that method or while converting
    its result to ``int`` is swallowed and reported as ``0``.
    """
    try:
        byte_count = int(batch.get_total_buffer_size())
    except Exception:
        # A failed size lookup is reported as 0 rather than raised.
        return 0
    return byte_count
237
+
238
+
239
def _timed_exchange(
    vgi_tracer: VgiTracer,
    span_name: str,
    function_name: str,
    function_type: str,
    execution_id: bytes | None,
) -> _ExchangeTimer:
    """Build the ``_ExchangeTimer`` that tracks metrics for one exchange.

    All five arguments are forwarded to ``_ExchangeTimer`` unchanged and in
    the same order; the timer is returned without being entered.
    """
    timer = _ExchangeTimer(
        vgi_tracer,
        span_name,
        function_name,
        function_type,
        execution_id,
    )
    return timer
248
+
249
+
250
class _ExchangeTimer:
    """Context manager that opens the spans for one exchange and records its metrics.

    On entry it starts a wall clock and, depending on which backends the
    tracer reports as active, an OTel span and/or a Sentry child span.
    ``record()`` attaches row/byte counts to those spans, forwards them to the
    tracer's metric instruments, and emits a Sentry breadcrumb. When both
    backends are disabled every method returns immediately.

    Span cleanup is failure-safe: a span that was opened is always closed,
    even when opening the second backend's span or closing the first one
    raises.
    """

    __slots__ = (
        "_vgi_tracer",
        "_span_name",
        "_function_name",
        "_function_type",
        "_execution_id",
        "_span_ctx",
        "_span",
        "_sentry_span_ctx",
        "_sentry_span",
        "_start",
    )

    def __init__(
        self,
        vgi_tracer: VgiTracer,
        span_name: str,
        function_name: str,
        function_type: str,
        execution_id: bytes | None,
    ) -> None:
        """Remember the exchange identity; no span is opened until ``__enter__``.

        Args:
            vgi_tracer: Tracer whose ``enabled`` / ``sentry_enabled`` flags
                decide which backends are used.
            span_name: Name given to the OTel span.
            function_name: User function name; also the Sentry span name.
            function_type: Function type label attached to spans and metrics.
            execution_id: Optional id, rendered as hex on spans and breadcrumbs.
        """
        self._vgi_tracer = vgi_tracer
        self._span_name = span_name
        self._function_name = function_name
        self._function_type = function_type
        self._execution_id = execution_id
        # OTel context manager and the span it yielded (set once entered).
        self._span_ctx: Any = None
        self._span: Any = None
        # Sentry context manager and the span it yielded (set once entered).
        self._sentry_span_ctx: Any = None
        self._sentry_span: Any = None
        # ``time.monotonic()`` reading taken in ``__enter__``.
        self._start = 0.0

    def __enter__(self) -> _ExchangeTimer:
        """Start the clock and open a span on each active backend.

        If opening the Sentry span fails, any span already opened is closed
        with the failure recorded on it before the exception propagates.
        """
        tracer = self._vgi_tracer
        if not tracer.enabled and not tracer.sentry_enabled:
            return self
        # Always start the wall clock so Sentry breadcrumbs report duration
        # even in Sentry-only deployments without OTel.
        self._start = time.monotonic()
        if tracer.enabled:
            self._open_otel_span()
        if tracer.sentry_enabled:
            try:
                self._open_sentry_span()
            except BaseException as exc:
                # ``with`` never calls ``__exit__`` when ``__enter__`` raises,
                # so whatever was opened so far has to be unwound here;
                # otherwise the OTel span would stay current.
                try:
                    self._close_spans(type(exc), exc, exc.__traceback__)
                finally:
                    self._span = self._span_ctx = None
                    self._sentry_span = self._sentry_span_ctx = None
                raise
        return self

    def __exit__(self, exc_type: type[BaseException] | None, exc_val: BaseException | None, *a: object) -> None:
        """Set the final status on each open span and close it.

        The OTel span is closed first, then the Sentry span; the Sentry span
        is closed even if closing the OTel span raises. Returns ``None``, so
        an exception from the ``with`` body is never suppressed.
        """
        self._close_spans(exc_type, exc_val, *a)

    def _open_otel_span(self) -> None:
        """Start and enter the OTel span carrying the function identity attributes."""
        attrs: dict[str, Any] = {
            "vgi.function.name": self._function_name,
            "vgi.function.type": self._function_type,
        }
        if self._execution_id is not None:
            attrs["vgi.execute.execution_id"] = self._execution_id.hex()
        ctx = self._vgi_tracer.start_span(self._span_name, attributes=attrs)
        span = ctx.__enter__()
        # Stored only after a successful enter, so cleanup never exits a
        # context manager that was not entered.
        self._span_ctx, self._span = ctx, span

    def _open_sentry_span(self) -> None:
        """Start and enter the Sentry child span and tag it with the function identity."""
        import sentry_sdk

        # Child span under the RPC transaction's root span. ``op`` groups all
        # vgi user-code spans in Trace Explorer; ``name`` is the user function
        # so per-function aggregations work directly. When tracing is sampled
        # out (``traces_sample_rate=0``) Sentry returns a NoOpSpan that
        # silently absorbs ``set_data``/__exit__.
        ctx = sentry_sdk.start_span(
            op="vgi.execute",
            name=self._function_name,
        )
        span = ctx.__enter__()
        self._sentry_span_ctx, self._sentry_span = ctx, span
        span.set_data("vgi.function.name", self._function_name)
        span.set_data("vgi.function.type", self._function_type)
        if self._execution_id is not None:
            span.set_data("vgi.execute.execution_id", self._execution_id.hex())

    def _close_spans(
        self, exc_type: type[BaseException] | None, exc_val: BaseException | None, *a: object
    ) -> None:
        """Close the OTel span, then the Sentry span, guaranteeing the latter runs."""
        try:
            self._close_otel_span(exc_type, exc_val, *a)
        finally:
            self._close_sentry_span(exc_type, exc_val, *a)

    def _close_otel_span(
        self, exc_type: type[BaseException] | None, exc_val: BaseException | None, *a: object
    ) -> None:
        """Set OK/ERROR on the OTel span and exit its context manager."""
        span, ctx = self._span, self._span_ctx
        try:
            if span is not None:
                from opentelemetry.trace import StatusCode

                if exc_val is not None:
                    span.set_status(StatusCode.ERROR, str(exc_val))
                    # NOTE(review): OTel's ``start_as_current_span`` defaults to
                    # ``record_exception=True``, so the context manager's own
                    # exit may record this exception a second time. Confirm
                    # whether the explicit call is still wanted.
                    span.record_exception(exc_val)
                else:
                    span.set_status(StatusCode.OK)
        finally:
            # Exit even if setting the status failed, so the span is not left open.
            if ctx is not None:
                ctx.__exit__(exc_type, exc_val, *a)

    def _close_sentry_span(
        self, exc_type: type[BaseException] | None, exc_val: BaseException | None, *a: object
    ) -> None:
        """Set ok/internal_error on the Sentry span and exit its context manager."""
        span, ctx = self._sentry_span, self._sentry_span_ctx
        try:
            if span is not None:
                span.set_status("internal_error" if exc_val is not None else "ok")
        finally:
            if ctx is not None:
                ctx.__exit__(exc_type, exc_val, *a)

    def record(
        self,
        *,
        input_rows: int | None = None,
        output_rows: int | None = None,
        input_bytes: int | None = None,
        output_bytes: int | None = None,
    ) -> None:
        """Attach counts to the open spans, record metrics, and emit a breadcrumb.

        The duration is the time elapsed between ``__enter__`` and this call.
        Counts that are ``None`` are left off spans and the breadcrumb; all
        four are still passed (possibly as ``None``) to
        ``record_execute_metrics``.
        """
        tracer = self._vgi_tracer
        if not tracer.enabled and not tracer.sentry_enabled:
            return
        duration = time.monotonic() - self._start
        # Supplied counts only, in a fixed order shared by spans and breadcrumb.
        supplied = {
            key: value
            for key, value in (
                ("input_rows", input_rows),
                ("output_rows", output_rows),
                ("input_bytes", input_bytes),
                ("output_bytes", output_bytes),
            )
            if value is not None
        }
        if tracer.enabled:
            if self._span is not None:
                for key, value in supplied.items():
                    self._span.set_attribute(f"vgi.execute.{key}", value)
            tracer.record_execute_metrics(
                function_name=self._function_name,
                function_type=self._function_type,
                duration_s=duration,
                input_rows=input_rows,
                output_rows=output_rows,
                input_bytes=input_bytes,
                output_bytes=output_bytes,
            )
        if tracer.sentry_enabled:
            import sentry_sdk

            # Span attributes — searchable in Trace Explorer / Insights.
            if self._sentry_span is not None:
                for key, value in supplied.items():
                    self._sentry_span.set_data(f"vgi.execute.{key}", value)

            # Breadcrumb — chronological diagnostic visible in the Issues
            # panel if an exception fires later in the same transaction.
            # Complementary to the span (which only shows in Performance).
            data: dict[str, Any] = {
                "function_name": self._function_name,
                "function_type": self._function_type,
                "duration_ms": round(duration * 1000.0, 3),
            }
            if self._execution_id is not None:
                data["execution_id"] = self._execution_id.hex()
            data.update(supplied)
            sentry_sdk.add_breadcrumb(
                category="vgi.execute",
                message=f"{self._function_name} batch",
                level="info",
                data=data,
            )