vgi-python 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. vgi/__init__.py +152 -0
  2. vgi/_duckdb.py +62 -0
  3. vgi/_storage_profile.py +132 -0
  4. vgi/_test_fixtures/__init__.py +20 -0
  5. vgi/_test_fixtures/accumulate/__init__.py +19 -0
  6. vgi/_test_fixtures/accumulate/worker.py +762 -0
  7. vgi/_test_fixtures/aggregate/__init__.py +62 -0
  8. vgi/_test_fixtures/aggregate/_common.py +21 -0
  9. vgi/_test_fixtures/aggregate/basic.py +232 -0
  10. vgi/_test_fixtures/aggregate/dynamic.py +409 -0
  11. vgi/_test_fixtures/aggregate/generic.py +86 -0
  12. vgi/_test_fixtures/aggregate/listagg.py +71 -0
  13. vgi/_test_fixtures/aggregate/percentile.py +107 -0
  14. vgi/_test_fixtures/aggregate/streaming.py +192 -0
  15. vgi/_test_fixtures/aggregate/varargs.py +75 -0
  16. vgi/_test_fixtures/aggregate/window.py +380 -0
  17. vgi/_test_fixtures/attach_options.py +308 -0
  18. vgi/_test_fixtures/bad_protocol.py +62 -0
  19. vgi/_test_fixtures/cancellable.py +336 -0
  20. vgi/_test_fixtures/catalog.py +813 -0
  21. vgi/_test_fixtures/http_server.py +394 -0
  22. vgi/_test_fixtures/nest_tensor.py +614 -0
  23. vgi/_test_fixtures/orchard_catalog.py +47 -0
  24. vgi/_test_fixtures/projection_repro/__init__.py +6 -0
  25. vgi/_test_fixtures/projection_repro/worker.py +454 -0
  26. vgi/_test_fixtures/scalar/__init__.py +116 -0
  27. vgi/_test_fixtures/scalar/_common.py +69 -0
  28. vgi/_test_fixtures/scalar/arithmetic.py +321 -0
  29. vgi/_test_fixtures/scalar/binary.py +120 -0
  30. vgi/_test_fixtures/scalar/formatting.py +176 -0
  31. vgi/_test_fixtures/scalar/geo.py +300 -0
  32. vgi/_test_fixtures/scalar/null_handling.py +107 -0
  33. vgi/_test_fixtures/scalar/random_demo.py +171 -0
  34. vgi/_test_fixtures/scalar/settings_secrets.py +102 -0
  35. vgi/_test_fixtures/scalar/type_info.py +219 -0
  36. vgi/_test_fixtures/schema_reconcile/__init__.py +29 -0
  37. vgi/_test_fixtures/schema_reconcile/worker.py +653 -0
  38. vgi/_test_fixtures/simple_writable.py +793 -0
  39. vgi/_test_fixtures/table/__init__.py +221 -0
  40. vgi/_test_fixtures/table/_common.py +162 -0
  41. vgi/_test_fixtures/table/batch_index.py +283 -0
  42. vgi/_test_fixtures/table/batch_index_broken.py +200 -0
  43. vgi/_test_fixtures/table/catalog_scans.py +162 -0
  44. vgi/_test_fixtures/table/filters.py +1005 -0
  45. vgi/_test_fixtures/table/late_materialization.py +249 -0
  46. vgi/_test_fixtures/table/make_series.py +273 -0
  47. vgi/_test_fixtures/table/misc.py +499 -0
  48. vgi/_test_fixtures/table/order_modes.py +164 -0
  49. vgi/_test_fixtures/table/pairs.py +437 -0
  50. vgi/_test_fixtures/table/partition_columns.py +472 -0
  51. vgi/_test_fixtures/table/partition_columns_broken.py +304 -0
  52. vgi/_test_fixtures/table/profiling_example.py +195 -0
  53. vgi/_test_fixtures/table/required_filters.py +234 -0
  54. vgi/_test_fixtures/table/sequence.py +710 -0
  55. vgi/_test_fixtures/table/settings.py +426 -0
  56. vgi/_test_fixtures/table/transaction_storage.py +162 -0
  57. vgi/_test_fixtures/table/tt_pushdown.py +191 -0
  58. vgi/_test_fixtures/table/versioned.py +230 -0
  59. vgi/_test_fixtures/table_in_out.py +1392 -0
  60. vgi/_test_fixtures/versioned.py +155 -0
  61. vgi/_test_fixtures/versioned_tables.py +595 -0
  62. vgi/_test_fixtures/worker.py +1631 -0
  63. vgi/_test_fixtures/writable/__init__.py +8 -0
  64. vgi/_test_fixtures/writable/generic.py +236 -0
  65. vgi/_test_fixtures/writable/table.py +149 -0
  66. vgi/_test_fixtures/writable/worker.py +1148 -0
  67. vgi/aggregate_function.py +607 -0
  68. vgi/argument_spec.py +472 -0
  69. vgi/arguments.py +1747 -0
  70. vgi/auth.py +55 -0
  71. vgi/catalog/__init__.py +88 -0
  72. vgi/catalog/attach_option.py +206 -0
  73. vgi/catalog/catalog_interface.py +2767 -0
  74. vgi/catalog/descriptors.py +870 -0
  75. vgi/catalog/duckdb_statistics.py +377 -0
  76. vgi/catalog/secret_type.py +96 -0
  77. vgi/catalog/setting.py +253 -0
  78. vgi/catalog/storage.py +372 -0
  79. vgi/client/__init__.py +67 -0
  80. vgi/client/catalog_mixin.py +1251 -0
  81. vgi/client/cli.py +582 -0
  82. vgi/client/cli_catalog.py +182 -0
  83. vgi/client/cli_schema.py +270 -0
  84. vgi/client/cli_table.py +907 -0
  85. vgi/client/cli_transaction.py +97 -0
  86. vgi/client/cli_utils.py +441 -0
  87. vgi/client/cli_view.py +303 -0
  88. vgi/client/client.py +2183 -0
  89. vgi/exceptions.py +205 -0
  90. vgi/function.py +245 -0
  91. vgi/function_storage.py +1636 -0
  92. vgi/function_storage_azure_sql.py +922 -0
  93. vgi/function_storage_cf_do.py +740 -0
  94. vgi/http/__init__.py +25 -0
  95. vgi/http/demo_storage.py +212 -0
  96. vgi/http/worker_page.py +1252 -0
  97. vgi/invocation.py +154 -0
  98. vgi/logging_config.py +93 -0
  99. vgi/meta_worker.py +661 -0
  100. vgi/metadata.py +1403 -0
  101. vgi/otel.py +406 -0
  102. vgi/protocol.py +2418 -0
  103. vgi/protocol_version.txt +1 -0
  104. vgi/py.typed +0 -0
  105. vgi/scalar_function.py +1211 -0
  106. vgi/schema_utils.py +234 -0
  107. vgi/secret_protocol.py +124 -0
  108. vgi/secret_service.py +238 -0
  109. vgi/serve.py +769 -0
  110. vgi/table_buffering_function.py +443 -0
  111. vgi/table_filter_pushdown.py +1528 -0
  112. vgi/table_function.py +1130 -0
  113. vgi/table_in_out_function.py +383 -0
  114. vgi/transactor/__init__.py +24 -0
  115. vgi/transactor/_duckdb_compat.py +27 -0
  116. vgi/transactor/client.py +137 -0
  117. vgi/transactor/protocol.py +149 -0
  118. vgi/transactor/server.py +740 -0
  119. vgi/worker.py +4761 -0
  120. vgi_python-0.8.0.dist-info/METADATA +735 -0
  121. vgi_python-0.8.0.dist-info/RECORD +124 -0
  122. vgi_python-0.8.0.dist-info/WHEEL +4 -0
  123. vgi_python-0.8.0.dist-info/entry_points.txt +5 -0
  124. vgi_python-0.8.0.dist-info/licenses/LICENSE +134 -0
vgi/invocation.py ADDED
@@ -0,0 +1,154 @@
1
+ # Copyright 2025, 2026 Query Farm LLC - https://query.farm
2
+
3
+ """Response types for the VGI protocol.
4
+
5
+ This module defines response dataclasses and the FunctionType enum:
6
+
7
+ - FunctionType: Enum for aggregate, scalar, table, and table-buffering function types.
8
+ - BindResponse: Result of bind phase with output schema.
9
+ - BaseInitResponse: Base class for init responses.
10
+ - GlobalInitResponse: Result of init phase with max_workers.
11
+
12
+ Request types (BindRequest, InitRequest) are in ``vgi.protocol``.
13
+
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import uuid
19
+ from dataclasses import dataclass, field
20
+ from enum import Enum
21
+ from typing import Annotated
22
+
23
+ import pyarrow as pa
24
+ from vgi_rpc import ArrowSerializableDataclass, ArrowType
25
+
26
+ from vgi.arguments import SecretLookupEntry
27
+ from vgi.metadata import DEFAULT_MAX_WORKERS
28
+
29
+ __all__ = [
30
+ "FunctionType",
31
+ "BaseInitResponse",
32
+ "BindResponse",
33
+ "GlobalInitResponse",
34
+ ]
35
+
36
+
37
class FunctionType(Enum):
    """Category of the function an invocation targets.

    A BindRequest carries one of these members to say which function
    category is being bound, so the worker can apply the validation and
    processing appropriate to that category.

    Each member's value is its lower-case string form.
    """

    AGGREGATE = "aggregate"
    SCALAR = "scalar"
    TABLE = "table"
    TABLE_BUFFERING = "table_buffering"
49
+
50
+
51
@dataclass(frozen=True, slots=True, kw_only=True)
class BindResponse(ArrowSerializableDataclass):
    """Outcome of a function's bind() call.

    Produced by bind(), this carries the function's output characteristics
    and is serialized and sent to the client before any data processing
    begins.

    A response whose ``lookup_secret_types`` is non-empty is a **secret
    scope request** instead of a normal bind response: C++ resolves the
    requested secrets and retries bind with
    ``resolved_secrets_provided=True``. Developers never build scope
    requests by hand — the framework generates them when
    ``SecretsAccessor`` still has pending lookups after ``on_bind()``
    returns.

    Attributes:
        output_schema: Arrow schema describing the structure of output batches.
        opaque_data: Serialized data, opaque to the caller, that must be
            passed to any init() invocations.
        lookup_secret_types: Secret types for scoped lookup requests
            (empty = normal response).
        lookup_scopes: Scopes for scoped lookup requests (parallel to
            lookup_secret_types).
        lookup_names: Names for scoped lookup requests (parallel to
            lookup_secret_types).

    """

    output_schema: Annotated[pa.Schema, ArrowType(pa.binary())]
    # Wire-facing field. The framework produces these bytes by calling
    # ``.serialize_to_bytes()`` on the typed ``BindResult.opaque_data`` at
    # the bind→response boundary (see vgi.scalar_function /
    # vgi.table_function / vgi.table_in_out_function). Consumers rebuild
    # the value with ``MyConcreteDataclass.deserialize_from_bytes(raw)``.
    # A typed round-trip through the abstract base would need a class
    # registry in Python, so the wire type is honestly declared as bytes.
    opaque_data: Annotated[bytes | None, ArrowType(pa.binary())] = None
    lookup_secret_types: list[str] = field(default_factory=list)
    lookup_scopes: list[str] = field(default_factory=list)
    lookup_names: list[str] = field(default_factory=list)

    @property
    def is_secret_scope_request(self) -> bool:
        """Whether this response asks for secrets instead of being a normal bind result."""
        requested = len(self.lookup_secret_types)
        return requested > 0

    @staticmethod
    def secret_scope_request(entries: list[SecretLookupEntry]) -> BindResponse:
        """Build a secret scope request out of lookup entries.

        The framework calls this when ``SecretsAccessor`` has pending
        lookups; C++ detects the non-empty ``lookup_secret_types`` and
        resolves them.

        Args:
            entries: Pending secret lookups. A missing (falsy) scope or
                secret name is encoded as an empty string.

        Returns:
            A ``BindResponse`` with an empty output schema and the three
            parallel lookup lists filled in, one position per entry.
        """
        secret_types: list[str] = []
        scopes: list[str] = []
        names: list[str] = []
        for entry in entries:
            secret_types.append(entry.secret_type)
            # Falsy scope / name collapse to "" on the wire.
            scopes.append(entry.scope or "")
            names.append(entry.secret_name or "")
        return BindResponse(
            output_schema=pa.schema([]),
            lookup_secret_types=secret_types,
            lookup_scopes=scopes,
            lookup_names=names,
        )

    def secret_scope_entries(self) -> list[SecretLookupEntry]:
        """Rebuild ``SecretLookupEntry`` objects from the parallel lookup lists.

        Empty-string scopes and names are turned back into ``None``.

        Raises:
            ValueError: If the three lookup lists differ in length
                (``zip(..., strict=True)``).
        """
        rows = zip(
            self.lookup_secret_types,
            self.lookup_scopes,
            self.lookup_names,
            strict=True,
        )
        rebuilt: list[SecretLookupEntry] = []
        for secret_type, scope, name in rows:
            rebuilt.append(
                SecretLookupEntry(
                    secret_type=secret_type,
                    scope=scope or None,
                    secret_name=name or None,
                )
            )
        return rebuilt
123
+
124
+
125
@dataclass(frozen=True, slots=True, kw_only=True)
class BaseInitResponse(ArrowSerializableDataclass):
    """The result of calling init() on a function.

    Base class for init responses; ``GlobalInitResponse`` extends it with
    ``max_workers``.

    Attributes:
        execution_id: A unique id for the function execution. When not
            supplied, it defaults to the raw bytes of a freshly generated
            random UUID (``uuid.uuid4().bytes``).
        opaque_data: Serialized data that is opaque to the caller.
            NOTE(review): the previous wording said it "must be passed to
            any init() invocations", identical to ``BindResponse`` —
            confirm which later call actually consumes an init response's
            payload.

    """

    # default_factory runs per instance, so every response gets its own id.
    execution_id: bytes = field(default_factory=lambda: uuid.uuid4().bytes)
    # Wire-facing field — see comment on ``BindResponse.opaque_data``
    # above for the typed-producer / bytes-wire / explicit-consumer
    # contract.
    opaque_data: Annotated[bytes | None, ArrowType(pa.binary())] = None
141
+
142
+
143
@dataclass(frozen=True, slots=True, kw_only=True)
class GlobalInitResponse(BaseInitResponse):
    """The result of calling init() on a function, including a worker limit.

    Inherits ``execution_id`` and ``opaque_data`` from ``BaseInitResponse``.

    Attributes:
        max_workers: The maximum number of worker processes that may be
            used for this function execution. This allows the function to control
            parallelism. Defaults to ``DEFAULT_MAX_WORKERS`` imported from
            ``vgi.metadata``.

    """

    max_workers: int = DEFAULT_MAX_WORKERS
vgi/logging_config.py ADDED
@@ -0,0 +1,93 @@
1
+ # Copyright 2025, 2026 Query Farm LLC - https://query.farm
2
+
3
+ """Shared logging configuration for VGI worker CLIs.
4
+
5
+ Provides enums, known-logger registry, and a configure function that
6
+ mirrors the vgi_rpc CLI logging setup so that ``--debug``, ``--log-level``,
7
+ ``--log-logger``, and ``--log-format`` behave identically across all
8
+ VGI workers.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import logging
14
+ import sys
15
+ from enum import StrEnum
16
+
17
+
18
+ class LogLevel(StrEnum):
19
+ """Python logging level for ``--log-level``."""
20
+
21
+ DEBUG = "DEBUG"
22
+ INFO = "INFO"
23
+ WARNING = "WARNING"
24
+ ERROR = "ERROR"
25
+
26
+
27
+ class LogFormat(StrEnum):
28
+ """Stderr log format for ``--log-format``."""
29
+
30
+ text = "text"
31
+ json = "json"
32
+
33
+
34
# Registry of logger names that ``configure_worker_logging`` recognises.
# Each entry is (name, description, typical-scenario).
# ``configure_worker_logging`` consults only the names: a requested logger
# that is not listed here produces an "unknown logger" warning on stderr
# but is still configured.
_KNOWN_LOGGERS: list[tuple[str, str, str]] = [
    ("vgi", "VGI root logger", "all VGI messages"),
    ("vgi.worker", "Worker lifecycle", "startup, shutdown"),
    ("vgi.client", "Client operations", "spawn, bind, exchange"),
    ("vgi.client.cli", "CLI front-end", "argument parsing"),
    ("vgi.filter_pushdown", "Filter pushdown debug", "filter deserialization / evaluation"),
    ("vgi_rpc", "vgi_rpc root logger", "all vgi_rpc messages"),
    ("vgi_rpc.access", "RPC access log (enriched by VGI)", "per-request structured access log"),
    ("vgi_rpc.wire.request", "RPC wire request", "serialised request bytes"),
    ("vgi_rpc.wire.response", "RPC wire response", "serialised response bytes"),
    ("vgi_rpc.wire.transport", "Transport layer", "pipe / HTTP transport debug"),
]
47
+
48
+
49
def _propagates_to_any(logger: logging.Logger, candidates: list[logging.Logger]) -> bool:
    """Return True if records from *logger* already reach one of *candidates* by propagation."""
    current = logger
    while current.propagate and current.parent is not None:
        current = current.parent
        if any(current is candidate for candidate in candidates):
            return True
    return False


def configure_worker_logging(
    *,
    debug: bool = False,
    log_level: LogLevel = LogLevel.INFO,
    log_loggers: list[str] | None = None,
    log_format: LogFormat = LogFormat.text,
) -> int:
    """Configure stdlib logging for a VGI worker process.

    A single stderr handler is created and shared by the configured
    loggers. Existing handlers on every target logger are removed and its
    level is set to the effective level. The handler is attached only to
    targets whose records do not already propagate to another configured
    target (for example ``vgi.client`` when ``vgi`` is also requested), so
    each record is written once instead of once per configured ancestor.

    Args:
        debug: If True, force DEBUG on every configured logger (overrides *log_level*).
        log_level: Logging level when *debug* is False.
        log_loggers: Logger names to configure. Defaults to ``["vgi", "vgi_rpc"]``.
            Names missing from ``_KNOWN_LOGGERS`` trigger a warning on
            stderr but are still configured.
        log_format: Stderr output format (``text`` or ``json``).

    Returns:
        The effective numeric log level.

    """
    effective_level = logging.DEBUG if debug else getattr(logging, log_level.value)

    handler = logging.StreamHandler(sys.stderr)

    if log_format == LogFormat.json:
        # Imported lazily so the JSON formatter is only loaded when requested.
        from vgi_rpc.logging_utils import VgiJsonFormatter

        handler.setFormatter(VgiJsonFormatter())
    else:
        handler.setFormatter(
            logging.Formatter("%(asctime)s %(name)-30s %(levelname)-5s %(message)s", datefmt="%H:%M:%S")
        )

    targets = log_loggers if log_loggers else ["vgi", "vgi_rpc"]

    known_names = {name for name, _, _ in _KNOWN_LOGGERS}

    # Materialise every target first: ``logging.getLogger`` re-parents
    # existing children when an ancestor is created later, so parent links
    # are only final once all requested loggers exist.
    loggers: list[logging.Logger] = []
    for name in targets:
        if name not in known_names:
            # Still configure it — the user may know what they're doing
            sys.stderr.write(f"warning: unknown logger {name!r}\n")
        loggers.append(logging.getLogger(name))

    for logger in loggers:
        logger.handlers.clear()
        logger.setLevel(effective_level)
        # Attaching the shared handler to a logger whose records already
        # propagate to another configured target would emit each record twice.
        if not _propagates_to_any(logger, loggers):
            logger.addHandler(handler)

    return effective_level