vgi-python 0.8.2__py3-none-any.whl → 0.8.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vgi/__init__.py CHANGED
@@ -39,6 +39,7 @@ from vgi.arguments import (
39
39
  Param,
40
40
  Returns,
41
41
  TableInput,
42
+ TaggedUnion,
42
43
  )
43
44
  from vgi.auth import AuthContext, CallContext
44
45
  from vgi.metadata import (
@@ -143,6 +144,7 @@ __all__ = [
143
144
  "TableInOutGenerator",
144
145
  "TableInput",
145
146
  "TableInputValidationError",
147
+ "TaggedUnion",
146
148
  "TypeMismatchError",
147
149
  "Worker",
148
150
  "functions_to_arrow",
@@ -61,7 +61,7 @@ class CountBatchArgs:
61
61
  """
62
62
 
63
63
  count: Annotated[int, Arg(0, doc="Number of rows to generate", ge=0)]
64
- batch_size: Annotated[int, Arg("batch_size", default=1000, doc="Batch size for output", ge=1)]
64
+ batch_size: Annotated[int, Arg("batch_size", default=2048, doc="Batch size for output", ge=1)]
65
65
 
66
66
 
67
67
  @dataclass(slots=True, frozen=True)
@@ -91,7 +91,7 @@ class _BaseSequenceFunction(TableFunctionGenerator[Any, CountdownState]):
91
91
  NUMPY_DTYPE: ClassVar[type[np.generic]] = np.int64
92
92
  STATS_ARROW_TYPE: ClassVar[pa.DataType] = pa.int64()
93
93
  STATS_COLUMN_NAME: ClassVar[str] = "n"
94
- BATCH_SIZE_FALLBACK: ClassVar[int] = 1000
94
+ BATCH_SIZE_FALLBACK: ClassVar[int] = 2048
95
95
 
96
96
  @classmethod
97
97
  def initial_state(cls, params: ProcessParams[Any]) -> CountdownState:
@@ -320,6 +320,7 @@ _EXAMPLE_CATALOG = Catalog(
320
320
  default_schema="main",
321
321
  comment="Example VGI catalog for testing",
322
322
  tags={"source": "vgi-fixture-worker", "version": "1"},
323
+ source_url="https://github.com/query-farm/vgi-python",
323
324
  schemas=[
324
325
  Schema(
325
326
  name="main",
vgi/arguments.py CHANGED
@@ -159,6 +159,7 @@ __all__ = [
159
159
  "PYTHON_TO_ARROW",
160
160
  "Returns",
161
161
  "TableInput",
162
+ "TaggedUnion",
162
163
  "TypeBoundPredicate",
163
164
  "OutputLength",
164
165
  "Setting",
@@ -168,6 +169,58 @@ __all__ = [
168
169
  ]
169
170
 
170
171
 
172
+ @dataclass(frozen=True, slots=True)
173
+ class TaggedUnion:
174
+ """A decoded union-typed argument: which member is set (``tag``) and its ``value``.
175
+
176
+ DuckDB ``UNION`` / Arrow union arguments are *tagged*: the discriminator
177
+ (which member is present) lives in the Arrow ``UnionScalar.type_code``, not
178
+ in the member value. Plain ``Scalar.as_py()`` returns only the member value
179
+ and drops that tag, so union arguments are decoded into this wrapper
180
+ instead — ``tag`` is the active member's field name and ``value`` is its
181
+ Python value.
182
+
183
+ Example::
184
+
185
+ config: Annotated[TaggedUnion, Arg("config", arrow_type=pa.sparse_union([...]))]
186
+ ...
187
+ cfg = params.args.config # TaggedUnion(tag=..., value=...)
188
+ if cfg.tag == "random_forest_classifier":
189
+ grid = cfg.value # the member struct, as a dict
190
+
191
+ """
192
+
193
+ tag: str | None
194
+ value: Any
195
+
196
+
197
+ def _scalar_to_py(scalar: "Scalar[Any]") -> Any:
198
+ """Convert an argument scalar to a Python value, preserving union tags.
199
+
200
+ Identical to ``scalar.as_py()`` for every type except unions: a
201
+ ``UnionScalar`` is decoded to a [`TaggedUnion`][] so the member
202
+ discriminator (which ``as_py()`` discards) is retained.
203
+
204
+ Args:
205
+ scalar: The argument scalar to convert.
206
+
207
+ Returns:
208
+ ``scalar.as_py()`` for non-union scalars; a [`TaggedUnion`][] for unions.
209
+
210
+ """
211
+ if isinstance(scalar, pa.UnionScalar):
212
+ # Map the active ``type_code`` to its member field name via the union
213
+ # type's parallel ``type_codes`` / ``field()``. (``type_code`` is coerced
214
+ # to int — it is an integer at runtime regardless of the stub's typing.)
215
+ union_type = scalar.type
216
+ type_codes = list(union_type.type_codes)
217
+ code = int(scalar.type_code)
218
+ tag = union_type.field(type_codes.index(code)).name if code in type_codes else None
219
+ inner = scalar.value
220
+ return TaggedUnion(tag=tag, value=inner.as_py() if inner is not None else None)
221
+ return scalar.as_py()
222
+
223
+
171
224
  class TableInput:
172
225
  """Sentinel type for table input parameters in table-in-out functions.
173
226
 
@@ -377,7 +430,7 @@ class Arguments:
377
430
  else:
378
431
  raise TypeError(f"Argument '{key}': expected {type}, got {scalar.type}")
379
432
 
380
- return scalar.as_py()
433
+ return _scalar_to_py(scalar)
381
434
 
382
435
  def get_varargs(
383
436
  self,
@@ -410,7 +463,7 @@ class Arguments:
410
463
  if type is not None and scalar.type != type:
411
464
  raise TypeError(f"Argument {i}: expected {type}, got {scalar.type}")
412
465
 
413
- values.append(scalar.as_py())
466
+ values.append(_scalar_to_py(scalar))
414
467
 
415
468
  return tuple(values)
416
469
 
@@ -2319,6 +2319,7 @@ class ReadOnlyCatalogInterface(CatalogInterface):
2319
2319
  implementation_version=None,
2320
2320
  data_version_spec=None,
2321
2321
  attach_option_specs=[spec.serialize() for spec in self.attach_option_specs],
2322
+ source_url=self.catalog.source_url if self.catalog is not None else None,
2322
2323
  )
2323
2324
  ]
2324
2325
 
@@ -873,6 +873,10 @@ class Catalog:
873
873
  schemas: Sequence of Schema objects defining the catalog contents.
874
874
  comment: Optional comment describing the catalog.
875
875
  tags: Optional key-value tags associated with the catalog.
876
+ source_url: Where this worker's code lives — repo, build, or docs
877
+ homepage. ``None`` (the default) when the worker doesn't advertise
878
+ a source location. Surfaced via the ``catalog_catalogs()`` discovery
879
+ record (``CatalogInfo.source_url``).
876
880
 
877
881
  """
878
882
 
@@ -881,6 +885,7 @@ class Catalog:
881
885
  schemas: Sequence[Schema] = ()
882
886
  comment: str | None = None
883
887
  tags: dict[str, str] = field(default_factory=dict)
888
+ source_url: str | None = None
884
889
 
885
890
  def __post_init__(self) -> None:
886
891
  """Validate catalog configuration."""
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: vgi-python
3
- Version: 0.8.2
3
+ Version: 0.8.4
4
4
  Summary: Vector Gateway Interface - Connect DuckDB to external programs via Apache Arrow
5
5
  Project-URL: Homepage, https://query.farm
6
6
  Project-URL: Repository, https://github.com/Query-farm/vgi-python
@@ -1,9 +1,9 @@
1
- vgi/__init__.py,sha256=RHimcHtz9s4swAt2q3qFTYBFGWSIpqi9ZXonaaqwuPk,3378
1
+ vgi/__init__.py,sha256=PRtFvXxhHEbY_0KVhyXIbGigZDYKHzMS-4gp0p6IJSQ,3414
2
2
  vgi/_duckdb.py,sha256=YB5D7N3Bwg_xP6X8a5QlumtlAovSej1A1Go5XlNGVko,2162
3
3
  vgi/_storage_profile.py,sha256=VkTsXojuE0tHEzurmteQSAiL1vI3CZSgYkL6D_h8GvE,5061
4
4
  vgi/aggregate_function.py,sha256=vn9TjQEHxAKJl_xzQOzdj5TY_6LplcZjv06JkQMnUyo,25184
5
5
  vgi/argument_spec.py,sha256=fVO17BDDfjnTMUrRoILNr2oFLTl4KKedMObFUk2GRrI,17072
6
- vgi/arguments.py,sha256=cnM9qsHlnUsyufSGVvQoCB4RjJGF1YfjExl1FMUlnJc,64940
6
+ vgi/arguments.py,sha256=02tIMGIR_cRS73u-bsgpFERxhje-rnTokjBgGsm9pQA,67019
7
7
  vgi/auth.py,sha256=3HD2zM-Mt0Ie-_HT5RorpND1OusUw2CPROjPNs7rgbo,1478
8
8
  vgi/exceptions.py,sha256=oX_sZc9xGWi7Xf8cJQf89fX19i3ocDEj_V_76GINgBQ,7294
9
9
  vgi/function.py,sha256=SLXA3qErHIsDsn4R0nm56z83Kcw12SMZMYXnPnCzhZg,8520
@@ -41,7 +41,7 @@ vgi/_test_fixtures/simple_writable.py,sha256=CGmDBUY8kthauEm8eJN2lV72cJ9_TRxOAQC
41
41
  vgi/_test_fixtures/table_in_out.py,sha256=7QckA3NJhYAYuSctcwZLul5yOM2V3KWvLuG_33K0B_w,50459
42
42
  vgi/_test_fixtures/versioned.py,sha256=Itm-x_Zt9WDwLGT4Dl4VzU5GtFF4HkcaJEqg9ErB8As,5784
43
43
  vgi/_test_fixtures/versioned_tables.py,sha256=KRllGGRrwH8JUtqH-tLHT1JL09rKN-EcEYZVeQdbaLs,22112
44
- vgi/_test_fixtures/worker.py,sha256=JPTVPONIoWL9VXN8mINoJH6_Puy1Byj5VOg_fsbw8ws,71171
44
+ vgi/_test_fixtures/worker.py,sha256=5G3shpiPoKWV_DP28UN1LYNp1wIJ2Q-TB8mJ3qok2q4,71230
45
45
  vgi/_test_fixtures/accumulate/__init__.py,sha256=4hYT8jqRoVHSjV9TB7v0Z1CMJtdLuPaDWSz4J2fvMDs,868
46
46
  vgi/_test_fixtures/accumulate/worker.py,sha256=yal9m-GjKNKUdLOLtwkCyFkeHVv_nnpUjh8amwueT48,30163
47
47
  vgi/_test_fixtures/aggregate/__init__.py,sha256=tjCVKdCuHlIAZL7uDi-o_q82oMieXsAyoKExesr-7a0,2156
@@ -71,7 +71,7 @@ vgi/_test_fixtures/scalar/type_info.py,sha256=2WeTxakT-_tcWybPfkCrAHVAMOadFN3tb8
71
71
  vgi/_test_fixtures/schema_reconcile/__init__.py,sha256=rCCtM5bd67-PTPeIYg9SCJaKUSglA6YeXsedQBEUlmA,1324
72
72
  vgi/_test_fixtures/schema_reconcile/worker.py,sha256=qkGRdKvI2AKItenlribd3cvUfvWUwPbAc2WrW7_7Ijc,23570
73
73
  vgi/_test_fixtures/table/__init__.py,sha256=PndeOVcsqi17XLwn0VnmPabjw3tFUfvOQFtETMOCjaU,7371
74
- vgi/_test_fixtures/table/_common.py,sha256=tO18gShWidcKcdHYn1FIEXDWzC2SwGsVMnA84r9Y3qs,5961
74
+ vgi/_test_fixtures/table/_common.py,sha256=9HYDW8aH7eD7V3CabTWTE7ZeVL_ELxUXdiJfuyuZjaQ,5961
75
75
  vgi/_test_fixtures/table/batch_index.py,sha256=P5ds0xgikuEQanSEWVWKMLbdvIzUeJraI-GuSoPdb6U,11641
76
76
  vgi/_test_fixtures/table/batch_index_broken.py,sha256=kZOGrLL7ZW1rmwPmNEYRmiF_vqIfHsfXioq5vKPWHk0,7314
77
77
  vgi/_test_fixtures/table/catalog_scans.py,sha256=5j1Sx02-HWK7bFurDu4e9HiS3Q9BmBukA3sAErH4GHE,5080
@@ -98,8 +98,8 @@ vgi/_test_fixtures/writable/worker.py,sha256=nH8KSZqyKv1gqdKn-_OFVh3h02FbyE0NFQ2
98
98
  vgi/catalog/__init__.py,sha256=SCpeP2TtPfhWhwaqK5qGmmtHCM2z-EL66OoCkyefy7c,2064
99
99
  vgi/catalog/_descriptor_spec.py,sha256=iMqId2fSBqlZ9HU6ct5AkYUAxtRGG_MVTLClDC1WPUs,7673
100
100
  vgi/catalog/attach_option.py,sha256=nLGxsfAFR_-NqDrw7v18dtkNNNQIYLr3fuhpVw4XhFc,1739
101
- vgi/catalog/catalog_interface.py,sha256=Ni3kkxXLQAtsQaDnRQMXih6r6Dcm6uACiHrQSE64SlY,120141
102
- vgi/catalog/descriptors.py,sha256=5LAP6xrWCebEG1EkD0lLNBYX-LBCcxylj_-q3SXxRA8,39047
101
+ vgi/catalog/catalog_interface.py,sha256=aCMSpYczcEb11TmOFdaGTN_F_yOywyR2go1Z6vC_0zY,120231
102
+ vgi/catalog/descriptors.py,sha256=oH-Ld02yQ5oUJb7J9sqGYwhZwaWzZiL3ZPtublmKXzc,39366
103
103
  vgi/catalog/duckdb_statistics.py,sha256=fARQupgq0fD46rDgxDXvdCFsgs-rvCOHhls7WXHmnFY,15615
104
104
  vgi/catalog/secret_type.py,sha256=MKtAypBa3xXyr-NC5CHjdX1R00JEnuMtvgboFWC2T9o,3336
105
105
  vgi/catalog/setting.py,sha256=06QfgaAR-0BKflIJ0du6PGqA5BPMdrkwr6u7f4nddII,1846
@@ -122,8 +122,8 @@ vgi/transactor/_duckdb_compat.py,sha256=sXVZ9JLKAQyGR1BjWczSwdQEavtr-TcZPoVZZnTr
122
122
  vgi/transactor/client.py,sha256=7DTeMksogsw6ANjQjGOPpKYrV76rg4_kGjktMJf54jg,4486
123
123
  vgi/transactor/protocol.py,sha256=Mtmll3CdrLFL1B4NY4NZUTO_yi3PT0qhvMQnzapuBWU,4780
124
124
  vgi/transactor/server.py,sha256=WpIqjzy2Mebw17Jui4-w7vyGEo9pD-pEZJG-3Ob1Sk8,29705
125
- vgi_python-0.8.2.dist-info/METADATA,sha256=N8axskbzq44l8JCTbwLv-g_bIEvNecVon3vkg-VwtlY,24725
126
- vgi_python-0.8.2.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
127
- vgi_python-0.8.2.dist-info/entry_points.txt,sha256=3Kz1vgodw3pOL_xjtSyDB55-ZRy-U2X-X_Bdr582x0Q,165
128
- vgi_python-0.8.2.dist-info/licenses/LICENSE,sha256=pbJb4zZasP6n5ifEV81wFu017TarjydaYVmGbHcehtY,6103
129
- vgi_python-0.8.2.dist-info/RECORD,,
125
+ vgi_python-0.8.4.dist-info/METADATA,sha256=AiJ8ZL_UJ96RP297iwcuoQsUlafMdGR1yr216r8apjA,24725
126
+ vgi_python-0.8.4.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
127
+ vgi_python-0.8.4.dist-info/entry_points.txt,sha256=3Kz1vgodw3pOL_xjtSyDB55-ZRy-U2X-X_Bdr582x0Q,165
128
+ vgi_python-0.8.4.dist-info/licenses/LICENSE,sha256=pbJb4zZasP6n5ifEV81wFu017TarjydaYVmGbHcehtY,6103
129
+ vgi_python-0.8.4.dist-info/RECORD,,