vcti-dataflow 2.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,8 @@
1
+ Copyright (c) 2018-2026 Visual Collaboration Technologies Inc.
2
+ All Rights Reserved.
3
+
4
+ This software is proprietary and confidential. Unauthorized copying,
5
+ distribution, or use of this software, via any medium, is strictly
6
+ prohibited. Access is granted only to authorized VCollab developers
7
+ and individuals explicitly authorized by Visual Collaboration
8
+ Technologies Inc.
@@ -0,0 +1,147 @@
1
+ Metadata-Version: 2.4
2
+ Name: vcti-dataflow
3
+ Version: 2.0.0
4
+ Summary: The DataNode binding of vcti-flow: sources, transformers, reducers, and combiners for vcti-datanode payloads.
5
+ Author: Visual Collaboration Technologies Inc.
6
+ Requires-Python: <3.15,>=3.12
7
+ Description-Content-Type: text/markdown
8
+ License-File: LICENSE
9
+ Requires-Dist: vcti-flow>=2.0.0
10
+ Requires-Dist: vcti-datanode>=2.0.0
11
+ Requires-Dist: numpy>=1.24
12
+ Provides-Extra: test
13
+ Requires-Dist: pytest; extra == "test"
14
+ Requires-Dist: pytest-cov; extra == "test"
15
+ Provides-Extra: lint
16
+ Requires-Dist: ruff; extra == "lint"
17
+ Provides-Extra: typecheck
18
+ Requires-Dist: mypy; extra == "typecheck"
19
+ Dynamic: license-file
20
+
21
+ # Data Flow
22
+
23
+ The DataNode binding of vcti-flow: sources, transformers, reducers, and combiners for vcti-datanode payloads.
24
+
25
+ ## Overview
26
+
27
+ [vcti-flow](https://github.com/vcollab/vcti-python-flow) is a payload-agnostic
28
+ framework for composing flow graphs — it never inspects the values flowing
29
+ through it. `vcti.flow.data` binds that framework to the
30
+ [vcti-datanode](https://github.com/vcollab/vcti-python-datanode) `DataNode`
31
+ payload (data plus layered attributes behind a data source), so you get
32
+ familiar, ready-bound node kinds — `Source`, `Transformer`, `Reducer`, `Sink` —
33
+ instead of writing `Source[DataNode]` everywhere, plus `from_array` (eager) and
34
+ `ArraySource` (lazy) for building payloads.
35
+
36
+ These node-kind names are the `vcti.flow` kinds bound to `DataNode`: in this
37
+ package `Source` *is* `Source[DataNode]`. It is a flow leaf node — distinct from
38
+ `vcti.datanode.DataSource`, the array-source ABC behind the re-exported
39
+ `EagerDataSource` / `LazyDataSource`.
40
+
41
+ Nodes that need a *structured* (named-field) array — table building, field
42
+ shaping, field-wise merge, and row-keyed iteration — live in the `vcti.flow.data.fields`
43
+ submodule; the base binding makes no assumption about array shape.
44
+
45
+ ## Installation
46
+
47
+ ```bash
48
+ pip install vcti-dataflow
49
+ ```
50
+
51
+ ### In `pyproject.toml` dependencies
52
+
53
+ ```toml
54
+ dependencies = [
55
+ "vcti-dataflow>=2.0.0",
56
+ ]
57
+ ```
58
+
59
+ ---
60
+
61
+ ## Quick Start
62
+
63
+ ```python
64
+ import numpy as np
65
+ from vcti.flow.data import Source, Transformer, DataNode, from_array
66
+
67
+ # A source produces a DataNode
68
+ class Stress(Source):
69
+ def load(self) -> DataNode:
70
+ return from_array(np.array([1.0, 2.0, 3.0]), {"units": "MPa"})
71
+
72
+ # A transformer maps one DataNode to another
73
+ class Scale(Transformer):
74
+ def __init__(self, factor: float) -> None:
75
+ super().__init__()
76
+ self.factor = factor
77
+
78
+ def transform(self, record: DataNode) -> DataNode:
79
+ return from_array(record.load() * self.factor, record.attributes)
80
+
81
+ result = Scale(2.0).connect(Stress()).execute()
82
+ result.load() # array([2., 4., 6.])
83
+ result.attributes["units"] # "MPa"
84
+ ```
85
+
86
+ Reach for the array only when you need it (`record.load()`); a leaf source can
87
+ hand back a `LazyDataSource`-backed node to defer a heavy read.
88
+
89
+ ### Structured-array nodes
90
+
91
+ The `vcti.flow.data.fields` submodule adds nodes that assume a structured
92
+ (named-field) array — building tables, naming/selecting/computing fields,
93
+ merging field groups, and row-keyed iteration:
94
+
95
+ ```python
96
+ from vcti.flow.data import ArraySource # lazy leaf source (base binding)
97
+ from vcti.flow.data.fields import (
98
+ RowTableSource, NameFields, SelectFields, ComputeFields,
99
+ RenameFields, DropFields, CastFields, MergeFields, for_each_field,
100
+ )
101
+
102
+ # Build a structured table from dict rows (lazy — rows read on load())
103
+ mats = RowTableSource(lambda: material_rows(reader),
104
+ columns={"id": "MAT_ID", "EX": "Young's Modulus"})
105
+
106
+ # Name plain columns, select/rename, compute
107
+ coords = NameFields(["X", "Y", "Z"]).connect(ArraySource(lambda: reader.coords()))
108
+ picked = SelectFields({"X": "x", "Y": "y"}).connect(coords)
109
+ mag = ComputeFields({"mag": lambda a: np.hypot(a["X"], a["Y"])}).connect(coords)
110
+
111
+ # Rename, drop, cast (rename & drop return views — no data copy)
112
+ renamed = RenameFields({"X": "x"}).connect(coords) # rename, keep the rest
113
+ trimmed = DropFields(["Z"]).connect(coords) # drop, keep the rest
114
+ narrow = CastFields({"X": "f4"}).connect(coords) # change dtypes
115
+
116
+ # Merge field groups (same row count) into one structured array
117
+ combined = MergeFields().connect(ids).connect(coords).execute()
118
+
119
+ # One flow per row, keyed by a field
120
+ for case_id, flow in for_each_field(cases, build_case_flow, key_field="ID"):
121
+ flow.execute()
122
+ ```
123
+
124
+ ---
125
+
126
+ ## API
127
+
128
+ | Symbol | Purpose |
129
+ |--------|---------|
130
+ | `Source` / `Transformer` / `Reducer` / `Sink` | `vcti.flow` node kinds bound to `DataNode` |
131
+ | `Observer` | `vcti.flow` observer bound to `DataNode` |
132
+ | `from_array(array, attributes=None)` | Build a `DataNode` from an in-memory array (eager) |
133
+ | `ArraySource(load_fn, attributes=None)` | Lazy leaf source over a callable returning an array (the lazy counterpart of `from_array`) |
134
+ | `DataNode` / `EagerDataSource` / `LazyDataSource` | Re-exported from `vcti-datanode` for convenience |
135
+ | `fields.RowTableSource` | Lazy leaf source — a structured table from dict rows |
136
+ | `fields.NameFields` / `fields.SelectFields` / `fields.ComputeFields` | Name plain columns, select/rename fields, append/replace computed fields |
137
+ | `fields.RenameFields` / `fields.DropFields` / `fields.CastFields` | Rename or drop fields (views, no copy), or change field dtypes |
138
+ | `fields.MergeFields` | Field-wise merge of structured arrays |
139
+ | `fields.for_each_field` / `fields.field_items` | Row-keyed fan-out over a structured array |
140
+
141
+ ---
142
+
143
+ ## Dependencies
144
+
145
+ - [vcti-flow](https://github.com/vcollab/vcti-python-flow) (>=2.0.0) — the generic framework
146
+ - [vcti-datanode](https://github.com/vcollab/vcti-python-datanode) (>=2.0.0) — the `DataNode` payload
147
+ - [numpy](https://numpy.org/) (>=1.24)
@@ -0,0 +1,127 @@
1
+ # Data Flow
2
+
3
+ The DataNode binding of vcti-flow: sources, transformers, reducers, and combiners for vcti-datanode payloads.
4
+
5
+ ## Overview
6
+
7
+ [vcti-flow](https://github.com/vcollab/vcti-python-flow) is a payload-agnostic
8
+ framework for composing flow graphs — it never inspects the values flowing
9
+ through it. `vcti.flow.data` binds that framework to the
10
+ [vcti-datanode](https://github.com/vcollab/vcti-python-datanode) `DataNode`
11
+ payload (data plus layered attributes behind a data source), so you get
12
+ familiar, ready-bound node kinds — `Source`, `Transformer`, `Reducer`, `Sink` —
13
+ instead of writing `Source[DataNode]` everywhere, plus `from_array` (eager) and
14
+ `ArraySource` (lazy) for building payloads.
15
+
16
+ These node-kind names are the `vcti.flow` kinds bound to `DataNode`: in this
17
+ package `Source` *is* `Source[DataNode]`. It is a flow leaf node — distinct from
18
+ `vcti.datanode.DataSource`, the array-source ABC behind the re-exported
19
+ `EagerDataSource` / `LazyDataSource`.
20
+
21
+ Nodes that need a *structured* (named-field) array — table building, field
22
+ shaping, field-wise merge, and row-keyed iteration — live in the `vcti.flow.data.fields`
23
+ submodule; the base binding makes no assumption about array shape.
24
+
25
+ ## Installation
26
+
27
+ ```bash
28
+ pip install vcti-dataflow
29
+ ```
30
+
31
+ ### In `pyproject.toml` dependencies
32
+
33
+ ```toml
34
+ dependencies = [
35
+ "vcti-dataflow>=2.0.0",
36
+ ]
37
+ ```
38
+
39
+ ---
40
+
41
+ ## Quick Start
42
+
43
+ ```python
44
+ import numpy as np
45
+ from vcti.flow.data import Source, Transformer, DataNode, from_array
46
+
47
+ # A source produces a DataNode
48
+ class Stress(Source):
49
+ def load(self) -> DataNode:
50
+ return from_array(np.array([1.0, 2.0, 3.0]), {"units": "MPa"})
51
+
52
+ # A transformer maps one DataNode to another
53
+ class Scale(Transformer):
54
+ def __init__(self, factor: float) -> None:
55
+ super().__init__()
56
+ self.factor = factor
57
+
58
+ def transform(self, record: DataNode) -> DataNode:
59
+ return from_array(record.load() * self.factor, record.attributes)
60
+
61
+ result = Scale(2.0).connect(Stress()).execute()
62
+ result.load() # array([2., 4., 6.])
63
+ result.attributes["units"] # "MPa"
64
+ ```
65
+
66
+ Reach for the array only when you need it (`record.load()`); a leaf source can
67
+ hand back a `LazyDataSource`-backed node to defer a heavy read.
68
+
69
+ ### Structured-array nodes
70
+
71
+ The `vcti.flow.data.fields` submodule adds nodes that assume a structured
72
+ (named-field) array — building tables, naming/selecting/computing fields,
73
+ merging field groups, and row-keyed iteration:
74
+
75
+ ```python
76
+ from vcti.flow.data import ArraySource # lazy leaf source (base binding)
77
+ from vcti.flow.data.fields import (
78
+ RowTableSource, NameFields, SelectFields, ComputeFields,
79
+ RenameFields, DropFields, CastFields, MergeFields, for_each_field,
80
+ )
81
+
82
+ # Build a structured table from dict rows (lazy — rows read on load())
83
+ mats = RowTableSource(lambda: material_rows(reader),
84
+ columns={"id": "MAT_ID", "EX": "Young's Modulus"})
85
+
86
+ # Name plain columns, select/rename, compute
87
+ coords = NameFields(["X", "Y", "Z"]).connect(ArraySource(lambda: reader.coords()))
88
+ picked = SelectFields({"X": "x", "Y": "y"}).connect(coords)
89
+ mag = ComputeFields({"mag": lambda a: np.hypot(a["X"], a["Y"])}).connect(coords)
90
+
91
+ # Rename, drop, cast (rename & drop return views — no data copy)
92
+ renamed = RenameFields({"X": "x"}).connect(coords) # rename, keep the rest
93
+ trimmed = DropFields(["Z"]).connect(coords) # drop, keep the rest
94
+ narrow = CastFields({"X": "f4"}).connect(coords) # change dtypes
95
+
96
+ # Merge field groups (same row count) into one structured array
97
+ combined = MergeFields().connect(ids).connect(coords).execute()
98
+
99
+ # One flow per row, keyed by a field
100
+ for case_id, flow in for_each_field(cases, build_case_flow, key_field="ID"):
101
+ flow.execute()
102
+ ```
103
+
104
+ ---
105
+
106
+ ## API
107
+
108
+ | Symbol | Purpose |
109
+ |--------|---------|
110
+ | `Source` / `Transformer` / `Reducer` / `Sink` | `vcti.flow` node kinds bound to `DataNode` |
111
+ | `Observer` | `vcti.flow` observer bound to `DataNode` |
112
+ | `from_array(array, attributes=None)` | Build a `DataNode` from an in-memory array (eager) |
113
+ | `ArraySource(load_fn, attributes=None)` | Lazy leaf source over a callable returning an array (the lazy counterpart of `from_array`) |
114
+ | `DataNode` / `EagerDataSource` / `LazyDataSource` | Re-exported from `vcti-datanode` for convenience |
115
+ | `fields.RowTableSource` | Lazy leaf source — a structured table from dict rows |
116
+ | `fields.NameFields` / `fields.SelectFields` / `fields.ComputeFields` | Name plain columns, select/rename fields, append/replace computed fields |
117
+ | `fields.RenameFields` / `fields.DropFields` / `fields.CastFields` | Rename or drop fields (views, no copy), or change field dtypes |
118
+ | `fields.MergeFields` | Field-wise merge of structured arrays |
119
+ | `fields.for_each_field` / `fields.field_items` | Row-keyed fan-out over a structured array |
120
+
121
+ ---
122
+
123
+ ## Dependencies
124
+
125
+ - [vcti-flow](https://github.com/vcollab/vcti-python-flow) (>=2.0.0) — the generic framework
126
+ - [vcti-datanode](https://github.com/vcollab/vcti-python-datanode) (>=2.0.0) — the `DataNode` payload
127
+ - [numpy](https://numpy.org/) (>=1.24)
@@ -0,0 +1,64 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "vcti-dataflow"
7
+ version = "2.0.0"
8
+ description = "The DataNode binding of vcti-flow: sources, transformers, reducers, and combiners for vcti-datanode payloads."
9
+ readme = "README.md"
10
+ authors = [
11
+ {name = "Visual Collaboration Technologies Inc."}
12
+ ]
13
+ requires-python = ">=3.12,<3.15"
14
+ dependencies = [
15
+ "vcti-flow>=2.0.0",
16
+ "vcti-datanode>=2.0.0",
17
+ "numpy>=1.24",
18
+ ]
19
+
20
+ [tool.setuptools.packages.find]
21
+ where = ["src"]
22
+ include = ["vcti.flow.data", "vcti.flow.data.*"]
23
+ namespaces = true
24
+
25
+ [tool.setuptools.package-data]
26
+ "vcti.flow.data" = ["py.typed"]
27
+
28
+ [project.optional-dependencies]
29
+ test = ["pytest", "pytest-cov"]
30
+ lint = ["ruff"]
31
+ typecheck = ["mypy"]
32
+
33
[tool.setuptools]
# The package ships a py.typed marker as package data. PEP 561 type checkers
# read it from the filesystem, so the distribution must not be flagged zip-safe.
zip-safe = false
35
+
36
+ [tool.pytest.ini_options]
37
+ addopts = "--cov=vcti.flow.data --cov-report=term-missing --cov-fail-under=95"
38
+
39
+ [tool.mypy]
40
+ python_version = "3.12"
41
+ strict = true
42
+ files = ["src"]
43
+ namespace_packages = true
44
+ explicit_package_bases = true
45
+ mypy_path = ["src"]
46
+
47
+ [tool.coverage.run]
48
+ branch = true
49
+
50
+ [tool.coverage.report]
51
+ exclude_also = [
52
+ "raise NotImplementedError",
53
+ "if TYPE_CHECKING:",
54
+ "if __name__ == .__main__.:",
55
+ "@(abc\\.)?abstractmethod",
56
+ "\\.\\.\\.",
57
+ ]
58
+
59
+ [tool.ruff]
60
+ target-version = "py312"
61
+ line-length = 99
62
+
63
+ [tool.ruff.lint]
64
+ select = ["E", "F", "W", "I", "UP"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,40 @@
1
+ # Copyright Visual Collaboration Technologies Inc. All Rights Reserved.
2
+ # See LICENSE for details.
3
+ """vcti.flow.data — the DataNode binding of vcti.flow.
4
+
5
+ DataNode-bound node kinds (``Source``, ``Transformer``, ``Reducer``, ``Sink``,
6
+ ``Observer``) over the generic ``vcti.flow`` framework, plus ``from_array`` (eager)
7
+ and ``ArraySource`` (lazy) for building ``DataNode`` payloads. Structured-array-
8
+ specific nodes (field merge, row iteration, field shaping) live in the
9
+ ``vcti.flow.data.fields`` submodule.
10
+ """
11
+
12
+ from importlib.metadata import version
13
+
14
+ from vcti.datanode import DataNode, EagerDataSource, LazyDataSource
15
+
16
+ from .aliases import (
17
+ Observer,
18
+ Reducer,
19
+ Sink,
20
+ Source,
21
+ Transformer,
22
+ )
23
+ from .record import from_array
24
+ from .sources import ArraySource
25
+
26
+ __version__ = version("vcti-dataflow")
27
+
28
+ __all__ = [
29
+ "__version__",
30
+ "ArraySource",
31
+ "DataNode",
32
+ "EagerDataSource",
33
+ "LazyDataSource",
34
+ "Observer",
35
+ "Reducer",
36
+ "Sink",
37
+ "Source",
38
+ "Transformer",
39
+ "from_array",
40
+ ]
@@ -0,0 +1,27 @@
1
+ # Copyright Visual Collaboration Technologies Inc. All Rights Reserved.
2
+ # See LICENSE for details.
3
+ """DataNode-bound aliases for the vcti.flow node kinds.
4
+
5
+ These bind the generic ``vcti.flow`` framework to the ``DataNode`` payload, so
6
+ authors subclass ``Source`` / ``Transformer`` / … — the ``vcti.flow.data``
7
+ spelling of ``Source[DataNode]`` / ``Transformer[DataNode, DataNode]`` — instead
8
+ of repeating the type parameter everywhere. They are plain assignments (not PEP
9
+ 695 ``type`` aliases) so they remain usable as base classes.
10
+
11
+ The names intentionally shadow the generic ``vcti.flow.core`` kinds: within this
12
+ binding ``Source`` *is* ``Source[DataNode]``. Note this ``Source`` is a flow leaf
13
+ node and is unrelated to ``vcti.datanode.DataSource`` — the array-source ABC
14
+ behind the re-exported ``EagerDataSource`` / ``LazyDataSource``. Dropping the
15
+ ``Data`` prefix is what keeps that ``DataSource`` name from clashing here.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import vcti.flow.core as core
21
+ from vcti.datanode import DataNode
22
+
23
+ Source = core.Source[DataNode]
24
+ Transformer = core.Transformer[DataNode, DataNode]
25
+ Reducer = core.Reducer[DataNode, DataNode]
26
+ Sink = core.Sink[DataNode]
27
+ Observer = core.Observer[DataNode]
@@ -0,0 +1,41 @@
1
+ # Copyright Visual Collaboration Technologies Inc. All Rights Reserved.
2
+ # See LICENSE for details.
3
+ """vcti.flow.data.fields — nodes for structured-array DataNode payloads.
4
+
5
+ These assume the DataNode's array is a structured (named-field) array. The base
6
+ ``vcti.flow.data`` binding makes no such assumption (its ``ArraySource`` /
7
+ ``from_array`` build shape-agnostic payloads).
8
+
9
+ - Source: ``RowTableSource`` (table from mapping rows) — lazy.
10
+ - Transformers: ``NameFields`` (name plain columns), ``SelectFields`` (select /
11
+ rename), ``RenameFields`` (rename, keep the rest), ``DropFields`` (drop a
12
+ subset), ``CastFields`` (change dtypes), ``ComputeFields`` (append / replace
13
+ computed fields).
14
+ - Reduce: ``MergeFields`` (field-wise merge).
15
+ - Iterate: ``for_each_field`` / ``field_items`` (row-keyed fan-out).
16
+ """
17
+
18
+ from .iterate import field_items, for_each_field
19
+ from .merge import MergeFields
20
+ from .sources import RowTableSource
21
+ from .transforms import (
22
+ CastFields,
23
+ ComputeFields,
24
+ DropFields,
25
+ NameFields,
26
+ RenameFields,
27
+ SelectFields,
28
+ )
29
+
30
+ __all__ = [
31
+ "CastFields",
32
+ "ComputeFields",
33
+ "DropFields",
34
+ "MergeFields",
35
+ "NameFields",
36
+ "RenameFields",
37
+ "RowTableSource",
38
+ "SelectFields",
39
+ "field_items",
40
+ "for_each_field",
41
+ ]
@@ -0,0 +1,48 @@
1
+ # Copyright Visual Collaboration Technologies Inc. All Rights Reserved.
2
+ # See LICENSE for details.
3
+ """Row iteration over a structured-array DataNode, as a flow combinator."""
4
+
5
+ from __future__ import annotations
6
+
7
+ from collections.abc import Callable, Iterable, Iterator
8
+ from typing import Any
9
+
10
+ from vcti.datanode import DataNode
11
+ from vcti.flow.core import Node, for_each
12
+
13
+
14
def field_items(key_field: str = "ID") -> Callable[[DataNode], Iterable[Any]]:
    """Return an items-extractor that reads *key_field* from each row.

    Suitable as the ``items`` argument to ``vcti.flow.core.for_each``. The
    returned extractor loads the node's array and returns one *key_field*
    value per row, in row order. An absent (``None``) array, or one whose
    first dimension is zero, yields an empty list.

    For a structured array the field is selected once as a whole column
    instead of being looked up by name again on every row; iterating that
    column produces the same per-row values. Arrays without named fields keep
    the per-row ``row[key_field]`` indexing.

    Args:
        key_field: Name of the field whose values become the items.

    Returns:
        A callable taking a ``DataNode`` and returning the list of key values.

    Raises:
        ValueError: If the array is structured and lacks *key_field*.
    """

    def extract(record: DataNode) -> list[Any]:
        arr = record.load()
        if arr is None or arr.shape[0] == 0:
            return []

        names = arr.dtype.names
        if names is None:
            # No named fields: leave it to each row to resolve the key.
            return [row[key_field] for row in arr]

        if key_field not in names:
            raise ValueError(
                f"Key field {key_field!r} not found in source fields {names}."
            )

        # Single column selection; avoids building a record object per row
        # just to read one field from it.
        return list(arr[key_field])

    return extract
36
+
37
+
38
+ def for_each_field[U](
39
+ source: Node[DataNode],
40
+ factory: Callable[[Any], Node[U]],
41
+ key_field: str = "ID",
42
+ ) -> Iterator[tuple[Any, Node[U]]]:
43
+ """Fan a keys DataNode out into one flow per row, keyed by *key_field*.
44
+
45
+ A DataNode-specific convenience over ``vcti.flow.core.for_each``: it yields
46
+ ``(key, flow)`` pairs, one per row of the source's structured array.
47
+ """
48
+ return for_each(source, field_items(key_field), factory)
@@ -0,0 +1,53 @@
1
+ # Copyright Visual Collaboration Technologies Inc. All Rights Reserved.
2
+ # See LICENSE for details.
3
+ """MergeFields — merge structured-array DataNodes field-wise."""
4
+
5
+ from __future__ import annotations
6
+
7
+ from typing import Any
8
+
9
+ from numpy.lib import recfunctions as rfn
10
+ from vcti.datanode import DataNode
11
+
12
+ from ..aliases import Reducer
13
+ from ..record import from_array
14
+
15
+
16
class MergeFields(Reducer):
    """Combine several DataNodes into one by placing their fields side by side.

    Inputs that report no data, or whose ``load()`` returns ``None``, are left
    out entirely (their attributes too). The remaining arrays must all have the
    same length and are joined with ``numpy.lib.recfunctions.merge_arrays``
    (flattened, unmasked); a single remaining array is passed through as is.
    Attributes of the remaining inputs are merged in input order, so a later
    input overrides an earlier one on a shared key. When nothing remains, the
    result of ``from_array()`` with no arguments is returned.

    Raises:
        ValueError: If the input arrays have different row counts.
    """

    def reduce(self, records: list[DataNode]) -> DataNode:
        # Pair every usable input with its loaded array.
        populated = []
        for node in records:
            if not node.has_data:
                continue
            data = node.load()
            if data is not None:
                populated.append((node, data))

        if not populated:
            return from_array()

        arrays = [data for _, data in populated]

        # merge_arrays would not reject mismatched lengths, so check up front.
        if len(arrays) > 1:
            expected = len(arrays[0])
            for position in range(1, len(arrays)):
                actual = len(arrays[position])
                if actual != expected:
                    raise ValueError(
                        f"Cannot merge arrays with different lengths. Array 0 has "
                        f"{expected} rows, but array {position} has {actual} rows."
                    )

        merged = (
            arrays[0]
            if len(arrays) == 1
            else rfn.merge_arrays(arrays, flatten=True, usemask=False)
        )

        # Later inputs overwrite earlier ones on duplicate attribute keys.
        combined: dict[str, Any] = {}
        for node, _ in populated:
            if node.attributes:
                combined.update(node.attributes)

        return from_array(merged, combined or None)
@@ -0,0 +1,94 @@
1
+ # Copyright Visual Collaboration Technologies Inc. All Rights Reserved.
2
+ # See LICENSE for details.
3
+ """Leaf source that builds a structured-array DataNode from mapping rows (lazily)."""
4
+
5
+ from __future__ import annotations
6
+
7
+ from collections.abc import Callable, Mapping, Sequence
8
+ from typing import Any
9
+
10
+ import numpy as np
11
+ from vcti.datanode import DataNode, LazyDataSource
12
+
13
+ from ..aliases import Source
14
+
15
+
16
def _column_from_rows(rows: Sequence[Mapping[str, Any]], src_key: str) -> np.ndarray:
    """Collect ``row[src_key]`` over *rows* into a single typed column.

    A row counts as missing when the key is absent or maps to ``None``. The
    dtype is chosen from the values that are present:

    - every value an integer (``bool`` excluded) → ``i8``, missing rows ``0``;
    - every value ``str``/``bytes`` → ``U<maxlen>``, missing rows ``""``
      (bytes are decoded as UTF-8 with replacement and stripped of trailing
      NULs);
    - anything else, including no present values at all → ``f8``, with missing
      rows and values ``float()`` rejects left as NaN.
    """
    row_count = len(rows)

    # (row index, value) for each row holding a non-None entry under src_key.
    found: list[tuple[int, Any]] = []
    for index, row in enumerate(rows):
        if row.get(src_key) is None:
            continue
        found.append((index, row[src_key]))

    integer_types = (int, np.integer)
    text_types = (str, bytes, np.bytes_, np.str_)
    byte_types = (bytes, np.bytes_)

    if found and all(
        isinstance(value, integer_types) and not isinstance(value, bool)
        for _, value in found
    ):
        ints = np.zeros(row_count, dtype="i8")
        for index, value in found:
            ints[index] = int(value)
        return ints

    if found and all(isinstance(value, text_types) for _, value in found):
        texts = []
        for _, value in found:
            if isinstance(value, byte_types):
                texts.append(value.decode("utf-8", "replace").rstrip("\x00"))
            else:
                texts.append(str(value))
        # A zero-width unicode dtype is avoided by falling back to width 1.
        width = max(map(len, texts), default=1) or 1
        strings = np.zeros(row_count, dtype=f"U{width}")
        for (index, _), text in zip(found, texts):
            strings[index] = text
        return strings

    floats = np.full(row_count, np.nan, dtype="f8")
    for index, value in found:
        try:
            floats[index] = float(value)
        except (TypeError, ValueError):
            continue  # non-scalar / non-numeric stays NaN
    return floats
60
+
61
+
62
class RowTableSource(Source):
    """Build a structured-array DataNode from heterogeneous mapping rows.

    ``columns`` chooses which row keys become output fields. A mapping is read
    as ``{source_key: output_field}``; any other iterable of names keeps each
    key under its own name. Every column's dtype is inferred from its present
    values by ``_column_from_rows``, which also fills missing entries
    (0 / "" / NaN).

    ``load()`` does not call ``rows_fn`` itself: it wraps the table builder in
    a ``LazyDataSource``, so the rows are read only when that data source runs
    the builder.

    Args:
        rows_fn: Zero-argument callable returning the mapping rows.
        columns: Field names to keep, or a ``source_key -> output_field`` mapping.
        name: Forwarded to the base ``Source`` constructor.
        attributes: Passed to the resulting DataNode as ``source_attributes``.
    """

    def __init__(
        self,
        rows_fn: Callable[[], Sequence[Mapping[str, Any]]],
        columns: list[str] | Mapping[str, str],
        *,
        name: str | None = None,
        attributes: Mapping[str, Any] | None = None,
    ) -> None:
        super().__init__(name=name)
        self._rows_fn = rows_fn
        # Test against Mapping rather than dict: a non-dict mapping would
        # otherwise be iterated as a bare list of keys and lose its renames.
        if isinstance(columns, Mapping):
            self._mapping: dict[str, str] = dict(columns)
        else:
            self._mapping = {c: c for c in columns}
        self._attributes = attributes

    def load(self) -> DataNode:
        def build() -> np.ndarray:
            rows = list(self._rows_fn())
            cols = {out: _column_from_rows(rows, src) for src, out in self._mapping.items()}
            # Field order follows the order of the column mapping.
            dtype = np.dtype([(out, cols[out].dtype) for out in self._mapping.values()])
            table = np.empty(len(rows), dtype=dtype)
            for out in self._mapping.values():
                table[out] = cols[out]
            return table

        return DataNode(data_source=LazyDataSource(build), source_attributes=self._attributes)