vcti-dataflow 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,40 @@
1
+ # Copyright Visual Collaboration Technologies Inc. All Rights Reserved.
2
+ # See LICENSE for details.
3
+ """vcti.flow.data — the DataNode binding of vcti.flow.
4
+
5
+ DataNode-bound node kinds (``Source``, ``Transformer``, ``Reducer``, ``Sink``,
6
+ ``Observer``) over the generic ``vcti.flow`` framework, plus ``from_array`` (eager)
7
+ and ``ArraySource`` (lazy) for building ``DataNode`` payloads. Structured-array-
8
+ specific nodes (field merge, row iteration, field shaping) live in the
9
+ ``vcti.flow.data.fields`` submodule.
10
+ """
11
+
12
+ from importlib.metadata import version
13
+
14
+ from vcti.datanode import DataNode, EagerDataSource, LazyDataSource
15
+
16
+ from .aliases import (
17
+ Observer,
18
+ Reducer,
19
+ Sink,
20
+ Source,
21
+ Transformer,
22
+ )
23
+ from .record import from_array
24
+ from .sources import ArraySource
25
+
26
+ __version__ = version("vcti-dataflow")
27
+
28
+ __all__ = [
29
+ "__version__",
30
+ "ArraySource",
31
+ "DataNode",
32
+ "EagerDataSource",
33
+ "LazyDataSource",
34
+ "Observer",
35
+ "Reducer",
36
+ "Sink",
37
+ "Source",
38
+ "Transformer",
39
+ "from_array",
40
+ ]
@@ -0,0 +1,27 @@
1
+ # Copyright Visual Collaboration Technologies Inc. All Rights Reserved.
2
+ # See LICENSE for details.
3
+ """DataNode-bound aliases for the vcti.flow node kinds.
4
+
5
+ These bind the generic ``vcti.flow`` framework to the ``DataNode`` payload, so
6
+ authors subclass ``Source`` / ``Transformer`` / … — the ``vcti.flow.data``
7
+ spelling of ``Source[DataNode]`` / ``Transformer[DataNode, DataNode]`` — instead
8
+ of repeating the type parameter everywhere. They are plain assignments (not PEP
9
+ 695 ``type`` aliases) so they remain usable as base classes.
10
+
11
+ The names intentionally shadow the generic ``vcti.flow.core`` kinds: within this
12
+ binding ``Source`` *is* ``Source[DataNode]``. Note this ``Source`` is a flow leaf
13
+ node and is unrelated to ``vcti.datanode.DataSource`` — the array-source ABC
14
+ behind the re-exported ``EagerDataSource`` / ``LazyDataSource``. Dropping the
15
+ ``Data`` prefix is what keeps that ``DataSource`` name from clashing here.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import vcti.flow.core as core
21
+ from vcti.datanode import DataNode
22
+
23
# DataNode-bound spellings of the generic ``vcti.flow.core`` kinds. These are
# ordinary assignments of subscripted generics (not ``type`` aliases) so that
# each name can still be used as a base class.
Source = core.Source[DataNode]
# Transformer and Reducer take two type parameters; both are bound to DataNode.
Transformer = core.Transformer[DataNode, DataNode]
Reducer = core.Reducer[DataNode, DataNode]
Sink = core.Sink[DataNode]
Observer = core.Observer[DataNode]
@@ -0,0 +1,41 @@
1
+ # Copyright Visual Collaboration Technologies Inc. All Rights Reserved.
2
+ # See LICENSE for details.
3
+ """vcti.flow.data.fields — nodes for structured-array DataNode payloads.
4
+
5
+ These assume the DataNode's array is a structured (named-field) array. The base
6
+ ``vcti.flow.data`` binding makes no such assumption (its ``ArraySource`` /
7
+ ``from_array`` build shape-agnostic payloads).
8
+
9
+ - Source: ``RowTableSource`` (table from mapping rows) — lazy.
10
+ - Transformers: ``NameFields`` (name plain columns), ``SelectFields`` (select /
11
+ rename), ``RenameFields`` (rename, keep the rest), ``DropFields`` (drop a
12
+ subset), ``CastFields`` (change dtypes), ``ComputeFields`` (append / replace
13
+ computed fields).
14
+ - Reduce: ``MergeFields`` (field-wise merge).
15
+ - Iterate: ``for_each_field`` / ``field_items`` (row-keyed fan-out).
16
+ """
17
+
18
+ from .iterate import field_items, for_each_field
19
+ from .merge import MergeFields
20
+ from .sources import RowTableSource
21
+ from .transforms import (
22
+ CastFields,
23
+ ComputeFields,
24
+ DropFields,
25
+ NameFields,
26
+ RenameFields,
27
+ SelectFields,
28
+ )
29
+
30
+ __all__ = [
31
+ "CastFields",
32
+ "ComputeFields",
33
+ "DropFields",
34
+ "MergeFields",
35
+ "NameFields",
36
+ "RenameFields",
37
+ "RowTableSource",
38
+ "SelectFields",
39
+ "field_items",
40
+ "for_each_field",
41
+ ]
@@ -0,0 +1,48 @@
1
+ # Copyright Visual Collaboration Technologies Inc. All Rights Reserved.
2
+ # See LICENSE for details.
3
+ """Row iteration over a structured-array DataNode, as a flow combinator."""
4
+
5
+ from __future__ import annotations
6
+
7
+ from collections.abc import Callable, Iterable, Iterator
8
+ from typing import Any
9
+
10
+ from vcti.datanode import DataNode
11
+ from vcti.flow.core import Node, for_each
12
+
13
+
14
def field_items(key_field: str = "ID") -> Callable[[DataNode], Iterable[Any]]:
    """Return an items-extractor that reads *key_field* from each row.

    Suitable as the ``items`` argument to ``vcti.flow.core.for_each``. The
    extractor loads the node's array and returns the value of *key_field* for
    every row, in row order; an absent or empty array yields nothing.

    Args:
        key_field: Name of the structured-array field to read from each row.

    Returns:
        A callable mapping a ``DataNode`` to the list of its per-row key values.

    Raises:
        ValueError: Raised by the returned extractor when the array is
            non-empty and either is not structured (has no named fields) or
            is structured but lacks *key_field*.
    """

    def extract(record: DataNode) -> list[Any]:
        arr = record.load()
        if arr is None or arr.shape[0] == 0:
            return []
        names = arr.dtype.names
        if names is None:
            # A plain array has no named fields; indexing its rows by name
            # would otherwise surface as an opaque IndexError from numpy.
            raise ValueError(
                f"Key field {key_field!r} requires a structured array, "
                f"got a plain array of dtype {arr.dtype}."
            )
        if key_field not in names:
            raise ValueError(
                f"Key field {key_field!r} not found in source fields {names}."
            )
        return [row[key_field] for row in arr]

    return extract
36
+
37
+
38
+ def for_each_field[U](
39
+ source: Node[DataNode],
40
+ factory: Callable[[Any], Node[U]],
41
+ key_field: str = "ID",
42
+ ) -> Iterator[tuple[Any, Node[U]]]:
43
+ """Fan a keys DataNode out into one flow per row, keyed by *key_field*.
44
+
45
+ A DataNode-specific convenience over ``vcti.flow.core.for_each``: it yields
46
+ ``(key, flow)`` pairs, one per row of the source's structured array.
47
+ """
48
+ return for_each(source, field_items(key_field), factory)
@@ -0,0 +1,53 @@
1
+ # Copyright Visual Collaboration Technologies Inc. All Rights Reserved.
2
+ # See LICENSE for details.
3
+ """MergeFields — merge structured-array DataNodes field-wise."""
4
+
5
+ from __future__ import annotations
6
+
7
+ from typing import Any
8
+
9
+ from numpy.lib import recfunctions as rfn
10
+ from vcti.datanode import DataNode
11
+
12
+ from ..aliases import Reducer
13
+ from ..record import from_array
14
+
15
+
16
class MergeFields(Reducer):
    """Combine several DataNodes into one by placing their fields side by side.

    The input arrays are joined field-wise with
    ``numpy.lib.recfunctions.merge_arrays`` (flattened, unmasked), so every
    input must have the same number of rows. Attributes are folded together in
    input order with **last-wins** semantics. Inputs that carry no data are
    ignored; when none carries data the result is an empty (metadata-only)
    DataNode. A single data-bearing input is passed through without merging.

    Raises:
        ValueError: If the input arrays have different row counts.
    """

    def reduce(self, records: list[DataNode]) -> DataNode:
        # Keep only inputs that report data and actually load an array.
        usable: list[tuple[DataNode, Any]] = []
        for node in records:
            if not node.has_data:
                continue
            payload = node.load()
            if payload is not None:
                usable.append((node, payload))
        if not usable:
            return from_array()

        arrays = [payload for _, payload in usable]
        head, *tail = arrays
        if not tail:
            # Nothing to merge with: hand the lone array back untouched.
            merged = head
        else:
            expected = len(head)
            for position, candidate in enumerate(tail, start=1):
                actual = len(candidate)
                if actual != expected:
                    raise ValueError(
                        f"Cannot merge arrays with different lengths. Array 0 has "
                        f"{expected} rows, but array {position} has {actual} rows."
                    )
            merged = rfn.merge_arrays(arrays, flatten=True, usemask=False)

        # Later inputs overwrite earlier ones on key clashes.
        attrs: dict[str, Any] = {}
        for node, _ in usable:
            if node.attributes:
                attrs.update(node.attributes)

        return from_array(merged, attrs or None)
@@ -0,0 +1,94 @@
1
+ # Copyright Visual Collaboration Technologies Inc. All Rights Reserved.
2
+ # See LICENSE for details.
3
+ """Leaf source that builds a structured-array DataNode from mapping rows (lazily)."""
4
+
5
+ from __future__ import annotations
6
+
7
+ from collections.abc import Callable, Mapping, Sequence
8
+ from typing import Any
9
+
10
+ import numpy as np
11
+ from vcti.datanode import DataNode, LazyDataSource
12
+
13
+ from ..aliases import Source
14
+
15
+
16
+ def _column_from_rows(rows: Sequence[Mapping[str, Any]], src_key: str) -> np.ndarray:
17
+ """Build one structured-array column from a sequence of mapping rows.
18
+
19
+ Gathers ``row[src_key]`` across rows (absent / ``None`` = missing) and picks
20
+ a dtype from the present scalar values: all-int → ``i8`` (missing 0),
21
+ all-str/bytes → ``U<maxlen>`` (missing ""), otherwise ``f8`` (missing NaN,
22
+ non-numeric coerced to NaN).
23
+ """
24
+ n = len(rows)
25
+ present = [(i, r[src_key]) for i, r in enumerate(rows) if r.get(src_key) is not None]
26
+ values = [v for _, v in present]
27
+
28
+ def _is_int(v: Any) -> bool:
29
+ return isinstance(v, (int, np.integer)) and not isinstance(v, bool)
30
+
31
+ def _is_str(v: Any) -> bool:
32
+ return isinstance(v, (str, bytes, np.bytes_, np.str_))
33
+
34
+ if values and all(_is_int(v) for v in values):
35
+ int_col = np.zeros(n, dtype="i8")
36
+ for i, v in present:
37
+ int_col[i] = int(v)
38
+ return int_col
39
+
40
+ if values and all(_is_str(v) for v in values):
41
+ decoded = [
42
+ v.decode("utf-8", "replace").rstrip("\x00")
43
+ if isinstance(v, (bytes, np.bytes_))
44
+ else str(v)
45
+ for v in values
46
+ ]
47
+ width = max((len(s) for s in decoded), default=1) or 1
48
+ str_col = np.zeros(n, dtype=f"U{width}")
49
+ for (i, _), s in zip(present, decoded):
50
+ str_col[i] = s
51
+ return str_col
52
+
53
+ float_col = np.full(n, np.nan, dtype="f8")
54
+ for i, v in present:
55
+ try:
56
+ float_col[i] = float(v)
57
+ except (TypeError, ValueError):
58
+ pass # non-scalar / non-numeric stays NaN
59
+ return float_col
60
+
61
+
62
class RowTableSource(Source):
    """Leaf source that turns heterogeneous mapping rows into a structured table.

    ``columns`` picks which row keys become fields: a list keeps each key
    under its own name, while a dict ``{source_key: output_field}`` renames on
    the way. Each column's dtype is inferred from the values present, and
    missing entries are filled (0 / "" / NaN). ``rows_fn`` is only invoked by
    the array builder handed to ``LazyDataSource`` — i.e. when the resulting
    node is loaded, not at ``execute()`` time.
    """

    def __init__(
        self,
        rows_fn: Callable[[], Sequence[Mapping[str, Any]]],
        columns: list[str] | dict[str, str],
        *,
        name: str | None = None,
        attributes: Mapping[str, Any] | None = None,
    ) -> None:
        super().__init__(name=name)
        self._rows_fn = rows_fn
        # Normalise to {source_key: output_field}; a list maps keys to themselves.
        if isinstance(columns, dict):
            self._mapping = dict(columns)
        else:
            self._mapping = {key: key for key in columns}
        self._attributes = attributes

    def load(self) -> DataNode:
        """Return a DataNode whose array is built from the rows on first load."""

        def build() -> np.ndarray:
            rows = list(self._rows_fn())

            cols = {}
            for src, out in self._mapping.items():
                cols[out] = _column_from_rows(rows, src)

            out_names = list(self._mapping.values())
            dtype = np.dtype([(out, cols[out].dtype) for out in out_names])
            table = np.empty(len(rows), dtype=dtype)
            for out in out_names:
                table[out] = cols[out]
            return table

        lazy = LazyDataSource(build)
        return DataNode(data_source=lazy, source_attributes=self._attributes)
@@ -0,0 +1,203 @@
1
+ # Copyright Visual Collaboration Technologies Inc. All Rights Reserved.
2
+ # See LICENSE for details.
3
+ """Transformers that reshape structured-array DataNodes (eager)."""
4
+
5
+ from __future__ import annotations
6
+
7
+ from collections.abc import Callable
8
+ from typing import Any
9
+
10
+ import numpy as np
11
+ from numpy.lib import recfunctions as rfn
12
+ from vcti.datanode import DataNode
13
+
14
+ from ..aliases import Transformer
15
+ from ..record import from_array
16
+
17
+
18
class NameFields(Transformer):
    """Name the columns of a plain array, producing a structured field group.

    A 1-D array becomes a single named field; a 2-D ``(N, K)`` array becomes K
    named fields (e.g. ``["X", "Y", "Z"]`` for coordinates). Any axes beyond
    the first are flattened into columns. Empty arrays (zero rows) are
    supported and give an empty structured array with the requested fields.

    Args:
        names: One field name, or a list with one name per column.
        name: Optional node name, forwarded to the base class.

    Raises:
        ValueError: If the input has no data, is already structured, or its
            column count differs from the number of names given.
    """

    def __init__(self, names: str | list[str], *, name: str | None = None) -> None:
        super().__init__(name=name)
        self._names = [names] if isinstance(names, str) else list(names)

    def transform(self, record: DataNode) -> DataNode:
        arr = record.load()
        if arr is None:
            raise ValueError(f"{self.name}: input record has no data")
        if arr.dtype.names is not None:
            raise ValueError(f"{self.name}: expected a plain array, got a structured one")
        n_rows = len(arr)
        # Count the columns explicitly instead of reshaping with -1: numpy
        # cannot infer a -1 dimension when the array has zero elements, which
        # made empty inputs fail with an unrelated reshape error.
        n_cols = int(np.prod(arr.shape[1:]))
        cols = arr.reshape(n_rows, n_cols)
        if n_cols != len(self._names):
            raise ValueError(
                f"{self.name}: array has {n_cols} column(s) "
                f"but {len(self._names)} name(s) were given"
            )
        # Every output field shares the dtype of the plain input array.
        out_dtype = np.dtype([(n, arr.dtype) for n in self._names])
        out = np.empty(n_rows, dtype=out_dtype)
        for i, field in enumerate(self._names):
            out[field] = cols[:, i]
        return from_array(out, record.attributes)
46
+
47
+
48
class SelectFields(Transformer):
    """Pick a subset of fields from a structured field group, optionally renaming.

    Give a list to keep fields under their own names, or a dict
    ``{source: output}`` to rename while selecting. The output is a freshly
    allocated array containing the chosen fields in the order given, each with
    its source field's dtype.

    Raises:
        ValueError: If the input has no data, is not structured, or a
            requested source field does not exist.
    """

    def __init__(self, fields: list[str] | dict[str, str], *, name: str | None = None) -> None:
        super().__init__(name=name)
        if isinstance(fields, dict):
            self._mapping = dict(fields)
        else:
            self._mapping = {f: f for f in fields}

    def transform(self, record: DataNode) -> DataNode:
        arr = record.load()
        # A missing array and a plain array are reported identically.
        available = None if arr is None else arr.dtype.fields
        if available is None:
            raise ValueError(f"{self.name}: requires a structured field group")

        missing = [src for src in self._mapping if src not in available]
        if missing:
            raise ValueError(
                f"{self.name}: unknown field(s) {missing}; available: {list(available)}"
            )

        # dtype.fields maps name -> (dtype, offset, ...); element 0 is the dtype.
        spec = []
        for src, dst in self._mapping.items():
            spec.append((dst, available[src][0]))
        out = np.empty(arr.shape, dtype=np.dtype(spec))
        for src, dst in self._mapping.items():
            out[dst] = arr[src]
        return from_array(out, record.attributes)
74
+
75
+
76
class ComputeFields(Transformer):
    """Add (or overwrite) computed fields on a structured field group.

    Pass ``{name: fn}``. Each ``fn`` is called with the structured array built
    so far and must return one value per row. Fields are evaluated in
    iteration order against that running result, so a later function can use
    a field produced by an earlier one. A new name is appended as a new field;
    a name that already exists has its values overwritten in a copy of the
    array, keeping that field's existing dtype. Example::

        ComputeFields({"vmag": lambda a: np.sqrt(a["UX"] ** 2 + a["UY"] ** 2)})

    Raises:
        ValueError: If the input has no data or is not structured, or a
            computed field's length differs from the input's row count.
    """

    def __init__(
        self,
        fields: dict[str, Callable[[np.ndarray], Any]],
        *,
        name: str | None = None,
    ) -> None:
        super().__init__(name=name)
        self._fields = dict(fields)

    def transform(self, record: DataNode) -> DataNode:
        arr = record.load()
        if arr is None or arr.dtype.names is None:
            raise ValueError(f"{self.name}: requires a structured field group")

        expected = len(arr)
        result = arr
        for field_name, fn in self._fields.items():
            values = np.asarray(fn(result))
            got = len(values)
            if got != expected:
                raise ValueError(
                    f"{self.name}: computed field {field_name!r} has {got} "
                    f"row(s), expected {expected}"
                )
            existing = result.dtype.names or ()
            if field_name not in existing:
                result = rfn.append_fields(result, field_name, values, usemask=False)
                continue
            # Overwrite on a copy so the array loaded from the input is not mutated.
            result = result.copy()
            result[field_name] = values
        return from_array(result, record.attributes)
115
+
116
+
117
class RenameFields(Transformer):
    """Relabel fields of a structured field group, leaving the others alone.

    Pass ``{old: new}``; fields not listed keep their name and position. The
    work is delegated to ``numpy.lib.recfunctions.rename_fields``, which only
    relabels the dtype, so no array data is copied — the result shares memory
    with the input.

    Raises:
        ValueError: If the input has no data or is not structured, if an
            ``old`` name is not a field, or if the rename would leave two
            fields with the same name.
    """

    def __init__(self, names: dict[str, str], *, name: str | None = None) -> None:
        super().__init__(name=name)
        self._mapping = dict(names)

    def transform(self, record: DataNode) -> DataNode:
        arr = record.load()
        current = arr.dtype.names if arr is not None else None
        if current is None:
            raise ValueError(f"{self.name}: requires a structured field group")

        unknown = [old for old in self._mapping if old not in current]
        if unknown:
            raise ValueError(
                f"{self.name}: unknown field(s) {unknown}; available: {list(current)}"
            )

        # Preview the resulting names to reject collisions before relabelling.
        renamed = []
        for field in current:
            renamed.append(self._mapping.get(field, field))
        if len(renamed) != len(set(renamed)):
            raise ValueError(f"{self.name}: rename produces duplicate field name(s) in {renamed}")

        relabelled = rfn.rename_fields(arr, self._mapping)
        return from_array(relabelled, record.attributes)
143
+
144
+
145
class DropFields(Transformer):
    """Remove the listed fields from a structured field group.

    The surviving fields keep their original order and are obtained by
    multi-field indexing of the input array, which yields a view (no data
    copy). The inverse of ``SelectFields`` — handy when most fields are kept.

    Raises:
        ValueError: If the input has no data or is not structured, if a named
            field is absent, or if every field would be dropped.
    """

    def __init__(self, fields: list[str], *, name: str | None = None) -> None:
        super().__init__(name=name)
        self._drop = list(fields)

    def transform(self, record: DataNode) -> DataNode:
        arr = record.load()
        current = arr.dtype.names if arr is not None else None
        if current is None:
            raise ValueError(f"{self.name}: requires a structured field group")

        unknown = [field for field in self._drop if field not in current]
        if unknown:
            raise ValueError(
                f"{self.name}: unknown field(s) {unknown}; available: {list(current)}"
            )

        doomed = set(self._drop)
        survivors = [field for field in current if field not in doomed]
        if not survivors:
            raise ValueError(f"{self.name}: dropping all fields leaves nothing")
        return from_array(arr[survivors], record.attributes)
174
+
175
+
176
class CastFields(Transformer):
    """Change the dtype of chosen fields in a structured field group.

    Pass ``{field: dtype}``; fields not listed keep their dtype, and field
    order is preserved. Unlike ``RenameFields`` and ``DropFields``, casting
    cannot be done as a view: a new array is allocated with the target dtype
    and every field is copied across. Narrowing casts (e.g. ``f8`` -> ``f4``)
    lose precision by design.

    Raises:
        ValueError: If the input has no data or is not structured, or a
            listed field does not exist.
    """

    def __init__(self, dtypes: dict[str, Any], *, name: str | None = None) -> None:
        super().__init__(name=name)
        # Normalise every target to a numpy dtype up front.
        self._dtypes = {}
        for field, dt in dtypes.items():
            self._dtypes[field] = np.dtype(dt)

    def transform(self, record: DataNode) -> DataNode:
        arr = record.load()
        if arr is None:
            raise ValueError(f"{self.name}: requires a structured field group")
        layout = arr.dtype
        current, described = layout.names, layout.fields
        if current is None or described is None:
            raise ValueError(f"{self.name}: requires a structured field group")

        unknown = [field for field in self._dtypes if field not in current]
        if unknown:
            raise ValueError(
                f"{self.name}: unknown field(s) {unknown}; available: {list(current)}"
            )

        # dtype.fields maps name -> (dtype, offset, ...); element 0 is the dtype.
        spec = []
        for field in current:
            spec.append((field, self._dtypes.get(field, described[field][0])))
        out = np.empty(arr.shape, dtype=np.dtype(spec))
        for field in current:
            out[field] = arr[field]
        return from_array(out, record.attributes)
File without changes
@@ -0,0 +1,34 @@
1
+ # Copyright Visual Collaboration Technologies Inc. All Rights Reserved.
2
+ # See LICENSE for details.
3
+ """Build DataNode payloads for flow graphs from in-memory arrays."""
4
+
5
+ from __future__ import annotations
6
+
7
+ from collections.abc import Mapping
8
+ from typing import Any
9
+
10
+ import numpy as np
11
+ from vcti.datanode import DataNode, EagerDataSource
12
+
13
+
14
def from_array(
    array: np.ndarray | None = None,
    attributes: Mapping[str, Any] | None = None,
) -> DataNode:
    """Wrap an in-memory array (and optional metadata) in a DataNode.

    A given array is already resident, so it is held by an ``EagerDataSource``
    rather than deferred. Attributes are placed in the enriched layer (the
    convention for derived metadata); a leaf source reading external data
    passes ``source_attributes`` to ``DataNode`` directly instead. With
    *array* left as ``None`` the node gets no data source at all — a
    metadata-only node.

    Args:
        array: The array to wrap, or ``None`` for a metadata-only node.
        attributes: Optional metadata, stored in ``enriched_attributes``.

    Returns:
        A ``DataNode`` with an eager data source, or with no data source when
        *array* is ``None``.
    """
    if array is None:
        return DataNode(data_source=None, enriched_attributes=attributes)
    return DataNode(data_source=EagerDataSource(array), enriched_attributes=attributes)
@@ -0,0 +1,40 @@
1
+ # Copyright Visual Collaboration Technologies Inc. All Rights Reserved.
2
+ # See LICENSE for details.
3
+ """Lazy leaf source over a callable that returns an array."""
4
+
5
+ from __future__ import annotations
6
+
7
+ from collections.abc import Callable, Mapping
8
+ from typing import Any
9
+
10
+ import numpy as np
11
+ from vcti.datanode import DataNode, LazyDataSource
12
+
13
+ from .aliases import Source
14
+
15
+
16
class ArraySource(Source):
    """Lazy DataNode source backed by a callable that returns an array.

    ``load_fn`` is not called by this class; it is handed to a
    ``LazyDataSource``, so a heavy read (file, service, reader accessor) is
    deferred until the resulting node is loaded rather than run at
    ``execute()`` time. This is the lazy counterpart of :func:`from_array`
    (which is eager); no assumption is made about the array's shape.

    Args:
        load_fn: Zero-argument callable returning the numpy array.
        name: Optional node name, forwarded to the base class.
        attributes: Optional metadata, attached as ``source_attributes``.
    """

    def __init__(
        self,
        load_fn: Callable[[], np.ndarray],
        *,
        name: str | None = None,
        attributes: Mapping[str, Any] | None = None,
    ) -> None:
        super().__init__(name=name)
        self._attributes = attributes
        self._load_fn = load_fn

    def load(self) -> DataNode:
        """Return a DataNode whose array is produced by ``load_fn`` on demand."""
        deferred = LazyDataSource(self._load_fn)
        return DataNode(data_source=deferred, source_attributes=self._attributes)
@@ -0,0 +1,147 @@
1
+ Metadata-Version: 2.4
2
+ Name: vcti-dataflow
3
+ Version: 2.0.0
4
+ Summary: The DataNode binding of vcti-flow: sources, transformers, reducers, and combiners for vcti-datanode payloads.
5
+ Author: Visual Collaboration Technologies Inc.
6
+ Requires-Python: <3.15,>=3.12
7
+ Description-Content-Type: text/markdown
8
+ License-File: LICENSE
9
+ Requires-Dist: vcti-flow>=2.0.0
10
+ Requires-Dist: vcti-datanode>=2.0.0
11
+ Requires-Dist: numpy>=1.24
12
+ Provides-Extra: test
13
+ Requires-Dist: pytest; extra == "test"
14
+ Requires-Dist: pytest-cov; extra == "test"
15
+ Provides-Extra: lint
16
+ Requires-Dist: ruff; extra == "lint"
17
+ Provides-Extra: typecheck
18
+ Requires-Dist: mypy; extra == "typecheck"
19
+ Dynamic: license-file
20
+
21
+ # Data Flow
22
+
23
+ The DataNode binding of vcti-flow: sources, transformers, reducers, and combiners for vcti-datanode payloads.
24
+
25
+ ## Overview
26
+
27
+ [vcti-flow](https://github.com/vcollab/vcti-python-flow) is a payload-agnostic
28
+ framework for composing flow graphs — it never inspects the values flowing
29
+ through it. `vcti.flow.data` binds that framework to the
30
+ [vcti-datanode](https://github.com/vcollab/vcti-python-datanode) `DataNode`
31
+ payload (data plus layered attributes behind a data source), so you get
32
+ familiar, ready-bound node kinds — `Source`, `Transformer`, `Reducer`, `Sink`, `Observer` —
33
+ instead of writing `Source[DataNode]` everywhere, plus `from_array` (eager) and
34
+ `ArraySource` (lazy) for building payloads.
35
+
36
+ These node-kind names are the `vcti.flow` kinds bound to `DataNode`: in this
37
+ package `Source` *is* `Source[DataNode]`. It is a flow leaf node — distinct from
38
+ `vcti.datanode.DataSource`, the array-source ABC re-exported here as
39
+ `EagerDataSource` / `LazyDataSource`.
40
+
41
+ Nodes that need a *structured* (named-field) array — table building, field
42
+ shaping, field-wise merge, and row-keyed iteration — live in the
43
+ `vcti.flow.data.fields` submodule; the base binding makes no assumption about array shape.
44
+
45
+ ## Installation
46
+
47
+ ```bash
48
+ pip install vcti-dataflow
49
+ ```
50
+
51
+ ### In `pyproject.toml` dependencies
52
+
53
+ ```toml
54
+ dependencies = [
55
+ "vcti-dataflow>=2.0.0",
56
+ ]
57
+ ```
58
+
59
+ ---
60
+
61
+ ## Quick Start
62
+
63
+ ```python
64
+ import numpy as np
65
+ from vcti.flow.data import Source, Transformer, DataNode, from_array
66
+
67
+ # A source produces a DataNode
68
+ class Stress(Source):
69
+ def load(self) -> DataNode:
70
+ return from_array(np.array([1.0, 2.0, 3.0]), {"units": "MPa"})
71
+
72
+ # A transformer maps one DataNode to another
73
+ class Scale(Transformer):
74
+ def __init__(self, factor: float) -> None:
75
+ super().__init__()
76
+ self.factor = factor
77
+
78
+ def transform(self, record: DataNode) -> DataNode:
79
+ return from_array(record.load() * self.factor, record.attributes)
80
+
81
+ result = Scale(2.0).connect(Stress()).execute()
82
+ result.load() # array([2., 4., 6.])
83
+ result.attributes["units"] # "MPa"
84
+ ```
85
+
86
+ Reach for the array only when you need it (`record.load()`); a leaf source can
87
+ hand back a `LazyDataSource`-backed node to defer a heavy read.
88
+
89
+ ### Structured-array nodes
90
+
91
+ The `vcti.flow.data.fields` submodule adds nodes that assume a structured
92
+ (named-field) array — building tables, naming/selecting/computing fields,
93
+ merging field groups, and row-keyed iteration:
94
+
95
+ ```python
96
+ from vcti.flow.data import ArraySource # lazy leaf source (base binding)
97
+ from vcti.flow.data.fields import (
98
+ RowTableSource, NameFields, SelectFields, ComputeFields,
99
+ RenameFields, DropFields, CastFields, MergeFields, for_each_field,
100
+ )
101
+
102
+ # Build a structured table from dict rows (lazy — rows read on load())
103
+ mats = RowTableSource(lambda: material_rows(reader),
104
+ columns={"id": "MAT_ID", "EX": "Young's Modulus"})
105
+
106
+ # Name plain columns, select/rename, compute
107
+ coords = NameFields(["X", "Y", "Z"]).connect(ArraySource(lambda: reader.coords()))
108
+ picked = SelectFields({"X": "x", "Y": "y"}).connect(coords)
109
+ mag = ComputeFields({"mag": lambda a: np.hypot(a["X"], a["Y"])}).connect(coords)
110
+
111
+ # Rename, drop, cast (rename & drop return views — no data copy)
112
+ renamed = RenameFields({"X": "x"}).connect(coords) # rename, keep the rest
113
+ trimmed = DropFields(["Z"]).connect(coords) # drop, keep the rest
114
+ narrow = CastFields({"X": "f4"}).connect(coords) # change dtypes
115
+
116
+ # Merge field groups (same row count) into one structured array
117
+ combined = MergeFields().connect(ids).connect(coords).execute()
118
+
119
+ # One flow per row, keyed by a field
120
+ for case_id, flow in for_each_field(cases, build_case_flow, key_field="ID"):
121
+ flow.execute()
122
+ ```
123
+
124
+ ---
125
+
126
+ ## API
127
+
128
+ | Symbol | Purpose |
129
+ |--------|---------|
130
+ | `Source` / `Transformer` / `Reducer` / `Sink` | `vcti.flow` node kinds bound to `DataNode` |
131
+ | `Observer` | `vcti.flow` observer bound to `DataNode` |
132
+ | `from_array(array=None, attributes=None)` | Build a `DataNode` from an in-memory array (eager); with no array, a metadata-only node |
133
+ | `ArraySource(load_fn, *, name=None, attributes=None)` | Lazy leaf source over a callable returning an array (the lazy counterpart of `from_array`) |
134
+ | `DataNode` / `EagerDataSource` / `LazyDataSource` | Re-exported from `vcti-datanode` for convenience |
135
+ | `fields.RowTableSource` | Lazy leaf source — a structured table from dict rows |
136
+ | `fields.NameFields` / `fields.SelectFields` / `fields.ComputeFields` | Name plain columns, select/rename fields, append/replace computed fields |
137
+ | `fields.RenameFields` / `fields.DropFields` / `fields.CastFields` | Rename or drop fields (views, no copy), or change field dtypes |
138
+ | `fields.MergeFields` | Field-wise merge of structured arrays |
139
+ | `fields.for_each_field` / `fields.field_items` | Row-keyed fan-out over a structured array |
140
+
141
+ ---
142
+
143
+ ## Dependencies
144
+
145
+ - [vcti-flow](https://github.com/vcollab/vcti-python-flow) (>=2.0.0) — the generic framework
146
+ - [vcti-datanode](https://github.com/vcollab/vcti-python-datanode) (>=2.0.0) — the `DataNode` payload
147
+ - [numpy](https://numpy.org/) (>=1.24)
@@ -0,0 +1,16 @@
1
+ vcti/flow/data/__init__.py,sha256=TBauzupjsaAMKvlZrGTLizBBp0eUaADCi9GuY2hx7MM,1026
2
+ vcti/flow/data/aliases.py,sha256=2Tdby1qYkSig9JkhuZe1hjSXjxXaNbGi6OG4U5_WpJQ,1228
3
+ vcti/flow/data/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ vcti/flow/data/record.py,sha256=b6k_sg6ZzTNuVvv_diVAyOUK6k-i1DSi_KognAgS8_o,1287
5
+ vcti/flow/data/sources.py,sha256=M0MTCcnWBfBsXsJ6sXbP5cBgD5SJLhLRMVbp4gl8RbI,1225
6
+ vcti/flow/data/fields/__init__.py,sha256=c9bdahN3I1PtsRUb_H0gX_b3u77d0c2BgYy_rq0FHjI,1277
7
+ vcti/flow/data/fields/iterate.py,sha256=v0p_JICHOP_15RcSl20NSz7QCfBr-7o0o_5KBqVhIRU,1691
8
+ vcti/flow/data/fields/merge.py,sha256=OoKEQCCuPA-gs2U5y-NTsoPzhz07aH2ugmzsVn38UpI,1845
9
+ vcti/flow/data/fields/sources.py,sha256=FrEb1wupcFezUhU8dEavdwi5_fp3TonY9IuSbiaycFc,3455
10
+ vcti/flow/data/fields/transforms.py,sha256=dK87paRSFVC7J1GdbaHuiyYbyuruS_pl3ozqrCBr_Bw,8358
11
+ vcti_dataflow-2.0.0.dist-info/licenses/LICENSE,sha256=gqRj-E4YRsT7mZ52W76LG6aTTFv6iEOK9QR_fV5EdrI,369
12
+ vcti_dataflow-2.0.0.dist-info/METADATA,sha256=wwypvajpJ1Rio6bPWzFO2DnirS-2TbMSjendrgOT-88,5705
13
+ vcti_dataflow-2.0.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
14
+ vcti_dataflow-2.0.0.dist-info/top_level.txt,sha256=Jl6AIAI3Xhru_BFQAhD_13VeXLmZQd9BqBNUaAKNgKs,5
15
+ vcti_dataflow-2.0.0.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
16
+ vcti_dataflow-2.0.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,8 @@
1
+ Copyright (c) 2018-2026 Visual Collaboration Technologies Inc.
2
+ All Rights Reserved.
3
+
4
+ This software is proprietary and confidential. Unauthorized copying,
5
+ distribution, or use of this software, via any medium, is strictly
6
+ prohibited. Access is granted only to authorized VCollab developers
7
+ and individuals explicitly authorized by Visual Collaboration
8
+ Technologies Inc.
@@ -0,0 +1 @@
1
+ vcti
@@ -0,0 +1 @@
1
+