ztensor 0.1.0__py3-none-manylinux_2_5_i686.manylinux1_i686.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ztensor might be problematic. Click here for more details.

ztensor/__init__.py ADDED
@@ -0,0 +1,216 @@
1
+ import numpy as np
2
+ from .ztensor import ffi, lib
3
+
4
+
5
+ # --- Pythonic Wrapper ---
6
class ZTensorError(Exception):
    """Raised when an operation in the ztensor bindings fails."""
9
+
10
+
11
+ # A custom ndarray subclass to safely manage the lifetime of the CFFI pointer.
12
+ class _ZTensorView(np.ndarray):
13
+ def __new__(cls, buffer, dtype, shape, view_ptr):
14
+ # Create an array from the buffer, reshape it, and cast it to our custom type.
15
+ obj = np.frombuffer(buffer, dtype=dtype).reshape(shape).view(cls)
16
+ # Attach the object that owns the memory to an attribute.
17
+ obj._owner = view_ptr
18
+ return obj
19
+
20
+ def __array_finalize__(self, obj):
21
+ # This ensures that views and slices of our array also hold the reference.
22
+ if obj is None: return
23
+ self._owner = getattr(obj, '_owner', None)
24
+
25
+
26
def _get_last_error():
    """Return the last error message reported by the native library.

    Returns:
        The UTF-8 decoded message, or ``"Unknown FFI error"`` when the
        library hands back a NULL pointer.
    """
    message_ptr = lib.ztensor_last_error_message()
    if message_ptr == ffi.NULL:
        return "Unknown FFI error"
    return ffi.string(message_ptr).decode('utf-8')
32
+
33
+
34
def _check_ptr(ptr, func_name=""):
    """Return ``ptr`` unchanged, raising ``ZTensorError`` if it is NULL.

    Args:
        ptr: Pointer returned by an FFI call.
        func_name: Label included in the error message.

    Raises:
        ZTensorError: If ``ptr`` equals ``ffi.NULL``; the message includes
            the library's last error text.
    """
    if ptr != ffi.NULL:
        return ptr
    raise ZTensorError(f"Error in {func_name}: {_get_last_error()}")
39
+
40
+
41
+ def _check_status(status, func_name=""):
42
+ """Checks the integer status code from an FFI call and raises on failure."""
43
+ if status != 0:
44
+ raise ZTensorError(f"Error in {func_name}: {_get_last_error()}")
45
+
46
+
47
+ # Type Mappings between NumPy and ztensor
48
# NumPy dtype -> ztensor dtype string. Every supported type uses the same
# name on both sides, so the table is derived from the list of names.
DTYPE_NP_TO_ZT = {
    np.dtype(type_name): type_name
    for type_name in (
        'float64', 'float32',
        'int64', 'int32',
        'int16', 'int8',
        'uint64', 'uint32',
        'uint16', 'uint8',
        'bool',
    )
}
# Reverse lookup: ztensor dtype string -> NumPy dtype.
DTYPE_ZT_TO_NP = {zt_name: np_dtype for np_dtype, zt_name in DTYPE_NP_TO_ZT.items()}
57
+
58
+
59
class TensorMetadata:
    """A Pythonic wrapper around the CTensorMetadata pointer.

    The ``name``, ``dtype_str`` and ``shape`` properties are fetched from the
    native library on first access and cached on the instance afterwards.
    """

    def __init__(self, meta_ptr):
        """Take ownership of ``meta_ptr``.

        Args:
            meta_ptr: Pointer returned by the native library.

        Raises:
            ZTensorError: If ``meta_ptr`` is NULL.
        """
        # Validate first: registering the finalizer on an unchecked pointer
        # would later hand a NULL pointer to ztensor_metadata_free.
        _check_ptr(meta_ptr, "TensorMetadata constructor")
        # From here on CFFI calls ztensor_metadata_free when this cdata dies.
        self._ptr = ffi.gc(meta_ptr, lib.ztensor_metadata_free)
        self._name = None
        self._dtype_str = None
        self._shape = None

    @staticmethod
    def _take_string(str_ptr, func_name):
        """Copy a library-allocated string into Python and free the original."""
        _check_ptr(str_ptr, func_name)
        try:
            # ffi.string creates a copy of the bytes.
            return ffi.string(str_ptr).decode('utf-8')
        finally:
            # Free the native allocation even if decoding raised.
            lib.ztensor_free_string(str_ptr)

    @property
    def name(self):
        """The tensor name as a ``str`` (cached after the first lookup)."""
        if self._name is None:
            self._name = self._take_string(
                lib.ztensor_metadata_get_name(self._ptr), "get_name"
            )
        return self._name

    @property
    def dtype_str(self):
        """The ztensor dtype string (cached after the first lookup)."""
        if self._dtype_str is None:
            self._dtype_str = self._take_string(
                lib.ztensor_metadata_get_dtype_str(self._ptr), "get_dtype_str"
            )
        return self._dtype_str

    @property
    def dtype(self):
        """The NumPy dtype for this tensor.

        Returns ``None`` when ``dtype_str`` has no entry in ``DTYPE_ZT_TO_NP``.
        """
        return DTYPE_ZT_TO_NP.get(self.dtype_str)

    @property
    def shape(self):
        """The tensor shape as a tuple of ints; ``()`` when the length is 0."""
        if self._shape is None:
            shape_len = lib.ztensor_metadata_get_shape_len(self._ptr)
            if shape_len > 0:
                shape_data_ptr = lib.ztensor_metadata_get_shape_data(self._ptr)
                _check_ptr(shape_data_ptr, "get_shape_data")
                try:
                    self._shape = tuple(
                        shape_data_ptr[i] for i in range(shape_len)
                    )
                finally:
                    # Free the array that was allocated on the Rust side,
                    # even if copying the values out failed.
                    lib.ztensor_free_u64_array(shape_data_ptr, shape_len)
            else:
                self._shape = ()
        return self._shape
109
+
110
+
111
class Reader:
    """A Pythonic context manager for reading zTensor files."""

    def __init__(self, file_path):
        """Open ``file_path`` for reading.

        Args:
            file_path: Path of the zTensor file, as a ``str``.

        Raises:
            ZTensorError: If the native library returns a NULL reader.
        """
        path_bytes = file_path.encode('utf-8')
        ptr = lib.ztensor_reader_open(path_bytes)
        _check_ptr(ptr, f"Reader open: {file_path}")
        # CFFI calls ztensor_reader_free once this cdata is collected.
        self._ptr = ffi.gc(ptr, lib.ztensor_reader_free)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Dropping the reference marks the reader as closed; the native
        # handle is released by the finalizer registered in __init__.
        self._ptr = None

    def get_metadata(self, name: str) -> TensorMetadata:
        """Retrieve metadata for a tensor by its name.

        Raises:
            ZTensorError: If the reader is closed or the lookup returns NULL.
        """
        if self._ptr is None:
            raise ZTensorError("Reader is closed.")
        name_bytes = name.encode('utf-8')
        meta_ptr = lib.ztensor_reader_get_metadata_by_name(self._ptr, name_bytes)
        _check_ptr(meta_ptr, f"get_metadata: {name}")
        return TensorMetadata(meta_ptr)

    def read_tensor(self, name: str) -> np.ndarray:
        """Read a tensor by name and return it as a NumPy array.

        The array is built directly on the buffer exposed by the native view
        and keeps that view referenced for its own lifetime.

        Raises:
            ZTensorError: If the reader is closed, the tensor cannot be read,
                or its dtype has no NumPy equivalent in ``DTYPE_ZT_TO_NP``.
        """
        metadata = self.get_metadata(name)

        # A missing mapping yields None, which np.frombuffer would silently
        # treat as float64 and misinterpret the bytes; fail loudly instead.
        dtype = metadata.dtype
        if dtype is None:
            raise ZTensorError(
                f"Error in read_tensor: {name}: "
                f"unsupported dtype for NumPy: {metadata.dtype_str}"
            )

        view_ptr = lib.ztensor_reader_read_tensor_view(self._ptr, metadata._ptr)
        _check_ptr(view_ptr, f"read_tensor: {name}")

        # Let CFFI manage the lifetime of the view pointer.
        view_ptr = ffi.gc(view_ptr, lib.ztensor_free_tensor_view)

        # The subclass reshapes the buffer and holds on to view_ptr.
        return _ZTensorView(
            buffer=ffi.buffer(view_ptr.data, view_ptr.len),
            dtype=dtype,
            shape=metadata.shape,
            view_ptr=view_ptr,
        )
155
+
156
+
157
class Writer:
    """A Pythonic context manager for writing zTensor files.

    On a clean exit from the ``with`` block the file is finalized; if the
    block raised, the native writer is freed without finalizing.
    """

    def __init__(self, file_path):
        """Create a writer for ``file_path``.

        Args:
            file_path: Destination path, as a ``str``.

        Raises:
            ZTensorError: If the native library returns a NULL writer.
        """
        path_bytes = file_path.encode('utf-8')
        ptr = lib.ztensor_writer_create(path_bytes)
        _check_ptr(ptr, f"Writer create: {file_path}")
        # The pointer is consumed by finalize, so ffi.gc is not used here;
        # it is released through finalize() or ztensor_writer_free.
        self._ptr = ptr
        self._finalized = False

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        if self._ptr and not self._finalized:
            if exc_type is None:
                # No error in the with-body: write out the file.
                self.finalize()
            else:
                # An error occurred: skip finalize, just free the writer.
                lib.ztensor_writer_free(self._ptr)
                self._ptr = None

    def add_tensor(self, name: str, tensor: np.ndarray):
        """Add a NumPy array as a tensor to the file.

        Args:
            name: Tensor name.
            tensor: Array to store; it is made C-contiguous if necessary.

        Raises:
            ZTensorError: If the writer is closed, the dtype is not in
                ``DTYPE_NP_TO_ZT``, or the native call reports failure.
        """
        if not self._ptr:
            raise ZTensorError("Writer is closed or finalized.")

        # np.ascontiguousarray always returns ndim >= 1, which would store a
        # 0-d (scalar) tensor with shape (1,). np.asarray(order='C') gives
        # the same contiguity guarantee while keeping the original rank.
        tensor = np.asarray(tensor, order='C')

        # Reject unsupported dtypes before building any raw pointers.
        dtype_str = DTYPE_NP_TO_ZT.get(tensor.dtype)
        if not dtype_str:
            raise ZTensorError(f"Unsupported NumPy dtype: {tensor.dtype}")
        dtype_bytes = dtype_str.encode('utf-8')
        name_bytes = name.encode('utf-8')

        # shape_array must stay referenced until the FFI call returns,
        # because shape_ptr points into its memory.
        shape_array = np.array(tensor.shape, dtype=np.uint64)
        shape_ptr = ffi.cast("uint64_t*", shape_array.ctypes.data)

        # Cast to `unsigned char*` to match the CFFI definition.
        data_ptr = ffi.cast("unsigned char*", tensor.ctypes.data)

        status = lib.ztensor_writer_add_tensor(
            self._ptr, name_bytes, shape_ptr, len(tensor.shape),
            dtype_bytes, data_ptr, tensor.nbytes
        )
        _check_status(status, f"add_tensor: {name}")

    def finalize(self):
        """Finalize the zTensor file, writing the metadata index.

        Raises:
            ZTensorError: If the writer was already closed or finalized, or
                the native finalize call reports failure.
        """
        if not self._ptr:
            raise ZTensorError("Writer is already closed or finalized.")

        status = lib.ztensor_writer_finalize(self._ptr)
        # The native call consumes the writer pointer, so forget it before
        # the status check can raise.
        self._ptr = None
        self._finalized = True
        _check_status(status, "finalize")
214
+
215
+
216
+ __all__ = ["Reader", "Writer", "TensorMetadata", "ZTensorError"]
@@ -0,0 +1,7 @@
1
+ __all__ = ["lib", "ffi"]
2
+
3
+ import os
4
+ from .ffi import ffi
5
+
6
+ lib = ffi.dlopen(os.path.join(os.path.dirname(__file__), 'libztensor.so'))
7
+ del os
ztensor/ztensor/ffi.py ADDED
@@ -0,0 +1,10 @@
1
+ # auto-generated file
2
+ import _cffi_backend
3
+
4
+ ffi = _cffi_backend.FFI('ffi',
5
+ _version = 0x2601,
6
+ _types = b'\x00\x00\x29\x0D\x00\x00\x3D\x03\x00\x00\x3C\x03\x00\x00\x00\x0F\x00\x00\x2C\x0D\x00\x00\x3D\x03\x00\x00\x3F\x03\x00\x00\x00\x0F\x00\x00\x01\x0D\x00\x00\x06\x11\x00\x00\x00\x0F\x00\x00\x14\x0D\x00\x00\x06\x11\x00\x00\x00\x0F\x00\x00\x35\x0D\x00\x00\x02\x11\x00\x00\x00\x0F\x00\x00\x06\x0D\x00\x00\x00\x0F\x00\x00\x40\x0D\x00\x00\x3E\x03\x00\x00\x00\x0F\x00\x00\x40\x0D\x00\x00\x14\x11\x00\x00\x06\x11\x00\x00\x41\x03\x00\x00\x1C\x01\x00\x00\x06\x11\x00\x00\x42\x03\x00\x00\x1C\x01\x00\x00\x00\x0F\x00\x00\x1A\x0D\x00\x00\x02\x11\x00\x00\x00\x0F\x00\x00\x1A\x0D\x00\x00\x05\x11\x00\x00\x00\x0F\x00\x00\x38\x0D\x00\x00\x02\x11\x00\x00\x00\x0F\x00\x00\x44\x0D\x00\x00\x3B\x03\x00\x00\x00\x0F\x00\x00\x44\x0D\x00\x00\x3C\x03\x00\x00\x00\x0F\x00\x00\x44\x0D\x00\x00\x01\x11\x00\x00\x00\x0F\x00\x00\x44\x0D\x00\x00\x14\x11\x00\x00\x00\x0F\x00\x00\x44\x0D\x00\x00\x3F\x03\x00\x00\x00\x0F\x00\x00\x44\x0D\x00\x00\x41\x03\x00\x00\x1C\x01\x00\x00\x00\x0F\x00\x00\x00\x09\x00\x00\x01\x09\x00\x00\x02\x09\x00\x00\x03\x09\x00\x00\x02\x01\x00\x00\x07\x01\x00\x00\x18\x01\x00\x00\x04\x01\x00\x00\x44\x03\x00\x00\x00\x01',
7
+ _globals = (b'\xFF\xFF\xFF\x1FALIGNMENT',64,b'\x00\x00\x34\x23ztensor_free_string',0,b'\x00\x00\x28\x23ztensor_free_tensor_view',0,b'\x00\x00\x37\x23ztensor_free_u64_array',0,b'\x00\x00\x11\x23ztensor_last_error_message',0,b'\x00\x00\x2B\x23ztensor_metadata_free',0,b'\x00\x00\x0E\x23ztensor_metadata_get_dtype_str',0,b'\x00\x00\x0E\x23ztensor_metadata_get_name',0,b'\x00\x00\x25\x23ztensor_metadata_get_shape_data',0,b'\x00\x00\x1F\x23ztensor_metadata_get_shape_len',0,b'\x00\x00\x2E\x23ztensor_reader_free',0,b'\x00\x00\x04\x23ztensor_reader_get_metadata_by_name',0,b'\x00\x00\x22\x23ztensor_reader_get_metadata_count',0,b'\x00\x00\x08\x23ztensor_reader_open',0,b'\x00\x00\x00\x23ztensor_reader_read_tensor_view',0,b'\x00\x00\x16\x23ztensor_writer_add_tensor',0,b'\x00\x00\x0B\x23ztensor_writer_create',0,b'\x00\x00\x13\x23ztensor_writer_finalize',0,b'\x00\x00\x31\x23ztensor_writer_free',0),
8
+ _struct_unions = ((b'\x00\x00\x00\x3B\x00\x00\x00\x02CTensorDataView',b'\x00\x00\x1C\x11data',b'\x00\x00\x1A\x11len',b'\x00\x00\x43\x11_owner'),(b'\x00\x00\x00\x3C\x00\x00\x00\x10TensorMetadata',),(b'\x00\x00\x00\x3D\x00\x00\x00\x10ZTensorReader_BufReader_File',),(b'\x00\x00\x00\x3E\x00\x00\x00\x10ZTensorWriter_BufWriter_File',)),
9
+ _typenames = (b'\x00\x00\x00\x3BCTensorDataView',b'\x00\x00\x00\x3CCTensorMetadata',b'\x00\x00\x00\x3DCZTensorReader',b'\x00\x00\x00\x3ECZTensorWriter',b'\x00\x00\x00\x3CTensorMetadata',b'\x00\x00\x00\x3DZTensorReader_BufReader_File',b'\x00\x00\x00\x3EZTensorWriter_BufWriter_File'),
10
+ )
Binary file
@@ -0,0 +1,105 @@
1
+ Metadata-Version: 2.4
2
+ Name: ztensor
3
+ Version: 0.1.0
4
+ Classifier: Programming Language :: Rust
5
+ Classifier: Programming Language :: Python :: 3
6
+ Classifier: License :: OSI Approved :: MIT License
7
+ Classifier: Operating System :: OS Independent
8
+ Classifier: Intended Audience :: Developers
9
+ Classifier: Topic :: Scientific/Engineering
10
+ Requires-Dist: numpy
11
+ Requires-Dist: cffi
12
+ License-File: LICENSE
13
+ Summary: Python bindings for the zTensor library.
14
+ Author: In Gim <in.gim@yale.edu>
15
+ Author-email: In Gim <in.gim@yale.edu>
16
+ License: MIT
17
+ Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
18
+ Project-URL: Homepage, https://github.com/pie-project/ztensor
19
+ Project-URL: Source, https://github.com/pie-project/ztensor
20
+
21
+ # zTensor File Format
22
+
23
+ **Version 0.1.0**
24
+
25
+ zTensor is a binary format for storing large multi-dimensional arrays (tensors), designed for efficient, safe, and flexible access. It supports raw and compressed (zstd) encodings, quantized and sparse layouts, and is extensible.
26
+
27
+ For dense tensors stored with encoding: "raw" and matching endianness, zTensor enables zero-copy access to tensor data.
28
+
29
+ ## File Layout
30
+
31
+ ```
32
+ +-------------------------------+
33
+ | Magic Number (8 bytes) |
34
+ +-------------------------------+
35
+ | Tensor Blob 0 (aligned) |
36
+ +-------------------------------+
37
+ | Padding (if needed) |
38
+ +-------------------------------+
39
+ | Tensor Blob 1 (aligned) |
40
+ +-------------------------------+
41
+ | ... |
42
+ +-------------------------------+
43
+ | CBOR Metadata Array |
44
+ +-------------------------------+
45
+ | CBOR Array Size (8 bytes) |
46
+ +-------------------------------+
47
+ ```
48
+
49
+ - **Magic Number:** ASCII "ZTEN0001" at offset 0.
50
+ - **Tensor Blobs:** Each tensor's data, starting at a 64-byte aligned offset. No per-blob headers. Padding (undefined value, usually zero) is inserted as needed.
51
+ - **CBOR Metadata Array:** At the end of the file, a CBOR-encoded array of metadata maps (one per tensor).
52
+ - **CBOR Array Size:** Last 8 bytes, little-endian uint64, gives the size of the CBOR array.
53
+
54
+ ## Tensor Metadata (CBOR Map)
55
+ Each tensor's metadata is a CBOR map with these required fields:
56
+ - `name` (string): Tensor name.
57
+ - `offset` (uint64): Absolute file offset to tensor data (multiple of 64).
58
+ - `size` (uint64): On-disk size in bytes (compressed if applicable).
59
+ - `dtype` (string): Data type (see below).
60
+ - `shape` (array): Array of dimensions (empty for scalar).
61
+ - `encoding` (string): Data encoding (see below).
62
+ - `layout` (string): "dense" (default) or "sparse". For sparse, see below.
63
+
64
+ Optional fields:
65
+ - `data_endianness` (string): "little" or "big" (default: little, for raw multi-byte types).
66
+ - `checksum` (string): e.g., "crc32c:0x1234ABCD" or "sha256:...".
67
+ - Custom fields are allowed; unknown keys are ignored by readers.
68
+
69
+ ## Supported Data Types (`dtype`)
70
+ - `float64`, `float32`, `float16`, `bfloat16`
71
+ - `int64`, `int32`, `int16`, `int8`
72
+ - `uint64`, `uint32`, `uint16`, `uint8`
73
+ - `bool`
74
+
75
+ ## Supported Encodings (`encoding`)
76
+ - `raw`: Direct binary dump of tensor elements (with `data_endianness` if multi-byte).
77
+ - `zstd`: Zstandard-compressed data. `size` is compressed size.
78
+
79
+ ## Layouts
80
+ - `dense` (default): Standard contiguous tensor data.
81
+ - `sparse`: Data is stored in a sparse format. The metadata map must include a `sparse_format` field (e.g., "csr", "coo") and any additional fields required to describe the sparse structure (such as index arrays, indptr, etc.).
82
+
83
+ ## Index Reading
84
+ To read the index:
85
+ 1. Read the last 8 bytes for the CBOR array size.
86
+ 2. Seek backwards by that amount to read the CBOR metadata array.
87
+
88
+ As a result, the start offset of the metadata is: (file size) - (size of the metadata) - (8 bytes).
89
+
90
+ ## Zero-Tensor Files
91
+ A valid zTensor file may contain zero tensors:
92
+ - 8 bytes magic, 1 byte for the empty CBOR array (`0x80`), and 8 bytes for the CBOR array size (little-endian uint64 value 1, i.e. `0x01` followed by seven `0x00` bytes).
93
+ - Total: 17 bytes.
94
+
95
+ ## Extensibility
96
+ - New `dtype`, `encoding`, and `layout` values may be added in future versions.
97
+ - Custom metadata fields are allowed; unknown fields are ignored.
98
+
99
+ ## Notes
100
+ - All offsets are absolute and account for the 8-byte magic number.
101
+ - All tensor data blobs are 64-byte aligned.
102
+ - No per-blob headers; all metadata is in the CBOR array at the end of the file.
103
+ - For `encoding: "raw"` and multi-byte `dtype`, data is little-endian unless `data_endianness` is specified.
104
+ - For sparse tensors, the metadata must fully describe the sparse structure.
105
+
@@ -0,0 +1,8 @@
1
+ ztensor-0.1.0.dist-info/METADATA,sha256=PE8RgtYKK6sfQK8drIfswyyZxYCHwT1J2Iw1WR6vjtA,4421
2
+ ztensor-0.1.0.dist-info/WHEEL,sha256=SpGB6IWzB_riHhA8KE-DtiG1EenXfRr8HYaf2B05sKc,118
3
+ ztensor-0.1.0.dist-info/licenses/LICENSE,sha256=AoeyV1LzTyOz9sbr6uOzk_P0lW963DvhJHnVNVQlI3Y,1063
4
+ ztensor/__init__.py,sha256=sr2eKuzJBZLDbqIAntJ6WDxSZLBfWoi9_UN464jf8lw,8403
5
+ ztensor/ztensor/__init__.py,sha256=sIpB0pJYFX20TdZapIoPMxqMz37wKJyxCkAlTemWDq4,140
6
+ ztensor/ztensor/ffi.py,sha256=J7CG26lx0Xu0IjYR7GWev-BDgGQXseKA4cjkh0IhnLE,2746
7
+ ztensor/ztensor/libztensor.so,sha256=UQ1w5wToRUBAf80aoZtyzgS6DuM0WM7PfmlJ2qi238I,1799904
8
+ ztensor-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: maturin (1.9.0)
3
+ Root-Is-Purelib: false
4
+ Tag: py3-none-manylinux_2_5_i686.manylinux1_i686
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 In Gim
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.