varuintarray 0.1.0b1__tar.gz → 1.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {varuintarray-0.1.0b1 → varuintarray-1.0.1}/.github/workflows/test.yml +10 -7
- varuintarray-1.0.1/CHANGELOG.md +11 -0
- varuintarray-1.0.1/LICENSE +21 -0
- varuintarray-1.0.1/PKG-INFO +291 -0
- varuintarray-1.0.1/README.md +280 -0
- {varuintarray-0.1.0b1 → varuintarray-1.0.1}/pyproject.toml +3 -3
- varuintarray-1.0.1/src/varuintarray/__init__.py +62 -0
- {varuintarray-0.1.0b1 → varuintarray-1.0.1}/src/varuintarray/array.py +106 -14
- varuintarray-1.0.1/test_readme.py +305 -0
- {varuintarray-0.1.0b1 → varuintarray-1.0.1}/tests/test_serialize.py +5 -5
- {varuintarray-0.1.0b1 → varuintarray-1.0.1}/tests/test_ufuncs.py +1 -0
- varuintarray-1.0.1/uv.lock +571 -0
- varuintarray-0.1.0b1/PKG-INFO +0 -280
- varuintarray-0.1.0b1/README.md +0 -270
- varuintarray-0.1.0b1/src/varuintarray/__init__.py +0 -3
- varuintarray-0.1.0b1/uv.lock +0 -2726
- {varuintarray-0.1.0b1 → varuintarray-1.0.1}/.github/workflows/release.yml +0 -0
- {varuintarray-0.1.0b1 → varuintarray-1.0.1}/.gitignore +0 -0
- {varuintarray-0.1.0b1 → varuintarray-1.0.1}/.python-version +0 -0
- {varuintarray-0.1.0b1 → varuintarray-1.0.1}/ruff.toml +0 -0
- {varuintarray-0.1.0b1 → varuintarray-1.0.1}/tests/__init__.py +0 -0
- {varuintarray-0.1.0b1 → varuintarray-1.0.1}/tests/strategies.py +0 -0
- {varuintarray-0.1.0b1 → varuintarray-1.0.1}/tests/test_array.py +0 -0
- {varuintarray-0.1.0b1 → varuintarray-1.0.1}/tests/test_functions.py +0 -0
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
name: Test
|
|
2
2
|
|
|
3
|
-
on:
|
|
4
|
-
push:
|
|
5
|
-
|
|
6
|
-
pull_request:
|
|
7
|
-
|
|
3
|
+
on: [push, pull_request]
|
|
4
|
+
# push:
|
|
5
|
+
# branches: [ "main" ]
|
|
6
|
+
# pull_request:
|
|
7
|
+
# branches: [ "main" ]
|
|
8
8
|
|
|
9
9
|
jobs:
|
|
10
10
|
build:
|
|
@@ -26,5 +26,8 @@ jobs:
|
|
|
26
26
|
- name: Install the project
|
|
27
27
|
run: uv sync --locked --all-extras --dev
|
|
28
28
|
|
|
29
|
-
- name: Run
|
|
30
|
-
run: uv run pytest tests
|
|
29
|
+
- name: Run pytest
|
|
30
|
+
run: uv run pytest tests/
|
|
31
|
+
|
|
32
|
+
- name: Run doctest
|
|
33
|
+
run: uv run python test_readme.py
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/).
|
|
6
|
+
|
|
7
|
+
## [1.0.1] - 2026-02-24
|
|
8
|
+
|
|
9
|
+
### Fixed
|
|
10
|
+
|
|
11
|
+
- `__array_wrap__` now accepts `context` and `return_scalar` as positional-only arguments, fixing a deprecation warning in NumPy 2.x.
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Jonathan Olsten
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,291 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: varuintarray
|
|
3
|
+
Version: 1.0.1
|
|
4
|
+
Summary: A variable-length unsigned integer array
|
|
5
|
+
Author-email: Jonathan Olsten <jolsten@gmail.com>
|
|
6
|
+
License-File: LICENSE
|
|
7
|
+
Requires-Python: >=3.9
|
|
8
|
+
Requires-Dist: lark>=1.2
|
|
9
|
+
Requires-Dist: numpy>=2.0
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
|
|
12
|
+
# VarUIntArray
|
|
13
|
+
|
|
14
|
+
A NumPy subclass for working with variable-length unsigned integers that don't fit standard machine word sizes.
|
|
15
|
+
|
|
16
|
+
## Overview
|
|
17
|
+
|
|
18
|
+
`VarUIntArray` extends `numpy.ndarray` to handle arbitrary bit-width unsigned integers (e.g., 3-bit, 10-bit, 12-bit) while correctly managing padding bits when using NumPy's universal functions (ufuncs). This is particularly useful when working with:
|
|
19
|
+
|
|
20
|
+
- Custom binary formats with non-standard word sizes
|
|
21
|
+
- Packed bit arrays where words don't align to 8, 16, 32, or 64 bits
|
|
22
|
+
- Data structures that require precise bit-width control
|
|
23
|
+
|
|
24
|
+
## Key Features
|
|
25
|
+
|
|
26
|
+
- **Arbitrary Word Sizes**: Support for any word size from 1 to 64 bits
|
|
27
|
+
- **Automatic Padding Management**: Correctly handles padding bits in bitwise operations
|
|
28
|
+
- **NumPy Integration**: Works seamlessly with NumPy ufuncs and array operations
|
|
29
|
+
- **Pack/Unpack Operations**: Convert between bit arrays and packed integer arrays
|
|
30
|
+
|
|
31
|
+
## Installation
|
|
32
|
+
|
|
33
|
+
This module can be installed from PyPI:
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
pip install varuintarray
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## Quick Start
|
|
40
|
+
|
|
41
|
+
### Create a VarUIntArray with 10-bit words
|
|
42
|
+
|
|
43
|
+
```python
|
|
44
|
+
>>> arr = VarUIntArray([1, 2, 1023], word_size=10)
|
|
45
|
+
>>> arr
|
|
46
|
+
VarUIntArray([ 1, 2, 1023], dtype='>u2', word_size=10)
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
### Bitwise operations respect word_size
|
|
50
|
+
```python
|
|
51
|
+
>>> inverted = arr.invert()
|
|
52
|
+
>>> inverted
|
|
53
|
+
VarUIntArray([1022, 1021, 0], dtype='>u2', word_size=10)
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
### Unpack to individual bits
|
|
57
|
+
```python
|
|
58
|
+
>>> bits = arr.unpackbits()
|
|
59
|
+
>>> bits.shape
|
|
60
|
+
(3, 10)
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
### Pack bits back into words
|
|
64
|
+
```python
|
|
65
|
+
>>> packed = VarUIntArray.packbits(bits)
|
|
66
|
+
>>> packed
|
|
67
|
+
VarUIntArray([ 1, 2, 1023], dtype='>u2', word_size=10)
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## Core Concepts
|
|
71
|
+
|
|
72
|
+
### Word Size vs Machine Size
|
|
73
|
+
|
|
74
|
+
Standard computers work with word sizes of 8, 16, 32, or 64 bits. When you need a 10-bit word, it must be stored in a 16-bit container, leaving 6 padding bits unused. `VarUIntArray` automatically:
|
|
75
|
+
|
|
76
|
+
1. Selects the appropriate machine word size (8, 16, 32, or 64 bits)
|
|
77
|
+
2. Tracks the actual word size you care about
|
|
78
|
+
3. Ensures padding bits are handled correctly in operations
|
|
79
|
+
|
|
80
|
+
### Padding Bit Handling
|
|
81
|
+
|
|
82
|
+
The most important feature is correct handling of padding bits during bitwise operations. For example:
|
|
83
|
+
|
|
84
|
+
```python
|
|
85
|
+
# 3-bit word stored in 8-bit container
|
|
86
|
+
>>> arr = VarUIntArray([5], word_size=3) # Binary: 101
|
|
87
|
+
|
|
88
|
+
# Standard NumPy invert would give 11111010 (250)
|
|
89
|
+
# VarUIntArray.invert() gives 010 (2) - correct for 3-bit word
|
|
90
|
+
>>> inverted = arr.invert()
|
|
91
|
+
>>> int(inverted[0])
|
|
92
|
+
2
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
## API Reference
|
|
97
|
+
|
|
98
|
+
### VarUIntArray Class
|
|
99
|
+
|
|
100
|
+
#### Constructor
|
|
101
|
+
|
|
102
|
+
```python
|
|
103
|
+
VarUIntArray(input_array, word_size)
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
**Parameters:**
|
|
107
|
+
- `input_array`: Array-like data to convert
|
|
108
|
+
- `word_size`: Number of significant bits per word (1-64)
|
|
109
|
+
|
|
110
|
+
#### Methods
|
|
111
|
+
|
|
112
|
+
- `invert()`: Bitwise invert respecting word_size
|
|
113
|
+
- `unpackbits()`: Unpack to individual bits (adds one dimension)
|
|
114
|
+
- `packbits(data)`: Class method to pack bit array into VarUIntArray
|
|
115
|
+
- `to_dict()`: Serialize to a dictionary
|
|
116
|
+
- `from_dict(data)`: Static method to deserialize from a dictionary
|
|
117
|
+
- `to_json()`: Serialize to a JSON string
|
|
118
|
+
- `from_json(string)`: Class method to deserialize from a JSON string
|
|
119
|
+
|
|
120
|
+
#### Attributes
|
|
121
|
+
|
|
122
|
+
- `word_size`: Number of significant bits per word
|
|
123
|
+
|
|
124
|
+
### Functions
|
|
125
|
+
|
|
126
|
+
#### `unpackbits(array)`
|
|
127
|
+
|
|
128
|
+
Unpack a VarUIntArray into individual bits, excluding padding.
|
|
129
|
+
|
|
130
|
+
```python
|
|
131
|
+
>>> arr = VarUIntArray([5, 3], word_size=3)
|
|
132
|
+
>>> unpackbits(arr)
|
|
133
|
+
array([[1, 0, 1],
|
|
134
|
+
[0, 1, 1]], dtype=uint8)
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
**Parameters:**
|
|
138
|
+
- `array`: VarUIntArray to unpack
|
|
139
|
+
|
|
140
|
+
**Returns:** ndarray with shape `(*original_shape, word_size)`
|
|
141
|
+
|
|
142
|
+
#### `packbits(array)`
|
|
143
|
+
|
|
144
|
+
Pack a bit array into a VarUIntArray.
|
|
145
|
+
|
|
146
|
+
```python
|
|
147
|
+
>>> bits = np.array([[1, 0, 1], [0, 1, 1]], dtype=np.uint8)
|
|
148
|
+
>>> packbits(bits)
|
|
149
|
+
VarUIntArray([5, 3], dtype=uint8, word_size=3)
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
**Parameters:**
|
|
153
|
+
- `array`: ndarray of uint8 containing 0s and 1s, where the last dimension contains bits for each word
|
|
154
|
+
|
|
155
|
+
**Returns:** VarUIntArray with one fewer dimension
|
|
156
|
+
|
|
157
|
+
#### `VarUIntArray.to_dict()`
|
|
158
|
+
|
|
159
|
+
Serialize VarUIntArray to JSON-compatible dictionary.
|
|
160
|
+
|
|
161
|
+
```python
|
|
162
|
+
>>> arr = VarUIntArray([1, 2, 3], word_size=10)
|
|
163
|
+
>>> arr.to_dict()
|
|
164
|
+
{'word_size': 10, 'values': [1, 2, 3]}
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
#### `VarUIntArray.from_dict(data)`
|
|
168
|
+
|
|
169
|
+
Deserialize a VarUIntArray from a dictionary containing `values` and `word_size` keys.
|
|
170
|
+
|
|
171
|
+
```python
|
|
172
|
+
# From dictionary
|
|
173
|
+
>>> VarUIntArray.from_dict({'values': [1, 2, 3], 'word_size': 10})
|
|
174
|
+
VarUIntArray([1, 2, 3], dtype='>u2', word_size=10)
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
#### `VarUIntArray.to_json()`
|
|
178
|
+
|
|
179
|
+
Serialize VarUIntArray to a JSON string.
|
|
180
|
+
|
|
181
|
+
```python
|
|
182
|
+
>>> arr = VarUIntArray([1, 2, 3], word_size=10)
|
|
183
|
+
>>> arr.to_json()
|
|
184
|
+
'{"word_size": 10, "values": [1, 2, 3]}'
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
#### `VarUIntArray.from_json(string)`
|
|
188
|
+
|
|
189
|
+
Deserialize a VarUIntArray from a JSON string.
|
|
190
|
+
|
|
191
|
+
```python
|
|
192
|
+
>>> json_str = '{"word_size": 10, "values": [1, 2, 3]}'
|
|
193
|
+
>>> VarUIntArray.from_json(json_str)
|
|
194
|
+
VarUIntArray([1, 2, 3], dtype='>u2', word_size=10)
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
## Use Cases
|
|
198
|
+
|
|
199
|
+
### Custom Binary Protocols
|
|
200
|
+
|
|
201
|
+
Working with network protocols or file formats that use non-standard bit widths:
|
|
202
|
+
|
|
203
|
+
```python
|
|
204
|
+
# 12-bit color values (common in some image formats)
|
|
205
|
+
>>> colors = VarUIntArray([4095, 2048, 0], word_size=12)
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
### Bit Manipulation
|
|
209
|
+
|
|
210
|
+
Performing bitwise operations on packed data:
|
|
211
|
+
|
|
212
|
+
```python
|
|
213
|
+
>>> data = VarUIntArray([0b1010, 0b0101], word_size=4)
|
|
214
|
+
>>> mask = VarUIntArray([0b1100, 0b0011], word_size=4)
|
|
215
|
+
>>> result = data & mask # Bitwise AND
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
## Implementation Details
|
|
219
|
+
|
|
220
|
+
### Memory Layout
|
|
221
|
+
|
|
222
|
+
- VarUIntArray uses big-endian byte order (`'>'` dtype prefix) for consistency.
|
|
223
|
+
- Data is stored in the smallest standard NumPy unsigned integer type that can hold the specified word_size.
|
|
224
|
+
|
|
225
|
+
### Limitations
|
|
226
|
+
|
|
227
|
+
- Maximum word size: 64 bits
|
|
228
|
+
- Only unsigned integers are supported
|
|
229
|
+
- The `axis` parameter is not supported for `np.unpackbits` on VarUIntArray
|
|
230
|
+
|
|
231
|
+
## Examples
|
|
232
|
+
|
|
233
|
+
### Complete Workflow
|
|
234
|
+
|
|
235
|
+
```python
|
|
236
|
+
>>> import numpy as np
|
|
237
|
+
>>> from varuintarray import VarUIntArray
|
|
238
|
+
|
|
239
|
+
# Create some 5-bit values
|
|
240
|
+
>>> data = VarUIntArray([31, 16, 0, 15], word_size=5)
|
|
241
|
+
|
|
242
|
+
# Unpack to bits
|
|
243
|
+
>>> bits = data.unpackbits()
|
|
244
|
+
>>> bits
|
|
245
|
+
array([[1, 1, 1, 1, 1],
|
|
246
|
+
[1, 0, 0, 0, 0],
|
|
247
|
+
[0, 0, 0, 0, 0],
|
|
248
|
+
[0, 1, 1, 1, 1]], dtype=uint8)
|
|
249
|
+
|
|
250
|
+
# Flip specific bits
|
|
251
|
+
>>> bits[:, 0] = 1 - bits[:, 0] # Flip first bit
|
|
252
|
+
|
|
253
|
+
# Pack back
|
|
254
|
+
>>> result = VarUIntArray.packbits(bits)
|
|
255
|
+
>>> result
|
|
256
|
+
VarUIntArray([15, 0, 16, 31], dtype=uint8, word_size=5)
|
|
257
|
+
|
|
258
|
+
# Bitwise operations
|
|
259
|
+
>>> result.invert()
|
|
260
|
+
VarUIntArray([16, 31, 15, 0], dtype=uint8, word_size=5)
|
|
261
|
+
```
|
|
262
|
+
|
|
263
|
+
### Serialization
|
|
264
|
+
|
|
265
|
+
```python
|
|
266
|
+
>>> from varuintarray import VarUIntArray
|
|
267
|
+
>>> import json
|
|
268
|
+
|
|
269
|
+
# Serialize dict
|
|
270
|
+
>>> arr = VarUIntArray([100, 200, 300], word_size=12)
|
|
271
|
+
>>> serialized = arr.to_dict()
|
|
272
|
+
>>> serialized
|
|
273
|
+
{'word_size': 12, 'values': [100, 200, 300]}
|
|
274
|
+
|
|
275
|
+
# Deserialize dict
|
|
276
|
+
>>> VarUIntArray.from_dict(serialized)
|
|
277
|
+
VarUIntArray([100, 200, 300], dtype='>u2', word_size=12)
|
|
278
|
+
|
|
279
|
+
# Serialize JSON
|
|
280
|
+
>>> serialized = arr.to_json()
|
|
281
|
+
>>> serialized
|
|
282
|
+
'{"word_size": 12, "values": [100, 200, 300]}'
|
|
283
|
+
|
|
284
|
+
# Deserialize JSON
|
|
285
|
+
>>> VarUIntArray.from_json(serialized)
|
|
286
|
+
VarUIntArray([100, 200, 300], dtype='>u2', word_size=12)
|
|
287
|
+
```
|
|
288
|
+
|
|
289
|
+
## License
|
|
290
|
+
|
|
291
|
+
`varuintarray` is licensed under the MIT License - see the LICENSE file for details.
|
|
@@ -0,0 +1,280 @@
|
|
|
1
|
+
# VarUIntArray
|
|
2
|
+
|
|
3
|
+
A NumPy subclass for working with variable-length unsigned integers that don't fit standard machine word sizes.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
`VarUIntArray` extends `numpy.ndarray` to handle arbitrary bit-width unsigned integers (e.g., 3-bit, 10-bit, 12-bit) while correctly managing padding bits when using NumPy's universal functions (ufuncs). This is particularly useful when working with:
|
|
8
|
+
|
|
9
|
+
- Custom binary formats with non-standard word sizes
|
|
10
|
+
- Packed bit arrays where words don't align to 8, 16, 32, or 64 bits
|
|
11
|
+
- Data structures that require precise bit-width control
|
|
12
|
+
|
|
13
|
+
## Key Features
|
|
14
|
+
|
|
15
|
+
- **Arbitrary Word Sizes**: Support for any word size from 1 to 64 bits
|
|
16
|
+
- **Automatic Padding Management**: Correctly handles padding bits in bitwise operations
|
|
17
|
+
- **NumPy Integration**: Works seamlessly with NumPy ufuncs and array operations
|
|
18
|
+
- **Pack/Unpack Operations**: Convert between bit arrays and packed integer arrays
|
|
19
|
+
|
|
20
|
+
## Installation
|
|
21
|
+
|
|
22
|
+
This module can be installed from PyPI:
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
pip install varuintarray
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## Quick Start
|
|
29
|
+
|
|
30
|
+
### Create a VarUIntArray with 10-bit words
|
|
31
|
+
|
|
32
|
+
```python
|
|
33
|
+
>>> arr = VarUIntArray([1, 2, 1023], word_size=10)
|
|
34
|
+
>>> arr
|
|
35
|
+
VarUIntArray([ 1, 2, 1023], dtype='>u2', word_size=10)
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
### Bitwise operations respect word_size
|
|
39
|
+
```python
|
|
40
|
+
>>> inverted = arr.invert()
|
|
41
|
+
>>> inverted
|
|
42
|
+
VarUIntArray([1022, 1021, 0], dtype='>u2', word_size=10)
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
### Unpack to individual bits
|
|
46
|
+
```python
|
|
47
|
+
>>> bits = arr.unpackbits()
|
|
48
|
+
>>> bits.shape
|
|
49
|
+
(3, 10)
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
### Pack bits back into words
|
|
53
|
+
```python
|
|
54
|
+
>>> packed = VarUIntArray.packbits(bits)
|
|
55
|
+
>>> packed
|
|
56
|
+
VarUIntArray([ 1, 2, 1023], dtype='>u2', word_size=10)
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
## Core Concepts
|
|
60
|
+
|
|
61
|
+
### Word Size vs Machine Size
|
|
62
|
+
|
|
63
|
+
Standard computers work with word sizes of 8, 16, 32, or 64 bits. When you need a 10-bit word, it must be stored in a 16-bit container, leaving 6 padding bits unused. `VarUIntArray` automatically:
|
|
64
|
+
|
|
65
|
+
1. Selects the appropriate machine word size (8, 16, 32, or 64 bits)
|
|
66
|
+
2. Tracks the actual word size you care about
|
|
67
|
+
3. Ensures padding bits are handled correctly in operations
|
|
68
|
+
|
|
69
|
+
### Padding Bit Handling
|
|
70
|
+
|
|
71
|
+
The most important feature is correct handling of padding bits during bitwise operations. For example:
|
|
72
|
+
|
|
73
|
+
```python
|
|
74
|
+
# 3-bit word stored in 8-bit container
|
|
75
|
+
>>> arr = VarUIntArray([5], word_size=3) # Binary: 101
|
|
76
|
+
|
|
77
|
+
# Standard NumPy invert would give 11111010 (250)
|
|
78
|
+
# VarUIntArray.invert() gives 010 (2) - correct for 3-bit word
|
|
79
|
+
>>> inverted = arr.invert()
|
|
80
|
+
>>> int(inverted[0])
|
|
81
|
+
2
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
## API Reference
|
|
86
|
+
|
|
87
|
+
### VarUIntArray Class
|
|
88
|
+
|
|
89
|
+
#### Constructor
|
|
90
|
+
|
|
91
|
+
```python
|
|
92
|
+
VarUIntArray(input_array, word_size)
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
**Parameters:**
|
|
96
|
+
- `input_array`: Array-like data to convert
|
|
97
|
+
- `word_size`: Number of significant bits per word (1-64)
|
|
98
|
+
|
|
99
|
+
#### Methods
|
|
100
|
+
|
|
101
|
+
- `invert()`: Bitwise invert respecting word_size
|
|
102
|
+
- `unpackbits()`: Unpack to individual bits (adds one dimension)
|
|
103
|
+
- `packbits(data)`: Class method to pack bit array into VarUIntArray
|
|
104
|
+
- `to_dict()`: Serialize to a dictionary
|
|
105
|
+
- `from_dict(data)`: Static method to deserialize from a dictionary
|
|
106
|
+
- `to_json()`: Serialize to a JSON string
|
|
107
|
+
- `from_json(string)`: Class method to deserialize from a JSON string
|
|
108
|
+
|
|
109
|
+
#### Attributes
|
|
110
|
+
|
|
111
|
+
- `word_size`: Number of significant bits per word
|
|
112
|
+
|
|
113
|
+
### Functions
|
|
114
|
+
|
|
115
|
+
#### `unpackbits(array)`
|
|
116
|
+
|
|
117
|
+
Unpack a VarUIntArray into individual bits, excluding padding.
|
|
118
|
+
|
|
119
|
+
```python
|
|
120
|
+
>>> arr = VarUIntArray([5, 3], word_size=3)
|
|
121
|
+
>>> unpackbits(arr)
|
|
122
|
+
array([[1, 0, 1],
|
|
123
|
+
[0, 1, 1]], dtype=uint8)
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
**Parameters:**
|
|
127
|
+
- `array`: VarUIntArray to unpack
|
|
128
|
+
|
|
129
|
+
**Returns:** ndarray with shape `(*original_shape, word_size)`
|
|
130
|
+
|
|
131
|
+
#### `packbits(array)`
|
|
132
|
+
|
|
133
|
+
Pack a bit array into a VarUIntArray.
|
|
134
|
+
|
|
135
|
+
```python
|
|
136
|
+
>>> bits = np.array([[1, 0, 1], [0, 1, 1]], dtype=np.uint8)
|
|
137
|
+
>>> packbits(bits)
|
|
138
|
+
VarUIntArray([5, 3], dtype=uint8, word_size=3)
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
**Parameters:**
|
|
142
|
+
- `array`: ndarray of uint8 containing 0s and 1s, where the last dimension contains bits for each word
|
|
143
|
+
|
|
144
|
+
**Returns:** VarUIntArray with one fewer dimension
|
|
145
|
+
|
|
146
|
+
#### `VarUIntArray.to_dict()`
|
|
147
|
+
|
|
148
|
+
Serialize VarUIntArray to JSON-compatible dictionary.
|
|
149
|
+
|
|
150
|
+
```python
|
|
151
|
+
>>> arr = VarUIntArray([1, 2, 3], word_size=10)
|
|
152
|
+
>>> arr.to_dict()
|
|
153
|
+
{'word_size': 10, 'values': [1, 2, 3]}
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
#### `VarUIntArray.from_dict(data)`
|
|
157
|
+
|
|
158
|
+
Deserialize a VarUIntArray from a dictionary containing `values` and `word_size` keys.
|
|
159
|
+
|
|
160
|
+
```python
|
|
161
|
+
# From dictionary
|
|
162
|
+
>>> VarUIntArray.from_dict({'values': [1, 2, 3], 'word_size': 10})
|
|
163
|
+
VarUIntArray([1, 2, 3], dtype='>u2', word_size=10)
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
#### `VarUIntArray.to_json()`
|
|
167
|
+
|
|
168
|
+
Serialize VarUIntArray to a JSON string.
|
|
169
|
+
|
|
170
|
+
```python
|
|
171
|
+
>>> arr = VarUIntArray([1, 2, 3], word_size=10)
|
|
172
|
+
>>> arr.to_json()
|
|
173
|
+
'{"word_size": 10, "values": [1, 2, 3]}'
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
#### `VarUIntArray.from_json(string)`
|
|
177
|
+
|
|
178
|
+
Deserialize a VarUIntArray from a JSON string.
|
|
179
|
+
|
|
180
|
+
```python
|
|
181
|
+
>>> json_str = '{"word_size": 10, "values": [1, 2, 3]}'
|
|
182
|
+
>>> VarUIntArray.from_json(json_str)
|
|
183
|
+
VarUIntArray([1, 2, 3], dtype='>u2', word_size=10)
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
## Use Cases
|
|
187
|
+
|
|
188
|
+
### Custom Binary Protocols
|
|
189
|
+
|
|
190
|
+
Working with network protocols or file formats that use non-standard bit widths:
|
|
191
|
+
|
|
192
|
+
```python
|
|
193
|
+
# 12-bit color values (common in some image formats)
|
|
194
|
+
>>> colors = VarUIntArray([4095, 2048, 0], word_size=12)
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
### Bit Manipulation
|
|
198
|
+
|
|
199
|
+
Performing bitwise operations on packed data:
|
|
200
|
+
|
|
201
|
+
```python
|
|
202
|
+
>>> data = VarUIntArray([0b1010, 0b0101], word_size=4)
|
|
203
|
+
>>> mask = VarUIntArray([0b1100, 0b0011], word_size=4)
|
|
204
|
+
>>> result = data & mask # Bitwise AND
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
## Implementation Details
|
|
208
|
+
|
|
209
|
+
### Memory Layout
|
|
210
|
+
|
|
211
|
+
- VarUIntArray uses big-endian byte order (`'>'` dtype prefix) for consistency.
|
|
212
|
+
- Data is stored in the smallest standard NumPy unsigned integer type that can hold the specified word_size.
|
|
213
|
+
|
|
214
|
+
### Limitations
|
|
215
|
+
|
|
216
|
+
- Maximum word size: 64 bits
|
|
217
|
+
- Only unsigned integers are supported
|
|
218
|
+
- The `axis` parameter is not supported for `np.unpackbits` on VarUIntArray
|
|
219
|
+
|
|
220
|
+
## Examples
|
|
221
|
+
|
|
222
|
+
### Complete Workflow
|
|
223
|
+
|
|
224
|
+
```python
|
|
225
|
+
>>> import numpy as np
|
|
226
|
+
>>> from varuintarray import VarUIntArray
|
|
227
|
+
|
|
228
|
+
# Create some 5-bit values
|
|
229
|
+
>>> data = VarUIntArray([31, 16, 0, 15], word_size=5)
|
|
230
|
+
|
|
231
|
+
# Unpack to bits
|
|
232
|
+
>>> bits = data.unpackbits()
|
|
233
|
+
>>> bits
|
|
234
|
+
array([[1, 1, 1, 1, 1],
|
|
235
|
+
[1, 0, 0, 0, 0],
|
|
236
|
+
[0, 0, 0, 0, 0],
|
|
237
|
+
[0, 1, 1, 1, 1]], dtype=uint8)
|
|
238
|
+
|
|
239
|
+
# Flip specific bits
|
|
240
|
+
>>> bits[:, 0] = 1 - bits[:, 0] # Flip first bit
|
|
241
|
+
|
|
242
|
+
# Pack back
|
|
243
|
+
>>> result = VarUIntArray.packbits(bits)
|
|
244
|
+
>>> result
|
|
245
|
+
VarUIntArray([15, 0, 16, 31], dtype=uint8, word_size=5)
|
|
246
|
+
|
|
247
|
+
# Bitwise operations
|
|
248
|
+
>>> result.invert()
|
|
249
|
+
VarUIntArray([16, 31, 15, 0], dtype=uint8, word_size=5)
|
|
250
|
+
```
|
|
251
|
+
|
|
252
|
+
### Serialization
|
|
253
|
+
|
|
254
|
+
```python
|
|
255
|
+
>>> from varuintarray import VarUIntArray
|
|
256
|
+
>>> import json
|
|
257
|
+
|
|
258
|
+
# Serialize dict
|
|
259
|
+
>>> arr = VarUIntArray([100, 200, 300], word_size=12)
|
|
260
|
+
>>> serialized = arr.to_dict()
|
|
261
|
+
>>> serialized
|
|
262
|
+
{'word_size': 12, 'values': [100, 200, 300]}
|
|
263
|
+
|
|
264
|
+
# Deserialize dict
|
|
265
|
+
>>> VarUIntArray.from_dict(serialized)
|
|
266
|
+
VarUIntArray([100, 200, 300], dtype='>u2', word_size=12)
|
|
267
|
+
|
|
268
|
+
# Serialize JSON
|
|
269
|
+
>>> serialized = arr.to_json()
|
|
270
|
+
>>> serialized
|
|
271
|
+
'{"word_size": 12, "values": [100, 200, 300]}'
|
|
272
|
+
|
|
273
|
+
# Deserialize JSON
|
|
274
|
+
>>> VarUIntArray.from_json(serialized)
|
|
275
|
+
VarUIntArray([100, 200, 300], dtype='>u2', word_size=12)
|
|
276
|
+
```
|
|
277
|
+
|
|
278
|
+
## License
|
|
279
|
+
|
|
280
|
+
`varuintarray` is licensed under the MIT License - see the LICENSE file for details.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "varuintarray"
|
|
3
|
-
version = "0.1
|
|
3
|
+
version = "1.0.1"
|
|
4
4
|
description = "A variable-length unsigned integer array"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
authors = [
|
|
@@ -8,7 +8,7 @@ authors = [
|
|
|
8
8
|
]
|
|
9
9
|
requires-python = ">=3.9"
|
|
10
10
|
dependencies = [
|
|
11
|
-
"lark>=1.
|
|
11
|
+
"lark>=1.2",
|
|
12
12
|
"numpy>=2.0",
|
|
13
13
|
]
|
|
14
14
|
|
|
@@ -19,7 +19,7 @@ build-backend = "hatchling.build"
|
|
|
19
19
|
[dependency-groups]
|
|
20
20
|
dev = [
|
|
21
21
|
"hypothesis>=6.141.1",
|
|
22
|
-
"jupyterlab>=4.5.2",
|
|
23
22
|
"pytest>=8.4.2",
|
|
23
|
+
"rich>=14.3.2",
|
|
24
24
|
"ruff>=0.14.14",
|
|
25
25
|
]
|