xlspy 0.1.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xlspy-0.1.3/LICENSE +21 -0
- xlspy-0.1.3/PKG-INFO +204 -0
- xlspy-0.1.3/README.md +182 -0
- xlspy-0.1.3/pyproject.toml +51 -0
- xlspy-0.1.3/setup.cfg +4 -0
- xlspy-0.1.3/src/xlspy/__init__.py +6 -0
- xlspy-0.1.3/src/xlspy/_accel.py +35 -0
- xlspy-0.1.3/src/xlspy/biff_reader.py +292 -0
- xlspy-0.1.3/src/xlspy/c_ext/_c_core.c +912 -0
- xlspy-0.1.3/src/xlspy/c_ext/setup.py +12 -0
- xlspy-0.1.3/src/xlspy/reader.py +770 -0
- xlspy-0.1.3/src/xlspy/writer.py +690 -0
- xlspy-0.1.3/src/xlspy/xlsx_writer.py +421 -0
- xlspy-0.1.3/src/xlspy.egg-info/PKG-INFO +204 -0
- xlspy-0.1.3/src/xlspy.egg-info/SOURCES.txt +19 -0
- xlspy-0.1.3/src/xlspy.egg-info/dependency_links.txt +1 -0
- xlspy-0.1.3/src/xlspy.egg-info/requires.txt +8 -0
- xlspy-0.1.3/src/xlspy.egg-info/top_level.txt +1 -0
- xlspy-0.1.3/tests/test_comprehensive.py +261 -0
- xlspy-0.1.3/tests/test_reader.py +282 -0
- xlspy-0.1.3/tests/test_real_data.py +121 -0
xlspy-0.1.3/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Krzysztof Duśko
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
xlspy-0.1.3/PKG-INFO
ADDED
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: xlspy
|
|
3
|
+
Version: 0.1.3
|
|
4
|
+
Summary: Read and write XLSB and XLSX files efficiently.
|
|
5
|
+
Author: Krzysztof Duśko
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Repository, https://github.com/KrzysztofDusko/xlspy/
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Operating System :: OS Independent
|
|
10
|
+
Classifier: Topic :: Office/Business :: Office Suites
|
|
11
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
12
|
+
Requires-Python: >=3.12
|
|
13
|
+
Description-Content-Type: text/markdown
|
|
14
|
+
License-File: LICENSE
|
|
15
|
+
Provides-Extra: test
|
|
16
|
+
Requires-Dist: openpyxl; extra == "test"
|
|
17
|
+
Requires-Dist: pytest; extra == "test"
|
|
18
|
+
Requires-Dist: xlsxwriter; extra == "test"
|
|
19
|
+
Provides-Extra: examples
|
|
20
|
+
Requires-Dist: memory-profiler; extra == "examples"
|
|
21
|
+
Dynamic: license-file
|
|
22
|
+
|
|
23
|
+
# Python XLSB & XLSX Reader & Writer
|
|
24
|
+
|
|
25
|
+
A Python library for reading and writing XLSB and XLSX files efficiently.
|
|
26
|
+
|
|
27
|
+
## Installation
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
pip install xlspy
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Usage
|
|
34
|
+
|
|
35
|
+
### Basic Example
|
|
36
|
+
|
|
37
|
+
```python
|
|
38
|
+
from xlspy import XlsbWriter
|
|
39
|
+
import datetime
|
|
40
|
+
from decimal import Decimal
|
|
41
|
+
|
|
42
|
+
data = [
|
|
43
|
+
["Name", "Age", "City", "info"],
|
|
44
|
+
[-123, 2147483647, 2147483648, 2147483999],
|
|
45
|
+
["x", "y", "z", datetime.datetime.today()],
|
|
46
|
+
["Alice", 25, "New York", datetime.date.today()],
|
|
47
|
+
["Bob", 30, "London", Decimal(3.14)],
|
|
48
|
+
["Charlie", 35, "Paris", datetime.datetime.now()],
|
|
49
|
+
[True, False, None, datetime.datetime.utcnow()]
|
|
50
|
+
]
|
|
51
|
+
|
|
52
|
+
# Initialize writer with a specific compression level
|
|
53
|
+
with XlsbWriter("output.xlsb", compressionLevel=6) as writer:
|
|
54
|
+
# Add a visible sheet
|
|
55
|
+
writer.add_sheet("Visible Sheet")
|
|
56
|
+
writer.write_sheet(data)
|
|
57
|
+
|
|
58
|
+
# Add a hidden sheet
|
|
59
|
+
writer.add_sheet("Hidden Sheet", hidden=True)
|
|
60
|
+
writer.write_sheet([["This sheet is hidden."]])
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
### XlsxWriter Example
|
|
64
|
+
|
|
65
|
+
```python
|
|
66
|
+
from xlspy import XlsxWriter
|
|
67
|
+
import datetime
|
|
68
|
+
from decimal import Decimal
|
|
69
|
+
|
|
70
|
+
data = [
|
|
71
|
+
["Name", "Age", "City", "info"],
|
|
72
|
+
[-123, 2147483647, 2147483648, 2147483999],
|
|
73
|
+
["x", "y", "z", datetime.datetime.today()],
|
|
74
|
+
["Alice", 25, "New York", datetime.date.today()],
|
|
75
|
+
["Bob", 30, "London", Decimal(3.14)],
|
|
76
|
+
["Charlie", 35, "Paris", datetime.datetime.now()],
|
|
77
|
+
[True, False, None, datetime.datetime.utcnow()]
|
|
78
|
+
]
|
|
79
|
+
|
|
80
|
+
# Initialize writer with a specific compression level
|
|
81
|
+
with XlsxWriter("output.xlsx", compressionLevel=6) as writer:
|
|
82
|
+
# Add a visible sheet
|
|
83
|
+
writer.add_sheet("Visible Sheet")
|
|
84
|
+
writer.write_sheet(data)
|
|
85
|
+
|
|
86
|
+
# Add a hidden sheet
|
|
87
|
+
writer.add_sheet("Hidden Sheet", hidden=True)
|
|
88
|
+
writer.write_sheet([["This sheet is hidden."]])
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
### Reading XLSB and XLSX Files
|
|
92
|
+
|
|
93
|
+
Reading files is done via the `ExcelReader` class, which automatically detects the format.
|
|
94
|
+
|
|
95
|
+
```python
|
|
96
|
+
from xlspy import ExcelReader
|
|
97
|
+
|
|
98
|
+
with ExcelReader("input.xlsx") as reader: # or .xlsb
|
|
99
|
+
names = reader.get_sheet_names()
|
|
100
|
+
print(f"Sheets: {names}")
|
|
101
|
+
|
|
102
|
+
for sheet_name in names:
|
|
103
|
+
rows = reader.read_all(sheet_name)
|
|
104
|
+
for row in rows:
|
|
105
|
+
print(row)
|
|
106
|
+
|
|
107
|
+
# Generator usage (memory efficient for large files):
|
|
108
|
+
with ExcelReader("large_file.xlsb") as reader:
|
|
109
|
+
for row in reader.get_rows("Sheet1"):
|
|
110
|
+
print(row)
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
### Streaming from a Database (Netezza)
|
|
114
|
+
|
|
115
|
+
This example shows how to stream data directly from a database query into an XLSB file using `nzpy-extended`. This is highly memory-efficient as it doesn't load the entire dataset into memory.
|
|
116
|
+
|
|
117
|
+
First, ensure you have `nzpy-extended` installed:
|
|
118
|
+
```bash
|
|
119
|
+
pip install nzpy-extended
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
Then, you can use a generator function to feed data to `XlsbWriter`.
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
import os
|
|
126
|
+
from typing import Generator
|
|
127
|
+
from xlspy import XlsbWriter
|
|
128
|
+
|
|
129
|
+
# --- Configuration ---
|
|
130
|
+
NZ_CONFIG = {
|
|
131
|
+
"host": os.environ.get("NZ_DEV_HOST", "your_host"),
|
|
132
|
+
"port": int(os.environ.get("NZ_DEV_PORT", "5480")),
|
|
133
|
+
"database": os.environ.get("NZ_DEV_DB", "your_db"),
|
|
134
|
+
"user": os.environ.get("NZ_DEV_USER", "your_user"),
|
|
135
|
+
"password": os.environ.get("NZ_DEV_PASSWORD", "your_password"),
|
|
136
|
+
}
|
|
137
|
+
QUERY = "SELECT * FROM YourTable"
|
|
138
|
+
OUTPUT_FILENAME = "db_output.xlsb"
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def row_generator(cursor) -> Generator[list, None, None]:
|
|
142
|
+
"""Yields column headers first, then each data row."""
|
|
143
|
+
headers = [column[0] for column in cursor.description]
|
|
144
|
+
yield headers
|
|
145
|
+
while row := cursor.fetchone():
|
|
146
|
+
yield list(row)
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
# --- Main Execution ---
|
|
150
|
+
try:
|
|
151
|
+
import nzpy_extended.sync as nzpy
|
|
152
|
+
|
|
153
|
+
with nzpy.connect(**NZ_CONFIG) as conn:
|
|
154
|
+
cursor = conn.cursor()
|
|
155
|
+
cursor.execute(QUERY)
|
|
156
|
+
|
|
157
|
+
with XlsbWriter(OUTPUT_FILENAME) as writer:
|
|
158
|
+
writer.add_sheet("Database Export")
|
|
159
|
+
writer.write_sheet(row_generator(cursor))
|
|
160
|
+
writer.add_sheet("SQL Query", hidden=True)
|
|
161
|
+
writer.write_sheet([["SQL"], [QUERY]])
|
|
162
|
+
|
|
163
|
+
print(f"Successfully created '{OUTPUT_FILENAME}'")
|
|
164
|
+
|
|
165
|
+
except Exception as e:
|
|
166
|
+
print(f"An unexpected error occurred: {e}")
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
## Performance
|
|
170
|
+
|
|
171
|
+
`xlspy` is designed for high performance. Since version 0.1.0, the library includes a **C extension** (`_c_core`) that accelerates XLSB reading and writing. The C extension is **enabled by default** (compiled automatically on install). Set the environment variable `XLSPY_DISABLE_C_EXT=1` to force the pure Python fallback.
|
|
172
|
+
|
|
173
|
+
All benchmarks: **50000 × 50** dataset (2.5M cells). Tests performed on **Windows 11** (Python 3.14, AMD64).
|
|
174
|
+
|
|
175
|
+
### Write
|
|
176
|
+
|
|
177
|
+
| Library | Format | Time | Size |
|
|
178
|
+
|---------|--------|------|------|
|
|
179
|
+
| **xlspy (C_EXT)** | XLSB | **1.02 s** | 7.20 MB |
|
|
180
|
+
| xlspy (Python) | XLSB | 2.54 s | 7.20 MB |
|
|
181
|
+
| xlspy | XLSX | 3.66 s | 6.32 MB |
|
|
182
|
+
| [xlsxwriter](https://pypi.org/project/xlsxwriter/) | XLSX | 8.67 s | 11.44 MB |
|
|
183
|
+
|
|
184
|
+
### Read
|
|
185
|
+
|
|
186
|
+
| Library | Format | Time | Notes |
|
|
187
|
+
|---------|--------|------|-------|
|
|
188
|
+
| **xlspy (C_EXT)** | XLSB | **1.39 s** | default, compiled C |
|
|
189
|
+
| xlspy | XLSX | 4.72 s | uses expat XML parser (C) |
|
|
190
|
+
| xlspy (Python) | XLSB | 6.41 s | pure Python fallback |
|
|
191
|
+
| [openpyxl](https://pypi.org/project/openpyxl/) | XLSX | 7.85 s | read-only mode |
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
### Analysis
|
|
195
|
+
|
|
196
|
+
The **4.6× read speedup** of the C extension over the pure Python XLSB reader (6.41 s → 1.39 s) comes from two factors:
|
|
197
|
+
- **~60–70%** — native C compilation, no interpreter overhead per record
|
|
198
|
+
- **~30–40%** — algorithm simplification: flat array indexed by `col − first_col` instead of `Dict[int, Any]`, no `isinstance` per cell, no `BiffReader.read_worksheet()` method call per record
|
|
199
|
+
|
|
200
|
+
Run the benchmarks yourself with `examples/performance_test.py`.
|
|
201
|
+
|
|
202
|
+
## Repository
|
|
203
|
+
|
|
204
|
+
<https://github.com/KrzysztofDusko/xlspy/>
|
xlspy-0.1.3/README.md
ADDED
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
# Python XLSB & XLSX Reader & Writer
|
|
2
|
+
|
|
3
|
+
A Python library for reading and writing XLSB and XLSX files efficiently.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install xlspy
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Usage
|
|
12
|
+
|
|
13
|
+
### Basic Example
|
|
14
|
+
|
|
15
|
+
```python
|
|
16
|
+
from xlspy import XlsbWriter
|
|
17
|
+
import datetime
|
|
18
|
+
from decimal import Decimal
|
|
19
|
+
|
|
20
|
+
data = [
|
|
21
|
+
["Name", "Age", "City", "info"],
|
|
22
|
+
[-123, 2147483647, 2147483648, 2147483999],
|
|
23
|
+
["x", "y", "z", datetime.datetime.today()],
|
|
24
|
+
["Alice", 25, "New York", datetime.date.today()],
|
|
25
|
+
["Bob", 30, "London", Decimal(3.14)],
|
|
26
|
+
["Charlie", 35, "Paris", datetime.datetime.now()],
|
|
27
|
+
[True, False, None, datetime.datetime.utcnow()]
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
# Initialize writer with a specific compression level
|
|
31
|
+
with XlsbWriter("output.xlsb", compressionLevel=6) as writer:
|
|
32
|
+
# Add a visible sheet
|
|
33
|
+
writer.add_sheet("Visible Sheet")
|
|
34
|
+
writer.write_sheet(data)
|
|
35
|
+
|
|
36
|
+
# Add a hidden sheet
|
|
37
|
+
writer.add_sheet("Hidden Sheet", hidden=True)
|
|
38
|
+
writer.write_sheet([["This sheet is hidden."]])
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
### XlsxWriter Example
|
|
42
|
+
|
|
43
|
+
```python
|
|
44
|
+
from xlspy import XlsxWriter
|
|
45
|
+
import datetime
|
|
46
|
+
from decimal import Decimal
|
|
47
|
+
|
|
48
|
+
data = [
|
|
49
|
+
["Name", "Age", "City", "info"],
|
|
50
|
+
[-123, 2147483647, 2147483648, 2147483999],
|
|
51
|
+
["x", "y", "z", datetime.datetime.today()],
|
|
52
|
+
["Alice", 25, "New York", datetime.date.today()],
|
|
53
|
+
["Bob", 30, "London", Decimal(3.14)],
|
|
54
|
+
["Charlie", 35, "Paris", datetime.datetime.now()],
|
|
55
|
+
[True, False, None, datetime.datetime.utcnow()]
|
|
56
|
+
]
|
|
57
|
+
|
|
58
|
+
# Initialize writer with a specific compression level
|
|
59
|
+
with XlsxWriter("output.xlsx", compressionLevel=6) as writer:
|
|
60
|
+
# Add a visible sheet
|
|
61
|
+
writer.add_sheet("Visible Sheet")
|
|
62
|
+
writer.write_sheet(data)
|
|
63
|
+
|
|
64
|
+
# Add a hidden sheet
|
|
65
|
+
writer.add_sheet("Hidden Sheet", hidden=True)
|
|
66
|
+
writer.write_sheet([["This sheet is hidden."]])
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
### Reading XLSB and XLSX Files
|
|
70
|
+
|
|
71
|
+
Reading files is done via the `ExcelReader` class, which automatically detects the format.
|
|
72
|
+
|
|
73
|
+
```python
|
|
74
|
+
from xlspy import ExcelReader
|
|
75
|
+
|
|
76
|
+
with ExcelReader("input.xlsx") as reader: # or .xlsb
|
|
77
|
+
names = reader.get_sheet_names()
|
|
78
|
+
print(f"Sheets: {names}")
|
|
79
|
+
|
|
80
|
+
for sheet_name in names:
|
|
81
|
+
rows = reader.read_all(sheet_name)
|
|
82
|
+
for row in rows:
|
|
83
|
+
print(row)
|
|
84
|
+
|
|
85
|
+
# Generator usage (memory efficient for large files):
|
|
86
|
+
with ExcelReader("large_file.xlsb") as reader:
|
|
87
|
+
for row in reader.get_rows("Sheet1"):
|
|
88
|
+
print(row)
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
### Streaming from a Database (Netezza)
|
|
92
|
+
|
|
93
|
+
This example shows how to stream data directly from a database query into an XLSB file using `nzpy-extended`. This is highly memory-efficient as it doesn't load the entire dataset into memory.
|
|
94
|
+
|
|
95
|
+
First, ensure you have `nzpy-extended` installed:
|
|
96
|
+
```bash
|
|
97
|
+
pip install nzpy-extended
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
Then, you can use a generator function to feed data to `XlsbWriter`.
|
|
101
|
+
|
|
102
|
+
```python
|
|
103
|
+
import os
|
|
104
|
+
from typing import Generator
|
|
105
|
+
from xlspy import XlsbWriter
|
|
106
|
+
|
|
107
|
+
# --- Configuration ---
|
|
108
|
+
NZ_CONFIG = {
|
|
109
|
+
"host": os.environ.get("NZ_DEV_HOST", "your_host"),
|
|
110
|
+
"port": int(os.environ.get("NZ_DEV_PORT", "5480")),
|
|
111
|
+
"database": os.environ.get("NZ_DEV_DB", "your_db"),
|
|
112
|
+
"user": os.environ.get("NZ_DEV_USER", "your_user"),
|
|
113
|
+
"password": os.environ.get("NZ_DEV_PASSWORD", "your_password"),
|
|
114
|
+
}
|
|
115
|
+
QUERY = "SELECT * FROM YourTable"
|
|
116
|
+
OUTPUT_FILENAME = "db_output.xlsb"
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def row_generator(cursor) -> Generator[list, None, None]:
|
|
120
|
+
"""Yields column headers first, then each data row."""
|
|
121
|
+
headers = [column[0] for column in cursor.description]
|
|
122
|
+
yield headers
|
|
123
|
+
while row := cursor.fetchone():
|
|
124
|
+
yield list(row)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
# --- Main Execution ---
|
|
128
|
+
try:
|
|
129
|
+
import nzpy_extended.sync as nzpy
|
|
130
|
+
|
|
131
|
+
with nzpy.connect(**NZ_CONFIG) as conn:
|
|
132
|
+
cursor = conn.cursor()
|
|
133
|
+
cursor.execute(QUERY)
|
|
134
|
+
|
|
135
|
+
with XlsbWriter(OUTPUT_FILENAME) as writer:
|
|
136
|
+
writer.add_sheet("Database Export")
|
|
137
|
+
writer.write_sheet(row_generator(cursor))
|
|
138
|
+
writer.add_sheet("SQL Query", hidden=True)
|
|
139
|
+
writer.write_sheet([["SQL"], [QUERY]])
|
|
140
|
+
|
|
141
|
+
print(f"Successfully created '{OUTPUT_FILENAME}'")
|
|
142
|
+
|
|
143
|
+
except Exception as e:
|
|
144
|
+
print(f"An unexpected error occurred: {e}")
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
## Performance
|
|
148
|
+
|
|
149
|
+
`xlspy` is designed for high performance. Since version 0.1.0, the library includes a **C extension** (`_c_core`) that accelerates XLSB reading and writing. The C extension is **enabled by default** (compiled automatically on install). Set the environment variable `XLSPY_DISABLE_C_EXT=1` to force the pure Python fallback.
|
|
150
|
+
|
|
151
|
+
All benchmarks: **50000 × 50** dataset (2.5M cells). Tests performed on **Windows 11** (Python 3.14, AMD64).
|
|
152
|
+
|
|
153
|
+
### Write
|
|
154
|
+
|
|
155
|
+
| Library | Format | Time | Size |
|
|
156
|
+
|---------|--------|------|------|
|
|
157
|
+
| **xlspy (C_EXT)** | XLSB | **1.02 s** | 7.20 MB |
|
|
158
|
+
| xlspy (Python) | XLSB | 2.54 s | 7.20 MB |
|
|
159
|
+
| xlspy | XLSX | 3.66 s | 6.32 MB |
|
|
160
|
+
| [xlsxwriter](https://pypi.org/project/xlsxwriter/) | XLSX | 8.67 s | 11.44 MB |
|
|
161
|
+
|
|
162
|
+
### Read
|
|
163
|
+
|
|
164
|
+
| Library | Format | Time | Notes |
|
|
165
|
+
|---------|--------|------|-------|
|
|
166
|
+
| **xlspy (C_EXT)** | XLSB | **1.39 s** | default, compiled C |
|
|
167
|
+
| xlspy | XLSX | 4.72 s | uses expat XML parser (C) |
|
|
168
|
+
| xlspy (Python) | XLSB | 6.41 s | pure Python fallback |
|
|
169
|
+
| [openpyxl](https://pypi.org/project/openpyxl/) | XLSX | 7.85 s | read-only mode |
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
### Analysis
|
|
173
|
+
|
|
174
|
+
The **4.6× read speedup** of the C extension over the pure Python XLSB reader (6.41 s → 1.39 s) comes from two factors:
|
|
175
|
+
- **~60–70%** — native C compilation, no interpreter overhead per record
|
|
176
|
+
- **~30–40%** — algorithm simplification: flat array indexed by `col − first_col` instead of `Dict[int, Any]`, no `isinstance` per cell, no `BiffReader.read_worksheet()` method call per record
|
|
177
|
+
|
|
178
|
+
Run the benchmarks yourself with `examples/performance_test.py`.
|
|
179
|
+
|
|
180
|
+
## Repository
|
|
181
|
+
|
|
182
|
+
<https://github.com/KrzysztofDusko/xlspy/>
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[[tool.setuptools.ext-modules]]
|
|
6
|
+
name = "xlspy._c_core"
|
|
7
|
+
sources = ["src/xlspy/c_ext/_c_core.c"]
|
|
8
|
+
|
|
9
|
+
[tool.setuptools.packages.find]
|
|
10
|
+
where = ["src"]
|
|
11
|
+
|
|
12
|
+
[project]
|
|
13
|
+
name = "xlspy"
|
|
14
|
+
version = "0.1.3"
|
|
15
|
+
authors = [
|
|
16
|
+
{ name="Krzysztof Duśko"},
|
|
17
|
+
]
|
|
18
|
+
description = "Read and write XLSB and XLSX files efficiently."
|
|
19
|
+
readme = "README.md"
|
|
20
|
+
requires-python = ">=3.12"
|
|
21
|
+
license = "MIT"
|
|
22
|
+
classifiers = [
|
|
23
|
+
"Programming Language :: Python :: 3",
|
|
24
|
+
"Operating System :: OS Independent",
|
|
25
|
+
"Topic :: Office/Business :: Office Suites",
|
|
26
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
27
|
+
]
|
|
28
|
+
dependencies = [
|
|
29
|
+
]
|
|
30
|
+
|
|
31
|
+
[project.urls]
|
|
32
|
+
Repository = "https://github.com/KrzysztofDusko/xlspy/"
|
|
33
|
+
|
|
34
|
+
[project.optional-dependencies]
|
|
35
|
+
test = [
|
|
36
|
+
"openpyxl",
|
|
37
|
+
"pytest",
|
|
38
|
+
"xlsxwriter",
|
|
39
|
+
]
|
|
40
|
+
examples = [
|
|
41
|
+
"memory-profiler"
|
|
42
|
+
]
|
|
43
|
+
|
|
44
|
+
[tool.cibuildwheel]
|
|
45
|
+
build = "cp312-* cp313-* cp314-*"
|
|
46
|
+
archs = "auto64"
|
|
47
|
+
|
|
48
|
+
[tool.pytest.ini_options]
|
|
49
|
+
testpaths = ["tests"]
|
|
50
|
+
|
|
51
|
+
|
xlspy-0.1.3/src/xlspy/_accel.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import os as _os
|
|
2
|
+
|
|
3
|
+
# Defaults used when the C extension is disabled or cannot be imported.
_c_encode_xlsb_row = _c_calc_column_widths = _c_read_xlsb_worksheet = None
HAVE_C_EXT = False

# The extension is attempted unless XLSPY_DISABLE_C_EXT is exactly '1'.
if _os.environ.get('XLSPY_DISABLE_C_EXT', '0') != '1':
    try:
        from xlspy._c_core import (
            encode_xlsb_row as _c_encode_xlsb_row,
            calc_column_widths as _c_calc_column_widths,
            read_xlsb_worksheet as _c_read_xlsb_worksheet,
        )
    except ImportError:
        # Extension missing or incomplete: leave HAVE_C_EXT False.
        pass
    else:
        HAVE_C_EXT = True
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def encode_xlsb_row(row, ss_dict, ss_list, sst_unique_count, sst_all_count, row_idx):
    """Delegate to the C ``encode_xlsb_row`` when the extension is loaded.

    All six arguments are forwarded unchanged and in the same order.

    Returns:
        Whatever the C function returns when ``HAVE_C_EXT`` is true,
        otherwise ``None``.
    """
    # NOTE(review): presumably callers treat None as "use the pure Python
    # path" -- confirm at the call sites.
    if not HAVE_C_EXT:
        return None
    return _c_encode_xlsb_row(
        row, ss_dict, ss_list, sst_unique_count, sst_all_count, row_idx
    )
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def calc_column_widths(rows, max_cols):
    """Delegate to the C ``calc_column_widths`` when the extension is loaded.

    Both arguments are forwarded unchanged.

    Returns:
        Whatever the C function returns when ``HAVE_C_EXT`` is true,
        otherwise ``None``.
    """
    # The C function is only called when the flag is set; the conditional
    # expression short-circuits otherwise.
    return _c_calc_column_widths(rows, max_cols) if HAVE_C_EXT else None
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def read_xlsb_worksheet(data, shared_strings, styles_list, date_num_fmts):
    """Delegate to the C ``read_xlsb_worksheet`` when the extension is loaded.

    All four arguments are forwarded unchanged and in the same order.

    Returns:
        Whatever the C function returns when ``HAVE_C_EXT`` is true,
        otherwise ``None``.
    """
    # NOTE(review): presumably callers treat None as "use the pure Python
    # reader" -- confirm at the call sites.
    if not HAVE_C_EXT:
        return None
    return _c_read_xlsb_worksheet(
        data, shared_strings, styles_list, date_num_fmts
    )
|