xlspy 0.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
xlspy-0.1.3/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Krzysztof Duśko
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
xlspy-0.1.3/PKG-INFO ADDED
@@ -0,0 +1,204 @@
1
+ Metadata-Version: 2.4
2
+ Name: xlspy
3
+ Version: 0.1.3
4
+ Summary: Read and write XLSB and XLSX files efficiently.
5
+ Author: Krzysztof Duśko
6
+ License-Expression: MIT
7
+ Project-URL: Repository, https://github.com/KrzysztofDusko/xlspy/
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Operating System :: OS Independent
10
+ Classifier: Topic :: Office/Business :: Office Suites
11
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
12
+ Requires-Python: >=3.12
13
+ Description-Content-Type: text/markdown
14
+ License-File: LICENSE
15
+ Provides-Extra: test
16
+ Requires-Dist: openpyxl; extra == "test"
17
+ Requires-Dist: pytest; extra == "test"
18
+ Requires-Dist: xlsxwriter; extra == "test"
19
+ Provides-Extra: examples
20
+ Requires-Dist: memory-profiler; extra == "examples"
21
+ Dynamic: license-file
22
+
23
+ # Python XLSB Reader & Writer
24
+
25
+ A Python library for reading and writing XLSB and XLSX files efficiently.
26
+
27
+ ## Installation
28
+
29
+ ```bash
30
+ pip install xlspy
31
+ ```
32
+
33
+ ## Usage
34
+
35
+ ### Basic Example
36
+
37
+ ```python
38
+ from xlspy import XlsbWriter
39
+ import datetime
40
+ from decimal import Decimal
41
+
42
+ data = [
43
+ ["Name", "Age", "City", "info"],
44
+ [-123, 2147483647, 2147483648, 2147483999],
45
+ ["x", "y", "z", datetime.datetime.today()],
46
+ ["Alice", 25, "New York", datetime.date.today()],
47
+ ["Bob", 30, "London", Decimal(3.14)],
48
+ ["Charlie", 35, "Paris", datetime.datetime.now()],
49
+ [True, False, None, datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)]
50
+ ]
51
+
52
+ # Initialize writer with a specific compression level
53
+ with XlsbWriter("output.xlsb", compressionLevel=6) as writer:
54
+ # Add a visible sheet
55
+ writer.add_sheet("Visible Sheet")
56
+ writer.write_sheet(data)
57
+
58
+ # Add a hidden sheet
59
+ writer.add_sheet("Hidden Sheet", hidden=True)
60
+ writer.write_sheet([["This sheet is hidden."]])
61
+ ```
62
+
63
+ ### XlsxWriter Example
64
+
65
+ ```python
66
+ from xlspy import XlsxWriter
67
+ import datetime
68
+ from decimal import Decimal
69
+
70
+ data = [
71
+ ["Name", "Age", "City", "info"],
72
+ [-123, 2147483647, 2147483648, 2147483999],
73
+ ["x", "y", "z", datetime.datetime.today()],
74
+ ["Alice", 25, "New York", datetime.date.today()],
75
+ ["Bob", 30, "London", Decimal(3.14)],
76
+ ["Charlie", 35, "Paris", datetime.datetime.now()],
77
+ [True, False, None, datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)]
78
+ ]
79
+
80
+ # Initialize writer with a specific compression level
81
+ with XlsxWriter("output.xlsx", compressionLevel=6) as writer:
82
+ # Add a visible sheet
83
+ writer.add_sheet("Visible Sheet")
84
+ writer.write_sheet(data)
85
+
86
+ # Add a hidden sheet
87
+ writer.add_sheet("Hidden Sheet", hidden=True)
88
+ writer.write_sheet([["This sheet is hidden."]])
89
+ ```
90
+
91
+ ### Reading XLSB and XLSX Files
92
+
93
+ Reading files is done via the `ExcelReader` class, which automatically detects the format.
94
+
95
+ ```python
96
+ from xlspy import ExcelReader
97
+
98
+ with ExcelReader("input.xlsx") as reader: # or .xlsb
99
+ names = reader.get_sheet_names()
100
+ print(f"Sheets: {names}")
101
+
102
+ for sheet_name in names:
103
+ rows = reader.read_all(sheet_name)
104
+ for row in rows:
105
+ print(row)
106
+
107
+ # Generator usage (memory efficient for large files):
108
+ with ExcelReader("large_file.xlsb") as reader:
109
+ for row in reader.get_rows("Sheet1"):
110
+ print(row)
111
+ ```
112
+
113
+ ### Streaming from a Database (Netezza)
114
+
115
+ This example shows how to stream data directly from a database query into an XLSB file using `nzpy-extended`. This is highly memory-efficient as it doesn't load the entire dataset into memory.
116
+
117
+ First, ensure you have `nzpy-extended` installed:
118
+ ```bash
119
+ pip install nzpy-extended
120
+ ```
121
+
122
+ Then, you can use a generator function to feed data to `XlsbWriter`.
123
+
124
+ ```python
125
+ import os
126
+ from typing import Generator
127
+ from xlspy import XlsbWriter
128
+
129
+ # --- Configuration ---
130
+ NZ_CONFIG = {
131
+ "host": os.environ.get("NZ_DEV_HOST", "your_host"),
132
+ "port": int(os.environ.get("NZ_DEV_PORT", "5480")),
133
+ "database": os.environ.get("NZ_DEV_DB", "your_db"),
134
+ "user": os.environ.get("NZ_DEV_USER", "your_user"),
135
+ "password": os.environ.get("NZ_DEV_PASSWORD", "your_password"),
136
+ }
137
+ QUERY = "SELECT * FROM YourTable"
138
+ OUTPUT_FILENAME = "db_output.xlsb"
139
+
140
+
141
+ def row_generator(cursor) -> Generator[list, None, None]:
142
+ """Yields column headers first, then each data row."""
143
+ headers = [column[0] for column in cursor.description]
144
+ yield headers
145
+ while row := cursor.fetchone():
146
+ yield list(row)
147
+
148
+
149
+ # --- Main Execution ---
150
+ try:
151
+ import nzpy_extended.sync as nzpy
152
+
153
+ with nzpy.connect(**NZ_CONFIG) as conn:
154
+ cursor = conn.cursor()
155
+ cursor.execute(QUERY)
156
+
157
+ with XlsbWriter(OUTPUT_FILENAME) as writer:
158
+ writer.add_sheet("Database Export")
159
+ writer.write_sheet(row_generator(cursor))
160
+ writer.add_sheet("SQL Query", hidden=True)
161
+ writer.write_sheet([["SQL"], [QUERY]])
162
+
163
+ print(f"Successfully created '{OUTPUT_FILENAME}'")
164
+
165
+ except Exception as e:
166
+ print(f"An unexpected error occurred: {e}")
167
+ ```
168
+
169
+ ## Performance
170
+
171
+ `xlspy` is designed for high performance. Since version 0.1.0, the library includes a **C extension** (`_c_core`) that accelerates XLSB read and write. The C extension is **enabled by default** (compiled automatically on install). Set `XLSPY_DISABLE_C_EXT=1` to force the pure Python fallback.
172
+
173
+ All benchmarks: **50000 × 50** dataset (2.5M cells). Tests performed on **Windows 11** (Python 3.14, AMD64).
174
+
175
+ ### Write
176
+
177
+ | Library | Format | Time | Size |
178
+ |---------|--------|------|------|
179
+ | **xlspy (C_EXT)** | XLSB | **1.02 s** | 7.20 MB |
180
+ | xlspy (Python) | XLSB | 2.54 s | 7.20 MB |
181
+ | xlspy | XLSX | 3.66 s | 6.32 MB |
182
+ | [xlsxwriter](https://pypi.org/project/xlsxwriter/) | XLSX | 8.67 s | 11.44 MB |
183
+
184
+ ### Read
185
+
186
+ | Library | Format | Time | Notes |
187
+ |---------|--------|------|-------|
188
+ | **xlspy (C_EXT)** | XLSB | **1.39 s** | default, compiled C |
189
+ | xlspy | XLSX | 4.72 s | uses expat XML parser (C) |
190
+ | xlspy (Python) | XLSB | 6.41 s | pure Python fallback |
191
+ | [openpyxl](https://pypi.org/project/openpyxl/) | XLSX | 7.85 s | read-only mode |
192
+
193
+
194
+ ### Analysis
195
+
196
+ The **4.6× read speedup** of the C extension over the pure Python XLSB reader (6.41 s → 1.39 s) comes from two factors:
197
+ - **~60–70%** — native C compilation, no interpreter overhead per record
198
+ - **~30–40%** — algorithm simplification: flat array indexed by `col − first_col` instead of `Dict[int, Any]`, no `isinstance` per cell, no `BiffReader.read_worksheet()` method call per record
199
+
200
+ Run the benchmarks yourself with `examples/performance_test.py`.
201
+
202
+ ## Repository
203
+
204
+ <https://github.com/KrzysztofDusko/xlspy/>
xlspy-0.1.3/README.md ADDED
@@ -0,0 +1,182 @@
1
+ # Python XLSB Reader & Writer
2
+
3
+ A Python library for reading and writing XLSB and XLSX files efficiently.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ pip install xlspy
9
+ ```
10
+
11
+ ## Usage
12
+
13
+ ### Basic Example
14
+
15
+ ```python
16
+ from xlspy import XlsbWriter
17
+ import datetime
18
+ from decimal import Decimal
19
+
20
+ data = [
21
+ ["Name", "Age", "City", "info"],
22
+ [-123, 2147483647, 2147483648, 2147483999],
23
+ ["x", "y", "z", datetime.datetime.today()],
24
+ ["Alice", 25, "New York", datetime.date.today()],
25
+ ["Bob", 30, "London", Decimal(3.14)],
26
+ ["Charlie", 35, "Paris", datetime.datetime.now()],
27
+ [True, False, None, datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)]
28
+ ]
29
+
30
+ # Initialize writer with a specific compression level
31
+ with XlsbWriter("output.xlsb", compressionLevel=6) as writer:
32
+ # Add a visible sheet
33
+ writer.add_sheet("Visible Sheet")
34
+ writer.write_sheet(data)
35
+
36
+ # Add a hidden sheet
37
+ writer.add_sheet("Hidden Sheet", hidden=True)
38
+ writer.write_sheet([["This sheet is hidden."]])
39
+ ```
40
+
41
+ ### XlsxWriter Example
42
+
43
+ ```python
44
+ from xlspy import XlsxWriter
45
+ import datetime
46
+ from decimal import Decimal
47
+
48
+ data = [
49
+ ["Name", "Age", "City", "info"],
50
+ [-123, 2147483647, 2147483648, 2147483999],
51
+ ["x", "y", "z", datetime.datetime.today()],
52
+ ["Alice", 25, "New York", datetime.date.today()],
53
+ ["Bob", 30, "London", Decimal(3.14)],
54
+ ["Charlie", 35, "Paris", datetime.datetime.now()],
55
+ [True, False, None, datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)]
56
+ ]
57
+
58
+ # Initialize writer with a specific compression level
59
+ with XlsxWriter("output.xlsx", compressionLevel=6) as writer:
60
+ # Add a visible sheet
61
+ writer.add_sheet("Visible Sheet")
62
+ writer.write_sheet(data)
63
+
64
+ # Add a hidden sheet
65
+ writer.add_sheet("Hidden Sheet", hidden=True)
66
+ writer.write_sheet([["This sheet is hidden."]])
67
+ ```
68
+
69
+ ### Reading XLSB and XLSX Files
70
+
71
+ Reading files is done via the `ExcelReader` class, which automatically detects the format.
72
+
73
+ ```python
74
+ from xlspy import ExcelReader
75
+
76
+ with ExcelReader("input.xlsx") as reader: # or .xlsb
77
+ names = reader.get_sheet_names()
78
+ print(f"Sheets: {names}")
79
+
80
+ for sheet_name in names:
81
+ rows = reader.read_all(sheet_name)
82
+ for row in rows:
83
+ print(row)
84
+
85
+ # Generator usage (memory efficient for large files):
86
+ with ExcelReader("large_file.xlsb") as reader:
87
+ for row in reader.get_rows("Sheet1"):
88
+ print(row)
89
+ ```
90
+
91
+ ### Streaming from a Database (Netezza)
92
+
93
+ This example shows how to stream data directly from a database query into an XLSB file using `nzpy-extended`. This is highly memory-efficient as it doesn't load the entire dataset into memory.
94
+
95
+ First, ensure you have `nzpy-extended` installed:
96
+ ```bash
97
+ pip install nzpy-extended
98
+ ```
99
+
100
+ Then, you can use a generator function to feed data to `XlsbWriter`.
101
+
102
+ ```python
103
+ import os
104
+ from typing import Generator
105
+ from xlspy import XlsbWriter
106
+
107
+ # --- Configuration ---
108
+ NZ_CONFIG = {
109
+ "host": os.environ.get("NZ_DEV_HOST", "your_host"),
110
+ "port": int(os.environ.get("NZ_DEV_PORT", "5480")),
111
+ "database": os.environ.get("NZ_DEV_DB", "your_db"),
112
+ "user": os.environ.get("NZ_DEV_USER", "your_user"),
113
+ "password": os.environ.get("NZ_DEV_PASSWORD", "your_password"),
114
+ }
115
+ QUERY = "SELECT * FROM YourTable"
116
+ OUTPUT_FILENAME = "db_output.xlsb"
117
+
118
+
119
+ def row_generator(cursor) -> Generator[list, None, None]:
120
+ """Yields column headers first, then each data row."""
121
+ headers = [column[0] for column in cursor.description]
122
+ yield headers
123
+ while row := cursor.fetchone():
124
+ yield list(row)
125
+
126
+
127
+ # --- Main Execution ---
128
+ try:
129
+ import nzpy_extended.sync as nzpy
130
+
131
+ with nzpy.connect(**NZ_CONFIG) as conn:
132
+ cursor = conn.cursor()
133
+ cursor.execute(QUERY)
134
+
135
+ with XlsbWriter(OUTPUT_FILENAME) as writer:
136
+ writer.add_sheet("Database Export")
137
+ writer.write_sheet(row_generator(cursor))
138
+ writer.add_sheet("SQL Query", hidden=True)
139
+ writer.write_sheet([["SQL"], [QUERY]])
140
+
141
+ print(f"Successfully created '{OUTPUT_FILENAME}'")
142
+
143
+ except Exception as e:
144
+ print(f"An unexpected error occurred: {e}")
145
+ ```
146
+
147
+ ## Performance
148
+
149
+ `xlspy` is designed for high performance. Since version 0.1.0, the library includes a **C extension** (`_c_core`) that accelerates XLSB read and write. The C extension is **enabled by default** (compiled automatically on install). Set `XLSPY_DISABLE_C_EXT=1` to force the pure Python fallback.
150
+
151
+ All benchmarks: **50000 × 50** dataset (2.5M cells). Tests performed on **Windows 11** (Python 3.14, AMD64).
152
+
153
+ ### Write
154
+
155
+ | Library | Format | Time | Size |
156
+ |---------|--------|------|------|
157
+ | **xlspy (C_EXT)** | XLSB | **1.02 s** | 7.20 MB |
158
+ | xlspy (Python) | XLSB | 2.54 s | 7.20 MB |
159
+ | xlspy | XLSX | 3.66 s | 6.32 MB |
160
+ | [xlsxwriter](https://pypi.org/project/xlsxwriter/) | XLSX | 8.67 s | 11.44 MB |
161
+
162
+ ### Read
163
+
164
+ | Library | Format | Time | Notes |
165
+ |---------|--------|------|-------|
166
+ | **xlspy (C_EXT)** | XLSB | **1.39 s** | default, compiled C |
167
+ | xlspy | XLSX | 4.72 s | uses expat XML parser (C) |
168
+ | xlspy (Python) | XLSB | 6.41 s | pure Python fallback |
169
+ | [openpyxl](https://pypi.org/project/openpyxl/) | XLSX | 7.85 s | read-only mode |
170
+
171
+
172
+ ### Analysis
173
+
174
+ The **4.6× read speedup** of the C extension over the pure Python XLSB reader (6.41 s → 1.39 s) comes from two factors:
175
+ - **~60–70%** — native C compilation, no interpreter overhead per record
176
+ - **~30–40%** — algorithm simplification: flat array indexed by `col − first_col` instead of `Dict[int, Any]`, no `isinstance` per cell, no `BiffReader.read_worksheet()` method call per record
177
+
178
+ Run the benchmarks yourself with `examples/performance_test.py`.
179
+
180
+ ## Repository
181
+
182
+ <https://github.com/KrzysztofDusko/xlspy/>
xlspy-0.1.3/pyproject.toml ADDED
@@ -0,0 +1,51 @@
1
+ [build-system]
2
+ requires = ["setuptools>=77.0.3"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [[tool.setuptools.ext-modules]]
6
+ name = "xlspy._c_core"
7
+ sources = ["src/xlspy/c_ext/_c_core.c"]
8
+
9
+ [tool.setuptools.packages.find]
10
+ where = ["src"]
11
+
12
+ [project]
13
+ name = "xlspy"
14
+ version = "0.1.3"
15
+ authors = [
16
+ { name="Krzysztof Duśko"},
17
+ ]
18
+ description = "Read and write XLSB and XLSX files efficiently."
19
+ readme = "README.md"
20
+ requires-python = ">=3.12"
21
+ license = "MIT"
22
+ classifiers = [
23
+ "Programming Language :: Python :: 3",
24
+ "Operating System :: OS Independent",
25
+ "Topic :: Office/Business :: Office Suites",
26
+ "Topic :: Software Development :: Libraries :: Python Modules",
27
+ ]
28
+ dependencies = [
29
+ ]
30
+
31
+ [project.urls]
32
+ Repository = "https://github.com/KrzysztofDusko/xlspy/"
33
+
34
+ [project.optional-dependencies]
35
+ test = [
36
+ "openpyxl",
37
+ "pytest",
38
+ "xlsxwriter",
39
+ ]
40
+ examples = [
41
+ "memory-profiler"
42
+ ]
43
+
44
+ [tool.cibuildwheel]
45
+ build = "cp312-* cp313-* cp314-*"
46
+ archs = "auto64"
47
+
48
+ [tool.pytest.ini_options]
49
+ testpaths = ["tests"]
50
+
51
+
xlspy-0.1.3/setup.cfg ADDED
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,6 @@
1
+ from .writer import XlsbWriter
2
+ from .xlsx_writer import XlsxWriter
3
+ from .reader import ExcelReader
4
+
# Must match the `version` field in pyproject.toml / PKG-INFO for this
# release (0.1.3); it previously still reported the older "0.1.0".
__version__ = "0.1.3"
# Public API re-exported by the package.
__all__ = ['XlsbWriter', 'XlsxWriter', 'ExcelReader']
@@ -0,0 +1,35 @@
1
+ import os as _os
2
+
# Defaults: the C accelerator counts as unavailable until the import below
# succeeds, and the three C entry points stay unset.
HAVE_C_EXT = False
_c_encode_xlsb_row = _c_calc_column_widths = _c_read_xlsb_worksheet = None

# Only the exact value '1' in XLSPY_DISABLE_C_EXT suppresses the import;
# an unset variable or any other value lets it proceed.
if _os.environ.get('XLSPY_DISABLE_C_EXT', '0') != '1':
    try:
        from xlspy._c_core import (
            encode_xlsb_row as _c_encode_xlsb_row,
            calc_column_widths as _c_calc_column_widths,
            read_xlsb_worksheet as _c_read_xlsb_worksheet,
        )
    except ImportError:
        # Extension could not be imported; HAVE_C_EXT stays False.
        pass
    else:
        HAVE_C_EXT = True
18
+
19
+
20
+ def encode_xlsb_row(row, ss_dict, ss_list, sst_unique_count, sst_all_count, row_idx):
21
+ if HAVE_C_EXT:
22
+ return _c_encode_xlsb_row(row, ss_dict, ss_list, sst_unique_count, sst_all_count, row_idx)
23
+ return None
24
+
25
+
26
+ def calc_column_widths(rows, max_cols):
27
+ if HAVE_C_EXT:
28
+ return _c_calc_column_widths(rows, max_cols)
29
+ return None
30
+
31
+
32
+ def read_xlsb_worksheet(data, shared_strings, styles_list, date_num_fmts):
33
+ if HAVE_C_EXT:
34
+ return _c_read_xlsb_worksheet(data, shared_strings, styles_list, date_num_fmts)
35
+ return None