tabular-reader 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,9 @@
1
+  The MIT License (MIT)
2
+
3
+ Copyright © 2025 arkhan
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
6
+
7
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
8
+
9
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,146 @@
1
+ Metadata-Version: 2.4
2
+ Name: tabular-reader
3
+ Version: 0.1.0
4
+ Summary: Read XLSX, XLS and CSV files with a uniform interface
5
+ License: MIT
6
+ License-File: LICENSE
7
+ Keywords: spreadsheet,csv,xlsx,xls,reader
8
+ Author: arkhan
9
+ Author-email: arkhan@riseup.net
10
+ Requires-Python: >=3.6,<4.0
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.6
16
+ Classifier: Programming Language :: Python :: 3.7
17
+ Classifier: Programming Language :: Python :: 3.8
18
+ Classifier: Programming Language :: Python :: 3.9
19
+ Classifier: Programming Language :: Python :: 3.10
20
+ Classifier: Programming Language :: Python :: 3.11
21
+ Classifier: Programming Language :: Python :: 3.12
22
+ Classifier: Programming Language :: Python :: 3.13
23
+ Classifier: Programming Language :: Python :: 3.14
24
+ Requires-Dist: openpyxl (>=3.0,<3.1)
25
+ Project-URL: Repository, https://github.com/arkhan/tabular-reader
26
+ Description-Content-Type: text/plain
27
+
28
+ * tabular_reader
29
+
30
+ ** Description
31
+ A module that maps information from each row in XLSX, XLS, or CSV
32
+ files to a ~SimpleNamespace~ object, providing a uniform interface
33
+ across different spreadsheet and data formats. Similar to Python's
34
+ native [[https://docs.python.org/3/library/csv.html#csv.DictReader][csv.DictReader]] but with support for multiple file types.
35
+
36
+ ** Installing
37
+ #+begin_src bash
38
+ pip install tabular-reader
39
+ #+end_src
40
+
41
+ ** Examples
42
+
43
+ *** Basic usage with XLSX
44
+ #+begin_src python
45
+ from tabular_reader import TabularReader
46
+
47
+ reader = TabularReader("names.xlsx", worksheet="Sheet1")
48
+ for row in reader:
49
+ print(row.first_name, row.last_name)
50
+ #+end_src
51
+
52
+ Output:
53
+ #+begin_src
54
+ Boris Johnson
55
+ Donald Trump
56
+ Mark Rutte
57
+ #+end_src
58
+
59
+ *** Reading CSV files
60
+ #+begin_src python
61
+ from tabular_reader import TabularReader
62
+
63
+ reader = TabularReader("data.csv")
64
+ for row in reader:
65
+ print(row.email, row.phone)
66
+ #+end_src
67
+
68
+ *** Reading XLS files
69
+ #+begin_src python
70
+ from tabular_reader import TabularReader
71
+
72
+ reader = TabularReader("legacy_data.xls")
73
+ for row in reader:
74
+ print(row.id, row.name)
75
+ #+end_src
76
+
77
+ ** Custom fieldnames
78
+ You can specify fieldnames manually:
79
+
80
+ #+begin_src python
81
+ reader = TabularReader(
82
+ "data.xlsx",
83
+ fieldnames=["id", "email", "username"]
84
+ )
85
+ for row in reader:
86
+ print(row.id, row.email)
87
+ #+end_src
88
+
89
+ ** Skip blank lines
90
+ Filter out empty rows:
91
+
92
+ #+begin_src python
93
+ reader = TabularReader(
94
+ "data.csv",
95
+ skip_blank_lines=True
96
+ )
97
+ for row in reader:
98
+ print(row)
99
+ #+end_src
100
+
101
+ ** Keyword arguments
102
+
103
+ *** For Excel files (XLSX/XLS)
104
+ Pass any ~openpyxl.load_workbook~ keyword arguments:
105
+
106
+ #+begin_src python
107
+ reader = TabularReader(
108
+ "names.xlsx",
109
+ worksheet="Sheet1",
110
+ read_only=False,
111
+ keep_vba=False,
112
+ data_only=False,
113
+ keep_links=True
114
+ )
115
+ #+end_src
116
+
117
+ *** For CSV files
118
+ Pass ~csv.reader~ formatting arguments such as ~delimiter~ and ~quotechar~, plus the file ~encoding~:
119
+
120
+ #+begin_src python
121
+ reader = TabularReader(
122
+ "data.csv",
123
+ delimiter=";",
124
+ quotechar='"',
125
+ encoding="utf-8"
126
+ )
127
+ #+end_src
128
+
129
+ ** Features
130
+ - Automatic format detection by file extension
131
+ - Empty column filtering (only processes columns with headers)
132
+ - Uniform interface across XLSX, XLS, and CSV
133
+ - ~SimpleNamespace~ objects with attribute access (~row.field_name~)
134
+ - Python 3.6 - 3.14 support
135
+
136
+ ** Supported formats
137
+ | Format | Extension | Status |
138
+ |--------|-----------|-------------|
139
+ | Excel | .xlsx | ✓ Supported |
140
+ | Excel | .xls | ✓ Supported |
141
+ | CSV | .csv | ✓ Supported |
142
+
143
+ ** Acknowledgements
144
+ - [[https://openpyxl.readthedocs.io/][openpyxl]]
145
+ - [[https://docs.python.org/3/library/csv.html][csv]] - Python's standard library
146
+
@@ -0,0 +1,118 @@
1
+ * tabular_reader
2
+
3
+ ** Description
4
+ A module that maps information from each row in XLSX, XLS, or CSV
5
+ files to a ~SimpleNamespace~ object, providing a uniform interface
6
+ across different spreadsheet and data formats. Similar to Python's
7
+ native [[https://docs.python.org/3/library/csv.html#csv.DictReader][csv.DictReader]] but with support for multiple file types.
8
+
9
+ ** Installing
10
+ #+begin_src bash
11
+ pip install tabular-reader
12
+ #+end_src
13
+
14
+ ** Examples
15
+
16
+ *** Basic usage with XLSX
17
+ #+begin_src python
18
+ from tabular_reader import TabularReader
19
+
20
+ reader = TabularReader("names.xlsx", worksheet="Sheet1")
21
+ for row in reader:
22
+ print(row.first_name, row.last_name)
23
+ #+end_src
24
+
25
+ Output:
26
+ #+begin_src
27
+ Boris Johnson
28
+ Donald Trump
29
+ Mark Rutte
30
+ #+end_src
31
+
32
+ *** Reading CSV files
33
+ #+begin_src python
34
+ from tabular_reader import TabularReader
35
+
36
+ reader = TabularReader("data.csv")
37
+ for row in reader:
38
+ print(row.email, row.phone)
39
+ #+end_src
40
+
41
+ *** Reading XLS files
42
+ #+begin_src python
43
+ from tabular_reader import TabularReader
44
+
45
+ reader = TabularReader("legacy_data.xls")
46
+ for row in reader:
47
+ print(row.id, row.name)
48
+ #+end_src
49
+
50
+ ** Custom fieldnames
51
+ You can specify fieldnames manually:
52
+
53
+ #+begin_src python
54
+ reader = TabularReader(
55
+ "data.xlsx",
56
+ fieldnames=["id", "email", "username"]
57
+ )
58
+ for row in reader:
59
+ print(row.id, row.email)
60
+ #+end_src
61
+
62
+ ** Skip blank lines
63
+ Filter out empty rows:
64
+
65
+ #+begin_src python
66
+ reader = TabularReader(
67
+ "data.csv",
68
+ skip_blank_lines=True
69
+ )
70
+ for row in reader:
71
+ print(row)
72
+ #+end_src
73
+
74
+ ** Keyword arguments
75
+
76
+ *** For Excel files (XLSX/XLS)
77
+ Pass any ~openpyxl.load_workbook~ keyword arguments:
78
+
79
+ #+begin_src python
80
+ reader = TabularReader(
81
+ "names.xlsx",
82
+ worksheet="Sheet1",
83
+ read_only=False,
84
+ keep_vba=False,
85
+ data_only=False,
86
+ keep_links=True
87
+ )
88
+ #+end_src
89
+
90
+ *** For CSV files
91
+ Pass ~csv.reader~ formatting arguments such as ~delimiter~ and ~quotechar~, plus the file ~encoding~:
92
+
93
+ #+begin_src python
94
+ reader = TabularReader(
95
+ "data.csv",
96
+ delimiter=";",
97
+ quotechar='"',
98
+ encoding="utf-8"
99
+ )
100
+ #+end_src
101
+
102
+ ** Features
103
+ - Automatic format detection by file extension
104
+ - Empty column filtering (only processes columns with headers)
105
+ - Uniform interface across XLSX, XLS, and CSV
106
+ - ~SimpleNamespace~ objects with attribute access (~row.field_name~)
107
+ - Python 3.6 - 3.14 support
108
+
109
+ ** Supported formats
110
+ | Format | Extension | Status |
111
+ |--------|-----------|-------------|
112
+ | Excel | .xlsx | ✓ Supported |
113
+ | Excel | .xls | ✓ Supported |
114
+ | CSV | .csv | ✓ Supported |
115
+
116
+ ** Acknowledgements
117
+ - [[https://openpyxl.readthedocs.io/][openpyxl]]
118
+ - [[https://docs.python.org/3/library/csv.html][csv]] - Python's standard library
@@ -0,0 +1,37 @@
1
+ [tool.poetry]
2
+ name = "tabular-reader"
3
+ version = "0.1.0"
4
+ description = "Read XLSX, XLS and CSV files with a uniform interface"
5
+ authors = ["arkhan <arkhan@riseup.net>"]
6
+ license = "MIT"
7
+ readme = "README.org"
8
+ repository = "https://github.com/arkhan/tabular-reader"
9
+ keywords = ["spreadsheet", "csv", "xlsx", "xls", "reader"]
10
+ classifiers = [
11
+ "Development Status :: 3 - Alpha",
12
+ "Intended Audience :: Developers",
13
+ "License :: OSI Approved :: MIT License",
14
+ "Programming Language :: Python :: 3",
15
+ "Programming Language :: Python :: 3.6",
16
+ "Programming Language :: Python :: 3.7",
17
+ "Programming Language :: Python :: 3.8",
18
+ "Programming Language :: Python :: 3.9",
19
+ "Programming Language :: Python :: 3.10",
20
+ "Programming Language :: Python :: 3.11",
21
+ "Programming Language :: Python :: 3.12",
22
+ "Programming Language :: Python :: 3.13",
23
+ "Programming Language :: Python :: 3.14",
24
+ ]
25
+
26
+ [tool.poetry.dependencies]
27
+ python = "^3.6"
28
+ openpyxl = ">=3.0,<3.1"
29
+
30
+ [tool.poetry.group.dev.dependencies]
31
+ pytest = "^6.2"
32
+ black = "^21.0"
33
+ flake8 = "^3.9"
34
+
35
+ [build-system]
36
+ requires = ["poetry-core>=1.0.0"]
37
+ build-backend = "poetry.core.masonry.api"
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env python
2
+
3
+ from .reader import TabularReader
4
+
5
+ __version__ = "0.1.0"
6
+ __all__ = ["TabularReader"]
@@ -0,0 +1,103 @@
1
+ #!/usr/bin/env python
2
+ import csv
3
+ import os
4
+ from types import SimpleNamespace
5
+
6
+ from openpyxl import load_workbook
7
+
8
+
9
def get_file_format(filename):
    """Return the extension of *filename*, lower-cased and without the dot.

    A name with no extension yields an empty string.
    """
    suffix = os.path.splitext(filename)[1]
    return suffix.lstrip(".").lower()
12
+
13
+
14
def read_csv(filename, **kwargs):
    """Load a CSV file and return its rows restricted to the headed columns.

    The first row is treated as the header. Columns whose header cell is
    empty or whitespace-only are removed from every row, and rows shorter
    than the header are padded with ``None``.

    Args:
        filename: Path of the CSV file to read.
        **kwargs: ``encoding`` selects the text encoding (default
            ``"utf-8-sig"``). The csv formatting parameters ``dialect``,
            ``delimiter``, ``quotechar``, ``escapechar``, ``doublequote``,
            ``skipinitialspace``, ``lineterminator``, ``quoting`` and
            ``strict`` are forwarded to ``csv.reader``. Any other keyword
            is ignored.

    Returns:
        A list of rows (each a list of cells), header row first. An empty
        file yields an empty list.
    """
    csv_options = (
        "dialect",
        "delimiter",
        "quotechar",
        "escapechar",
        "doublequote",
        "skipinitialspace",
        "lineterminator",
        "quoting",
        "strict",
    )
    csv_kwargs = {k: v for k, v in kwargs.items() if k in csv_options}
    encoding = kwargs.get("encoding", "utf-8-sig")

    # newline="" lets the csv module do its own line-ending handling, so
    # line breaks embedded in quoted fields are returned unmodified.
    with open(filename, "r", encoding=encoding, newline="") as f:
        total = list(csv.reader(f, **csv_kwargs))

    header = total[0] if total else []
    # Keep only the columns that actually have a header.
    filtered_indices = [
        i for i, val in enumerate(header) if val is not None and str(val).strip() != ""
    ]
    return [
        [row[i] if i < len(row) else None for i in filtered_indices] for row in total
    ]
29
+
30
+
31
def read_xlsx(filename, worksheet="", **kwargs):
    """Load a worksheet and return its rows restricted to the headed columns.

    The first row is treated as the header. Columns whose header cell is
    ``None``, empty or whitespace-only are removed from every row.

    Args:
        filename: Path of the workbook to open with ``load_workbook``.
        worksheet: Name of the sheet to read; when empty, the workbook's
            active sheet is used.
        **kwargs: Forwarded to ``openpyxl.load_workbook``. The CSV-only
            options ``delimiter``, ``quotechar`` and ``encoding`` are
            dropped so one option set can serve both readers.

    Returns:
        A list of rows (each a list of cell values), header row first.
    """
    csv_only = ("delimiter", "quotechar", "encoding")
    excel_kwargs = {k: v for k, v in kwargs.items() if k not in csv_only}

    wb = load_workbook(filename, **excel_kwargs)
    try:
        ws = wb[worksheet] if worksheet else wb.active
        # Materialise every value before closing: in read-only mode the
        # cells are streamed from the still-open archive.
        total = [[cell.value for cell in row] for row in ws]
    finally:
        wb.close()

    header = total[0] if total else []
    # Keep only the columns that actually have a header.
    filtered_indices = [
        i for i, val in enumerate(header) if val is not None and str(val).strip() != ""
    ]
    return [
        [row[i] if i < len(row) else None for i in filtered_indices] for row in total
    ]
46
+
47
+
48
class TabularReader:
    """Iterate over a CSV or Excel file, yielding one ``SimpleNamespace`` per row.

    The first row supplies the field names unless ``fieldnames`` is given,
    in which case every row is treated as data. Each yielded object exposes
    the row's cells as attributes named after the fields.
    """

    def __init__(
        self,
        filename,
        worksheet="",
        fieldnames=None,
        restval=None,
        restkey=None,
        skip_blank_lines=False,
        *args,
        **kwargs,
    ):
        """Read *filename* eagerly and prepare to iterate over its rows.

        Args:
            filename: Path of the file; the format is chosen from its
                extension (``csv``, ``xlsx`` or ``xls``).
            worksheet: Sheet name for Excel files; empty selects the
                active sheet. Not used for CSV.
            fieldnames: Optional sequence of field names. When omitted the
                first row of the file is used.
            restval: Value assigned to fields for which a row has no cell.
            restkey: Attribute name under which surplus cells of a row are
                stored as a list; when ``None`` surplus cells are dropped.
            skip_blank_lines: When true, rows whose cells are all empty
                are skipped.
            *args: Accepted for compatibility and ignored.
            **kwargs: Passed through to the format-specific reader.

        Raises:
            ValueError: If the file extension is not supported.
        """
        file_format = get_file_format(filename)

        if file_format == "csv":
            filtered_data = read_csv(filename, **kwargs)
        elif file_format in ("xlsx", "xls"):
            # NOTE(review): openpyxl documents only the OOXML formats
            # (.xlsx/.xlsm/.xltx/.xltm); a legacy .xls file will likely be
            # rejected by load_workbook -- confirm before advertising it.
            filtered_data = read_xlsx(filename, worksheet, **kwargs)
        else:
            raise ValueError(f"Unsupported format: {file_format}")

        self.reader = iter(filtered_data)
        self._fieldnames = fieldnames
        self.restkey = restkey
        self.restval = restval
        self.skip_blank_lines = skip_blank_lines
        # Number of rows consumed so far (header and skipped rows included).
        self.line_num = 0

    def _next_row(self):
        """Pull the next raw row and count it in ``line_num``."""
        row = next(self.reader)
        self.line_num += 1
        return row

    @staticmethod
    def _is_blank(row):
        """Return True when every cell is ``None`` or an empty/blank string."""
        return all(
            cell is None or (isinstance(cell, str) and not cell.strip())
            for cell in row
        )

    @property
    def fieldnames(self):
        """Field names, read lazily from the first row when not supplied.

        Returns ``None`` if no names were given and the data is empty.
        """
        if self._fieldnames is None:
            try:
                self._fieldnames = self._next_row()
            except StopIteration:
                pass
        return self._fieldnames

    @fieldnames.setter
    def fieldnames(self, value):
        self._fieldnames = value

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next data row as a ``SimpleNamespace``.

        Raises:
            StopIteration: When the data is exhausted.
        """
        if self.line_num == 0:
            # Make sure the header row is consumed before the first record.
            self.fieldnames
        row = self._next_row()
        while self.skip_blank_lines and self._is_blank(row):
            row = self._next_row()

        # Keyword names must be strings; header cells from a workbook may
        # be numbers or dates.
        names = [str(name) for name in self.fieldnames]
        record = dict(zip(names, row))
        if len(row) > len(names):
            if self.restkey is not None:
                record[str(self.restkey)] = list(row[len(names):])
        else:
            for name in names[len(row):]:
                record[name] = self.restval
        return SimpleNamespace(**record)

    next = __next__