tabular-reader 0.1.1__tar.gz → 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tabular_reader-0.1.1 → tabular_reader-0.1.2}/PKG-INFO +4 -2
- {tabular_reader-0.1.1 → tabular_reader-0.1.2}/README.org +3 -1
- {tabular_reader-0.1.1 → tabular_reader-0.1.2}/pyproject.toml +1 -1
- {tabular_reader-0.1.1 → tabular_reader-0.1.2}/tabular_reader/__init__.py +1 -1
- {tabular_reader-0.1.1 → tabular_reader-0.1.2}/tabular_reader/reader.py +57 -12
- {tabular_reader-0.1.1 → tabular_reader-0.1.2}/LICENSE +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: tabular-reader
|
|
3
|
-
Version: 0.1.1
|
|
3
|
+
Version: 0.1.2
|
|
4
4
|
Summary: Read XLSX, XLS and CSV files with a uniform interface Read XLSX, XLS and CSV files with a uniform interface
|
|
5
5
|
License: MIT
|
|
6
6
|
License-File: LICENSE
|
|
@@ -26,7 +26,9 @@ Requires-Dist: xlrd (>=2.0,<3.0)
|
|
|
26
26
|
Project-URL: Repository, https://github.com/arkhan/tabular-reader
|
|
27
27
|
Description-Content-Type: text/plain
|
|
28
28
|
|
|
29
|
-
|
|
29
|
+
#+TITLE: tabular-reader
|
|
30
|
+
|
|
31
|
+
* tabular_reader
|
|
30
32
|
|
|
31
33
|
** Description
|
|
32
34
|
A module that maps information from each row in XLSX, XLS, or CSV
|
|
@@ -1,21 +1,34 @@
|
|
|
1
1
|
#!/usr/bin/env python
|
|
2
2
|
import csv
|
|
3
|
+
import io
|
|
3
4
|
import os
|
|
4
5
|
from types import SimpleNamespace
|
|
5
6
|
|
|
6
7
|
|
|
7
8
|
def get_file_format(filename):
|
|
9
|
+
if isinstance(filename, (io.BytesIO, bytes)):
|
|
10
|
+
return None
|
|
8
11
|
_, ext = os.path.splitext(filename)
|
|
9
12
|
return ext.lower().lstrip(".")
|
|
10
13
|
|
|
11
14
|
|
|
12
|
-
def read_csv(filename, **kwargs):
|
|
15
|
+
def read_csv(source, **kwargs):
|
|
13
16
|
csv_kwargs = {
|
|
14
17
|
k: v for k, v in kwargs.items() if k in ["delimiter", "quotechar", "encoding"]
|
|
15
18
|
}
|
|
16
|
-
|
|
19
|
+
|
|
20
|
+
if isinstance(source, io.BytesIO):
|
|
21
|
+
source.seek(0)
|
|
22
|
+
f = io.TextIOWrapper(source, encoding=csv_kwargs.pop("encoding", "utf-8-sig"))
|
|
23
|
+
else:
|
|
24
|
+
f = open(source, "r", encoding=csv_kwargs.pop("encoding", "utf-8-sig"))
|
|
25
|
+
|
|
26
|
+
try:
|
|
17
27
|
reader = csv.reader(f, **csv_kwargs)
|
|
18
28
|
total = list(reader)
|
|
29
|
+
finally:
|
|
30
|
+
if not isinstance(source, io.BytesIO):
|
|
31
|
+
f.close()
|
|
19
32
|
|
|
20
33
|
header = total[0] if total else []
|
|
21
34
|
filtered_indices = [
|
|
@@ -26,10 +39,15 @@ def read_csv(filename, **kwargs):
|
|
|
26
39
|
]
|
|
27
40
|
|
|
28
41
|
|
|
29
|
-
def read_xls(filename, worksheet="", **kwargs):
|
|
42
|
+
def read_xls(source, worksheet="", **kwargs):
|
|
30
43
|
import xlrd
|
|
31
44
|
|
|
32
|
-
|
|
45
|
+
if isinstance(source, io.BytesIO):
|
|
46
|
+
source.seek(0)
|
|
47
|
+
wb = xlrd.open_workbook(file_contents=source.read())
|
|
48
|
+
else:
|
|
49
|
+
wb = xlrd.open_workbook(source)
|
|
50
|
+
|
|
33
51
|
ws = wb.sheet_by_name(worksheet) if worksheet else wb.sheet_by_index(0)
|
|
34
52
|
|
|
35
53
|
total = [[cell.value for cell in row] for row in ws.get_rows()]
|
|
@@ -42,13 +60,17 @@ def read_xls(filename, worksheet="", **kwargs):
|
|
|
42
60
|
]
|
|
43
61
|
|
|
44
62
|
|
|
45
|
-
def read_xlsx(filename, worksheet="", **kwargs):
|
|
63
|
+
def read_xlsx(source, worksheet="", **kwargs):
|
|
46
64
|
from openpyxl import load_workbook
|
|
47
65
|
|
|
48
66
|
excel_kwargs = {
|
|
49
67
|
k: v for k, v in kwargs.items() if k not in ["delimiter", "encoding"]
|
|
50
68
|
}
|
|
51
|
-
|
|
69
|
+
|
|
70
|
+
if isinstance(source, io.BytesIO):
|
|
71
|
+
source.seek(0)
|
|
72
|
+
|
|
73
|
+
wb = load_workbook(source, **excel_kwargs)
|
|
52
74
|
ws = worksheet and wb[worksheet] or wb.active
|
|
53
75
|
|
|
54
76
|
total = [[col.value for col in row] for row in ws]
|
|
@@ -64,7 +86,7 @@ def read_xlsx(filename, worksheet="", **kwargs):
|
|
|
64
86
|
class TabularReader:
|
|
65
87
|
def __init__(
|
|
66
88
|
self,
|
|
67
|
-
|
|
89
|
+
source,
|
|
68
90
|
worksheet="",
|
|
69
91
|
fieldnames=None,
|
|
70
92
|
restval=None,
|
|
@@ -73,14 +95,16 @@ class TabularReader:
|
|
|
73
95
|
*args,
|
|
74
96
|
**kwargs,
|
|
75
97
|
):
|
|
76
|
-
file_format = get_file_format(filename)
|
|
98
|
+
file_format = get_file_format(source)
|
|
77
99
|
|
|
78
|
-
if file_format == "csv":
|
|
79
|
-
filtered_data = read_csv(filename, **kwargs)
|
|
100
|
+
if file_format is None:
|
|
101
|
+
filtered_data = self._detect_and_read_bytes(source, worksheet, **kwargs)
|
|
102
|
+
elif file_format == "csv":
|
|
103
|
+
filtered_data = read_csv(source, **kwargs)
|
|
80
104
|
elif file_format == "xlsx":
|
|
81
|
-
filtered_data = read_xlsx(filename, worksheet, **kwargs)
|
|
105
|
+
filtered_data = read_xlsx(source, worksheet, **kwargs)
|
|
82
106
|
elif file_format == "xls":
|
|
83
|
-
filtered_data = read_xls(filename, worksheet, **kwargs)
|
|
107
|
+
filtered_data = read_xls(source, worksheet, **kwargs)
|
|
84
108
|
else:
|
|
85
109
|
raise ValueError(f"Unsupported format: {file_format}")
|
|
86
110
|
|
|
@@ -91,6 +115,27 @@ class TabularReader:
|
|
|
91
115
|
self.skip_blank_lines = skip_blank_lines
|
|
92
116
|
self.line_num = 0
|
|
93
117
|
|
|
118
|
+
def _detect_and_read_bytes(self, source, worksheet, **kwargs):
|
|
119
|
+
errors = []
|
|
120
|
+
|
|
121
|
+
for fmt, reader_func in [
|
|
122
|
+
("xlsx", read_xlsx),
|
|
123
|
+
("xls", read_xls),
|
|
124
|
+
("csv", read_csv),
|
|
125
|
+
]:
|
|
126
|
+
try:
|
|
127
|
+
if isinstance(source, io.BytesIO):
|
|
128
|
+
source.seek(0)
|
|
129
|
+
return reader_func(source, worksheet, **kwargs)
|
|
130
|
+
except Exception as e:
|
|
131
|
+
errors.append((fmt, str(e)))
|
|
132
|
+
if isinstance(source, io.BytesIO):
|
|
133
|
+
source.seek(0)
|
|
134
|
+
|
|
135
|
+
raise ValueError(
|
|
136
|
+
f"Could not detect file format. Tried: {', '.join(f[0] for f in errors)}"
|
|
137
|
+
)
|
|
138
|
+
|
|
94
139
|
@property
|
|
95
140
|
def fieldnames(self):
|
|
96
141
|
if self._fieldnames is None:
|
|
File without changes
|