tabular-reader 0.1.3__tar.gz → 0.1.4__tar.gz
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {tabular_reader-0.1.3 → tabular_reader-0.1.4}/PKG-INFO +1 -1
- {tabular_reader-0.1.3 → tabular_reader-0.1.4}/pyproject.toml +1 -1
- {tabular_reader-0.1.3 → tabular_reader-0.1.4}/tabular_reader/__init__.py +1 -1
- {tabular_reader-0.1.3 → tabular_reader-0.1.4}/tabular_reader/reader.py +26 -15
- {tabular_reader-0.1.3 → tabular_reader-0.1.4}/LICENSE +0 -0
- {tabular_reader-0.1.3 → tabular_reader-0.1.4}/README.org +0 -0
|
@@ -17,6 +17,8 @@ def read_csv(source, **kwargs):
|
|
|
17
17
|
k: v for k, v in kwargs.items() if k in ["delimiter", "quotechar", "encoding"]
|
|
18
18
|
}
|
|
19
19
|
|
|
20
|
+
if isinstance(source, bytes):
|
|
21
|
+
source = io.BytesIO(source)
|
|
20
22
|
if isinstance(source, io.BytesIO):
|
|
21
23
|
source.seek(0)
|
|
22
24
|
f = io.TextIOWrapper(source, encoding=csv_kwargs.pop("encoding", "utf-8-sig"))
|
|
@@ -39,9 +41,11 @@ def read_csv(source, **kwargs):
|
|
|
39
41
|
]
|
|
40
42
|
|
|
41
43
|
|
|
42
|
-
def read_xls(source, worksheet="", **kwargs):
|
|
44
|
+
def read_xls(source, worksheet="", skip_rows=0, **kwargs):
|
|
43
45
|
import xlrd
|
|
44
46
|
|
|
47
|
+
if isinstance(source, bytes):
|
|
48
|
+
source = io.BytesIO(source)
|
|
45
49
|
if isinstance(source, io.BytesIO):
|
|
46
50
|
source.seek(0)
|
|
47
51
|
wb = xlrd.open_workbook(file_contents=source.read())
|
|
@@ -51,7 +55,8 @@ def read_xls(source, worksheet="", **kwargs):
|
|
|
51
55
|
ws = wb.sheet_by_name(worksheet) if worksheet else wb.sheet_by_index(0)
|
|
52
56
|
|
|
53
57
|
total = [[cell.value for cell in row] for row in ws.get_rows()]
|
|
54
|
-
|
|
58
|
+
header_idx = skip_rows if skip_rows < len(total) else 0
|
|
59
|
+
header = total[header_idx] if total else []
|
|
55
60
|
filtered_indices = [
|
|
56
61
|
i for i, val in enumerate(header) if val is not None and str(val).strip() != ""
|
|
57
62
|
]
|
|
@@ -60,13 +65,14 @@ def read_xls(source, worksheet="", **kwargs):
|
|
|
60
65
|
]
|
|
61
66
|
|
|
62
67
|
|
|
63
|
-
def read_xlsx(source, worksheet="", **kwargs):
|
|
68
|
+
def read_xlsx(source, worksheet="", skip_rows=0, **kwargs):
|
|
64
69
|
from openpyxl import load_workbook
|
|
65
70
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
}
|
|
71
|
+
_OPENPYXL_KWARGS = {"read_only", "keep_vba", "data_only", "keep_links", "rich_text"}
|
|
72
|
+
excel_kwargs = {k: v for k, v in kwargs.items() if k in _OPENPYXL_KWARGS}
|
|
69
73
|
|
|
74
|
+
if isinstance(source, bytes):
|
|
75
|
+
source = io.BytesIO(source)
|
|
70
76
|
if isinstance(source, io.BytesIO):
|
|
71
77
|
source.seek(0)
|
|
72
78
|
|
|
@@ -74,7 +80,8 @@ def read_xlsx(source, worksheet="", **kwargs):
|
|
|
74
80
|
ws = worksheet and wb[worksheet] or wb.active
|
|
75
81
|
|
|
76
82
|
total = [[col.value for col in row] for row in ws]
|
|
77
|
-
|
|
83
|
+
header_idx = skip_rows if skip_rows < len(total) else 0
|
|
84
|
+
header = total[header_idx] if total else []
|
|
78
85
|
filtered_indices = [
|
|
79
86
|
i for i, val in enumerate(header) if val is not None and str(val).strip() != ""
|
|
80
87
|
]
|
|
@@ -93,19 +100,22 @@ class TabularReader:
|
|
|
93
100
|
restkey=None,
|
|
94
101
|
skip_blank_lines=False,
|
|
95
102
|
skip_rows=0,
|
|
103
|
+
skip_row=None,
|
|
96
104
|
*args,
|
|
97
105
|
**kwargs,
|
|
98
106
|
):
|
|
107
|
+
if skip_row is not None and not skip_rows:
|
|
108
|
+
skip_rows = skip_row
|
|
99
109
|
file_format = get_file_format(source)
|
|
100
110
|
|
|
101
111
|
if file_format is None:
|
|
102
|
-
filtered_data = self._detect_and_read_bytes(source, worksheet, **kwargs)
|
|
112
|
+
filtered_data = self._detect_and_read_bytes(source, worksheet, skip_rows=skip_rows, **kwargs)
|
|
103
113
|
elif file_format == "csv":
|
|
104
114
|
filtered_data = read_csv(source, **kwargs)
|
|
105
115
|
elif file_format == "xlsx":
|
|
106
|
-
filtered_data = read_xlsx(source, worksheet, **kwargs)
|
|
116
|
+
filtered_data = read_xlsx(source, worksheet, skip_rows=skip_rows, **kwargs)
|
|
107
117
|
elif file_format == "xls":
|
|
108
|
-
filtered_data = read_xls(source, worksheet, **kwargs)
|
|
118
|
+
filtered_data = read_xls(source, worksheet, skip_rows=skip_rows, **kwargs)
|
|
109
119
|
else:
|
|
110
120
|
raise ValueError(f"Unsupported format: {file_format}")
|
|
111
121
|
|
|
@@ -119,8 +129,10 @@ class TabularReader:
|
|
|
119
129
|
self.skip_blank_lines = skip_blank_lines
|
|
120
130
|
self.line_num = 0
|
|
121
131
|
|
|
122
|
-
def _detect_and_read_bytes(self, source, worksheet, **kwargs):
|
|
132
|
+
def _detect_and_read_bytes(self, source, worksheet, skip_rows=0, **kwargs):
|
|
123
133
|
errors = []
|
|
134
|
+
if isinstance(source, bytes):
|
|
135
|
+
source = io.BytesIO(source)
|
|
124
136
|
|
|
125
137
|
for fmt, reader_func in [
|
|
126
138
|
("xlsx", read_xlsx),
|
|
@@ -130,15 +142,14 @@ class TabularReader:
|
|
|
130
142
|
try:
|
|
131
143
|
if isinstance(source, io.BytesIO):
|
|
132
144
|
source.seek(0)
|
|
133
|
-
return reader_func(source, worksheet, **kwargs)
|
|
145
|
+
return reader_func(source, worksheet, skip_rows=skip_rows, **kwargs)
|
|
134
146
|
except Exception as e:
|
|
135
147
|
errors.append((fmt, str(e)))
|
|
136
148
|
if isinstance(source, io.BytesIO):
|
|
137
149
|
source.seek(0)
|
|
138
150
|
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
)
|
|
151
|
+
details = "; ".join(f"{fmt}: {err}" for fmt, err in errors)
|
|
152
|
+
raise ValueError(f"Could not detect file format. Tried: {details}")
|
|
142
153
|
|
|
143
154
|
@property
|
|
144
155
|
def fieldnames(self):
|
|
File without changes
|
|
File without changes
|