tabular-reader 0.1.3__tar.gz → 0.1.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tabular-reader
3
- Version: 0.1.3
3
+ Version: 0.1.4
4
4
  Summary: Read XLSX, XLS and CSV files with a uniform interface
5
5
  License: MIT
6
6
  License-File: LICENSE
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "tabular-reader"
3
- version = "0.1.3"
3
+ version = "0.1.4"
4
4
  description = "Read XLSX, XLS and CSV files with a uniform interface"
5
5
  authors = ["arkhan <arkhan@riseup.net>"]
6
6
  license = "MIT"
@@ -2,5 +2,5 @@
2
2
 
3
3
  from .reader import TabularReader
4
4
 
5
- __version__ = "0.1.3"
5
+ __version__ = "0.1.4"
6
6
  __all__ = ["TabularReader"]
@@ -17,6 +17,8 @@ def read_csv(source, **kwargs):
17
17
  k: v for k, v in kwargs.items() if k in ["delimiter", "quotechar", "encoding"]
18
18
  }
19
19
 
20
+ if isinstance(source, bytes):
21
+ source = io.BytesIO(source)
20
22
  if isinstance(source, io.BytesIO):
21
23
  source.seek(0)
22
24
  f = io.TextIOWrapper(source, encoding=csv_kwargs.pop("encoding", "utf-8-sig"))
@@ -39,9 +41,11 @@ def read_csv(source, **kwargs):
39
41
  ]
40
42
 
41
43
 
42
- def read_xls(source, worksheet="", **kwargs):
44
+ def read_xls(source, worksheet="", skip_rows=0, **kwargs):
43
45
  import xlrd
44
46
 
47
+ if isinstance(source, bytes):
48
+ source = io.BytesIO(source)
45
49
  if isinstance(source, io.BytesIO):
46
50
  source.seek(0)
47
51
  wb = xlrd.open_workbook(file_contents=source.read())
@@ -51,7 +55,8 @@ def read_xls(source, worksheet="", **kwargs):
51
55
  ws = wb.sheet_by_name(worksheet) if worksheet else wb.sheet_by_index(0)
52
56
 
53
57
  total = [[cell.value for cell in row] for row in ws.get_rows()]
54
- header = total[0] if total else []
58
+ header_idx = skip_rows if skip_rows < len(total) else 0
59
+ header = total[header_idx] if total else []
55
60
  filtered_indices = [
56
61
  i for i, val in enumerate(header) if val is not None and str(val).strip() != ""
57
62
  ]
@@ -60,13 +65,14 @@ def read_xls(source, worksheet="", **kwargs):
60
65
  ]
61
66
 
62
67
 
63
- def read_xlsx(source, worksheet="", **kwargs):
68
+ def read_xlsx(source, worksheet="", skip_rows=0, **kwargs):
64
69
  from openpyxl import load_workbook
65
70
 
66
- excel_kwargs = {
67
- k: v for k, v in kwargs.items() if k not in ["delimiter", "encoding"]
68
- }
71
+ _OPENPYXL_KWARGS = {"read_only", "keep_vba", "data_only", "keep_links", "rich_text"}
72
+ excel_kwargs = {k: v for k, v in kwargs.items() if k in _OPENPYXL_KWARGS}
69
73
 
74
+ if isinstance(source, bytes):
75
+ source = io.BytesIO(source)
70
76
  if isinstance(source, io.BytesIO):
71
77
  source.seek(0)
72
78
 
@@ -74,7 +80,8 @@ def read_xlsx(source, worksheet="", **kwargs):
74
80
  ws = worksheet and wb[worksheet] or wb.active
75
81
 
76
82
  total = [[col.value for col in row] for row in ws]
77
- header = total[0] if total else []
83
+ header_idx = skip_rows if skip_rows < len(total) else 0
84
+ header = total[header_idx] if total else []
78
85
  filtered_indices = [
79
86
  i for i, val in enumerate(header) if val is not None and str(val).strip() != ""
80
87
  ]
@@ -93,19 +100,22 @@ class TabularReader:
93
100
  restkey=None,
94
101
  skip_blank_lines=False,
95
102
  skip_rows=0,
103
+ skip_row=None,
96
104
  *args,
97
105
  **kwargs,
98
106
  ):
107
+ if skip_row is not None and not skip_rows:
108
+ skip_rows = skip_row
99
109
  file_format = get_file_format(source)
100
110
 
101
111
  if file_format is None:
102
- filtered_data = self._detect_and_read_bytes(source, worksheet, **kwargs)
112
+ filtered_data = self._detect_and_read_bytes(source, worksheet, skip_rows=skip_rows, **kwargs)
103
113
  elif file_format == "csv":
104
114
  filtered_data = read_csv(source, **kwargs)
105
115
  elif file_format == "xlsx":
106
- filtered_data = read_xlsx(source, worksheet, **kwargs)
116
+ filtered_data = read_xlsx(source, worksheet, skip_rows=skip_rows, **kwargs)
107
117
  elif file_format == "xls":
108
- filtered_data = read_xls(source, worksheet, **kwargs)
118
+ filtered_data = read_xls(source, worksheet, skip_rows=skip_rows, **kwargs)
109
119
  else:
110
120
  raise ValueError(f"Unsupported format: {file_format}")
111
121
 
@@ -119,8 +129,10 @@ class TabularReader:
119
129
  self.skip_blank_lines = skip_blank_lines
120
130
  self.line_num = 0
121
131
 
122
- def _detect_and_read_bytes(self, source, worksheet, **kwargs):
132
+ def _detect_and_read_bytes(self, source, worksheet, skip_rows=0, **kwargs):
123
133
  errors = []
134
+ if isinstance(source, bytes):
135
+ source = io.BytesIO(source)
124
136
 
125
137
  for fmt, reader_func in [
126
138
  ("xlsx", read_xlsx),
@@ -130,15 +142,14 @@ class TabularReader:
130
142
  try:
131
143
  if isinstance(source, io.BytesIO):
132
144
  source.seek(0)
133
- return reader_func(source, worksheet, **kwargs)
145
+ return reader_func(source, worksheet, skip_rows=skip_rows, **kwargs)
134
146
  except Exception as e:
135
147
  errors.append((fmt, str(e)))
136
148
  if isinstance(source, io.BytesIO):
137
149
  source.seek(0)
138
150
 
139
- raise ValueError(
140
- f"Could not detect file format. Tried: {', '.join(f[0] for f in errors)}"
141
- )
151
+ details = "; ".join(f"{fmt}: {err}" for fmt, err in errors)
152
+ raise ValueError(f"Could not detect file format. Tried: {details}")
142
153
 
143
154
  @property
144
155
  def fieldnames(self):
File without changes