tabular-reader 0.1.1__tar.gz → 0.1.3__tar.gz

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tabular-reader
3
- Version: 0.1.1
3
+ Version: 0.1.3
4
4
  Summary: Read XLSX, XLS and CSV files with a uniform interface Read XLSX, XLS and CSV files with a uniform interface
5
5
  License: MIT
6
6
  License-File: LICENSE
@@ -26,7 +26,9 @@ Requires-Dist: xlrd (>=2.0,<3.0)
26
26
  Project-URL: Repository, https://github.com/arkhan/tabular-reader
27
27
  Description-Content-Type: text/plain
28
28
 
29
- * tabular_reader
29
+ #+TITLE: tabular-reader
30
+
31
+ * tabular_reader
30
32
 
31
33
  ** Description
32
34
  A module that maps information from each row in XLSX, XLS, or CSV
@@ -1,4 +1,6 @@
1
- * tabular_reader
1
+ #+TITLE: tabular-reader
2
+
3
+ * tabular_reader
2
4
 
3
5
  ** Description
4
6
  A module that maps information from each row in XLSX, XLS, or CSV
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "tabular-reader"
3
- version = "0.1.1"
3
+ version = "0.1.3"
4
4
  description = "Read XLSX, XLS and CSV files with a uniform interface Read XLSX, XLS and CSV files with a uniform interface"
5
5
  authors = ["arkhan <arkhan@riseup.net>"]
6
6
  license = "MIT"
@@ -2,5 +2,5 @@
2
2
 
3
3
  from .reader import TabularReader
4
4
 
5
- __version__ = "0.1.1"
5
+ __version__ = "0.1.3"
6
6
  __all__ = ["TabularReader"]
@@ -1,21 +1,34 @@
1
1
  #!/usr/bin/env python
2
2
  import csv
3
+ import io
3
4
  import os
4
5
  from types import SimpleNamespace
5
6
 
6
7
 
7
8
  def get_file_format(filename):
9
+ if isinstance(filename, (io.BytesIO, bytes)):
10
+ return None
8
11
  _, ext = os.path.splitext(filename)
9
12
  return ext.lower().lstrip(".")
10
13
 
11
14
 
12
- def read_csv(filename, **kwargs):
15
+ def read_csv(source, **kwargs):
13
16
  csv_kwargs = {
14
17
  k: v for k, v in kwargs.items() if k in ["delimiter", "quotechar", "encoding"]
15
18
  }
16
- with open(filename, "r", encoding=csv_kwargs.pop("encoding", "utf-8-sig")) as f:
19
+
20
+ if isinstance(source, io.BytesIO):
21
+ source.seek(0)
22
+ f = io.TextIOWrapper(source, encoding=csv_kwargs.pop("encoding", "utf-8-sig"))
23
+ else:
24
+ f = open(source, "r", encoding=csv_kwargs.pop("encoding", "utf-8-sig"))
25
+
26
+ try:
17
27
  reader = csv.reader(f, **csv_kwargs)
18
28
  total = list(reader)
29
+ finally:
30
+ if not isinstance(source, io.BytesIO):
31
+ f.close()
19
32
 
20
33
  header = total[0] if total else []
21
34
  filtered_indices = [
@@ -26,10 +39,15 @@ def read_csv(filename, **kwargs):
26
39
  ]
27
40
 
28
41
 
29
- def read_xls(filename, worksheet="", **kwargs):
42
+ def read_xls(source, worksheet="", **kwargs):
30
43
  import xlrd
31
44
 
32
- wb = xlrd.open_workbook(filename)
45
+ if isinstance(source, io.BytesIO):
46
+ source.seek(0)
47
+ wb = xlrd.open_workbook(file_contents=source.read())
48
+ else:
49
+ wb = xlrd.open_workbook(source)
50
+
33
51
  ws = wb.sheet_by_name(worksheet) if worksheet else wb.sheet_by_index(0)
34
52
 
35
53
  total = [[cell.value for cell in row] for row in ws.get_rows()]
@@ -42,13 +60,17 @@ def read_xls(filename, worksheet="", **kwargs):
42
60
  ]
43
61
 
44
62
 
45
- def read_xlsx(filename, worksheet="", **kwargs):
63
+ def read_xlsx(source, worksheet="", **kwargs):
46
64
  from openpyxl import load_workbook
47
65
 
48
66
  excel_kwargs = {
49
67
  k: v for k, v in kwargs.items() if k not in ["delimiter", "encoding"]
50
68
  }
51
- wb = load_workbook(filename, **excel_kwargs)
69
+
70
+ if isinstance(source, io.BytesIO):
71
+ source.seek(0)
72
+
73
+ wb = load_workbook(source, **excel_kwargs)
52
74
  ws = worksheet and wb[worksheet] or wb.active
53
75
 
54
76
  total = [[col.value for col in row] for row in ws]
@@ -64,26 +86,32 @@ def read_xlsx(filename, worksheet="", **kwargs):
64
86
  class TabularReader:
65
87
  def __init__(
66
88
  self,
67
- filename,
89
+ source,
68
90
  worksheet="",
69
91
  fieldnames=None,
70
92
  restval=None,
71
93
  restkey=None,
72
94
  skip_blank_lines=False,
95
+ skip_rows=0,
73
96
  *args,
74
97
  **kwargs,
75
98
  ):
76
- file_format = get_file_format(filename)
99
+ file_format = get_file_format(source)
77
100
 
78
- if file_format == "csv":
79
- filtered_data = read_csv(filename, **kwargs)
101
+ if file_format is None:
102
+ filtered_data = self._detect_and_read_bytes(source, worksheet, **kwargs)
103
+ elif file_format == "csv":
104
+ filtered_data = read_csv(source, **kwargs)
80
105
  elif file_format == "xlsx":
81
- filtered_data = read_xlsx(filename, worksheet, **kwargs)
106
+ filtered_data = read_xlsx(source, worksheet, **kwargs)
82
107
  elif file_format == "xls":
83
- filtered_data = read_xls(filename, worksheet, **kwargs)
108
+ filtered_data = read_xls(source, worksheet, **kwargs)
84
109
  else:
85
110
  raise ValueError(f"Unsupported format: {file_format}")
86
111
 
112
+ if skip_rows:
113
+ filtered_data = filtered_data[skip_rows:]
114
+
87
115
  self.reader = iter(filtered_data)
88
116
  self._fieldnames = fieldnames
89
117
  self.restkey = restkey
@@ -91,6 +119,27 @@ class TabularReader:
91
119
  self.skip_blank_lines = skip_blank_lines
92
120
  self.line_num = 0
93
121
 
122
+ def _detect_and_read_bytes(self, source, worksheet, **kwargs):
123
+ errors = []
124
+
125
+ for fmt, reader_func in [
126
+ ("xlsx", read_xlsx),
127
+ ("xls", read_xls),
128
+ ("csv", read_csv),
129
+ ]:
130
+ try:
131
+ if isinstance(source, io.BytesIO):
132
+ source.seek(0)
133
+ return reader_func(source, worksheet, **kwargs)
134
+ except Exception as e:
135
+ errors.append((fmt, str(e)))
136
+ if isinstance(source, io.BytesIO):
137
+ source.seek(0)
138
+
139
+ raise ValueError(
140
+ f"Could not detect file format. Tried: {', '.join(f[0] for f in errors)}"
141
+ )
142
+
94
143
  @property
95
144
  def fieldnames(self):
96
145
  if self._fieldnames is None:
File without changes