PyPI - tabular-reader - Versions diffs - 0.1.1__tar.gz → 0.1.3__tar.gz - Mend

tabular-reader 0.1.1__tar.gz → 0.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

{tabular_reader-0.1.1 → tabular_reader-0.1.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tabular-reader
-Version: 0.1.1
+Version: 0.1.3
 Summary: Read XLSX, XLS and CSV files with a uniform interface Read XLSX, XLS and CSV files with a uniform interface
 License: MIT
 License-File: LICENSE
@@ -26,7 +26,9 @@ Requires-Dist: xlrd (>=2.0,<3.0)
 Project-URL: Repository, https://github.com/arkhan/tabular-reader
 Description-Content-Type: text/plain
-* tabular_reader
+#+TITLE: tabular-reader
+* tabular_reader
 ** Description
 A module that maps information from each row in XLSX, XLS, or CSV

{tabular_reader-0.1.1 → tabular_reader-0.1.3}/README.org RENAMED Viewed

@@ -1,4 +1,6 @@
-* tabular_reader
+#+TITLE: tabular-reader
+* tabular_reader
 ** Description
 A module that maps information from each row in XLSX, XLS, or CSV

{tabular_reader-0.1.1 → tabular_reader-0.1.3}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "tabular-reader"
-version = "0.1.1"
+version = "0.1.3"
 description = "Read XLSX, XLS and CSV files with a uniform interface Read XLSX, XLS and CSV files with a uniform interface"
 authors = ["arkhan <arkhan@riseup.net>"]
 license = "MIT"

{tabular_reader-0.1.1 → tabular_reader-0.1.3}/tabular_reader/__init__.py RENAMED Viewed

@@ -2,5 +2,5 @@
 from .reader import TabularReader
-__version__ = "0.1.1"
+__version__ = "0.1.3"
 __all__ = ["TabularReader"]

{tabular_reader-0.1.1 → tabular_reader-0.1.3}/tabular_reader/reader.py RENAMED Viewed

@@ -1,21 +1,34 @@
 #!/usr/bin/env python
 import csv
+import io
 import os
 from types import SimpleNamespace
 def get_file_format(filename):
+    if isinstance(filename, (io.BytesIO, bytes)):
+        return None
     _, ext = os.path.splitext(filename)
     return ext.lower().lstrip(".")
-def read_csv(filename, **kwargs):
+def read_csv(source, **kwargs):
     csv_kwargs = {
         k: v for k, v in kwargs.items() if k in ["delimiter", "quotechar", "encoding"]
     }
-    with open(filename, "r", encoding=csv_kwargs.pop("encoding", "utf-8-sig")) as f:
+    if isinstance(source, io.BytesIO):
+        source.seek(0)
+        f = io.TextIOWrapper(source, encoding=csv_kwargs.pop("encoding", "utf-8-sig"))
+    else:
+        f = open(source, "r", encoding=csv_kwargs.pop("encoding", "utf-8-sig"))
+    try:
         reader = csv.reader(f, **csv_kwargs)
         total = list(reader)
+    finally:
+        if not isinstance(source, io.BytesIO):
+            f.close()
     header = total[0] if total else []
     filtered_indices = [
@@ -26,10 +39,15 @@ def read_csv(filename, **kwargs):
     ]
-def read_xls(filename, worksheet="", **kwargs):
+def read_xls(source, worksheet="", **kwargs):
     import xlrd
-    wb = xlrd.open_workbook(filename)
+    if isinstance(source, io.BytesIO):
+        source.seek(0)
+        wb = xlrd.open_workbook(file_contents=source.read())
+    else:
+        wb = xlrd.open_workbook(source)
     ws = wb.sheet_by_name(worksheet) if worksheet else wb.sheet_by_index(0)
     total = [[cell.value for cell in row] for row in ws.get_rows()]
@@ -42,13 +60,17 @@ def read_xls(filename, worksheet="", **kwargs):
     ]
-def read_xlsx(filename, worksheet="", **kwargs):
+def read_xlsx(source, worksheet="", **kwargs):
     from openpyxl import load_workbook
     excel_kwargs = {
         k: v for k, v in kwargs.items() if k not in ["delimiter", "encoding"]
     }
-    wb = load_workbook(filename, **excel_kwargs)
+    if isinstance(source, io.BytesIO):
+        source.seek(0)
+    wb = load_workbook(source, **excel_kwargs)
     ws = worksheet and wb[worksheet] or wb.active
     total = [[col.value for col in row] for row in ws]
@@ -64,26 +86,32 @@ def read_xlsx(filename, worksheet="", **kwargs):
 class TabularReader:
     def __init__(
         self,
-        filename,
+        source,
         worksheet="",
         fieldnames=None,
         restval=None,
         restkey=None,
         skip_blank_lines=False,
+        skip_rows=0,
         *args,
         **kwargs,
     ):
-        file_format = get_file_format(filename)
+        file_format = get_file_format(source)
-        if file_format == "csv":
-            filtered_data = read_csv(filename, **kwargs)
+        if file_format is None:
+            filtered_data = self._detect_and_read_bytes(source, worksheet, **kwargs)
+        elif file_format == "csv":
+            filtered_data = read_csv(source, **kwargs)
         elif file_format == "xlsx":
-            filtered_data = read_xlsx(filename, worksheet, **kwargs)
+            filtered_data = read_xlsx(source, worksheet, **kwargs)
         elif file_format == "xls":
-            filtered_data = read_xls(filename, worksheet, **kwargs)
+            filtered_data = read_xls(source, worksheet, **kwargs)
         else:
             raise ValueError(f"Unsupported format: {file_format}")
+        if skip_rows:
+            filtered_data = filtered_data[skip_rows:]
         self.reader = iter(filtered_data)
         self._fieldnames = fieldnames
         self.restkey = restkey
@@ -91,6 +119,27 @@ class TabularReader:
         self.skip_blank_lines = skip_blank_lines
         self.line_num = 0
+    def _detect_and_read_bytes(self, source, worksheet, **kwargs):
+        errors = []
+        for fmt, reader_func in [
+            ("xlsx", read_xlsx),
+            ("xls", read_xls),
+            ("csv", read_csv),
+        ]:
+            try:
+                if isinstance(source, io.BytesIO):
+                    source.seek(0)
+                return reader_func(source, worksheet, **kwargs)
+            except Exception as e:
+                errors.append((fmt, str(e)))
+                if isinstance(source, io.BytesIO):
+                    source.seek(0)
+        raise ValueError(
+            f"Could not detect file format. Tried: {', '.join(f[0] for f in errors)}"
+        )
     @property
     def fieldnames(self):
         if self._fieldnames is None:

{tabular_reader-0.1.1 → tabular_reader-0.1.3}/LICENSE RENAMED Viewed

File without changes

tabular-reader 0.1.1__tar.gz → 0.1.3__tar.gz

tabular-reader 0.1.1__tar.gz → 0.1.3__tar.gz