tabularmapper 1.0.0__tar.gz → 1.0.2__tar.gz

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. {tabularmapper-1.0.0/src/tabularmapper.egg-info → tabularmapper-1.0.2}/PKG-INFO +3 -1
  2. {tabularmapper-1.0.0 → tabularmapper-1.0.2}/pyproject.toml +3 -1
  3. {tabularmapper-1.0.0 → tabularmapper-1.0.2}/src/tabularmapper/__init__.py +1 -1
  4. {tabularmapper-1.0.0 → tabularmapper-1.0.2}/src/tabularmapper/api.py +49 -4
  5. {tabularmapper-1.0.0 → tabularmapper-1.0.2}/src/tabularmapper/stores.py +2 -2
  6. {tabularmapper-1.0.0 → tabularmapper-1.0.2/src/tabularmapper.egg-info}/PKG-INFO +3 -1
  7. {tabularmapper-1.0.0 → tabularmapper-1.0.2}/tests/test_api.py +42 -0
  8. {tabularmapper-1.0.0 → tabularmapper-1.0.2}/LICENSE +0 -0
  9. {tabularmapper-1.0.0 → tabularmapper-1.0.2}/README.md +0 -0
  10. {tabularmapper-1.0.0 → tabularmapper-1.0.2}/setup.cfg +0 -0
  11. {tabularmapper-1.0.0 → tabularmapper-1.0.2}/src/tabularmapper/ai_matcher.py +0 -0
  12. {tabularmapper-1.0.0 → tabularmapper-1.0.2}/src/tabularmapper/cli.py +0 -0
  13. {tabularmapper-1.0.0 → tabularmapper-1.0.2}/src/tabularmapper/engine.py +0 -0
  14. {tabularmapper-1.0.0 → tabularmapper-1.0.2}/src/tabularmapper/learn.py +0 -0
  15. {tabularmapper-1.0.0 → tabularmapper-1.0.2}/src/tabularmapper/llm_fallback.py +0 -0
  16. {tabularmapper-1.0.0 → tabularmapper-1.0.2}/src/tabularmapper/mapping_cache.py +0 -0
  17. {tabularmapper-1.0.0 → tabularmapper-1.0.2}/src/tabularmapper/schema.py +0 -0
  18. {tabularmapper-1.0.0 → tabularmapper-1.0.2}/src/tabularmapper.egg-info/SOURCES.txt +0 -0
  19. {tabularmapper-1.0.0 → tabularmapper-1.0.2}/src/tabularmapper.egg-info/dependency_links.txt +0 -0
  20. {tabularmapper-1.0.0 → tabularmapper-1.0.2}/src/tabularmapper.egg-info/entry_points.txt +0 -0
  21. {tabularmapper-1.0.0 → tabularmapper-1.0.2}/src/tabularmapper.egg-info/requires.txt +0 -0
  22. {tabularmapper-1.0.0 → tabularmapper-1.0.2}/src/tabularmapper.egg-info/top_level.txt +0 -0
  23. {tabularmapper-1.0.0 → tabularmapper-1.0.2}/tests/test_learn.py +0 -0
  24. {tabularmapper-1.0.0 → tabularmapper-1.0.2}/tests/test_mapper.py +0 -0
  25. {tabularmapper-1.0.0 → tabularmapper-1.0.2}/tests/test_schema.py +0 -0
  26. {tabularmapper-1.0.0 → tabularmapper-1.0.2}/tests/test_stores.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tabularmapper
3
- Version: 1.0.0
3
+ Version: 1.0.2
4
4
  Summary: Map any spreadsheet (.xlsx) to a schema you define — deterministic column mapping with an optional AI matcher
5
5
  Author-email: Karthikeyan Duraisamy <karthikeyanduraisamy@kultivateindia.com>
6
6
  License-Expression: MIT
@@ -15,6 +15,8 @@ Classifier: Programming Language :: Python :: 3.9
15
15
  Classifier: Programming Language :: Python :: 3.10
16
16
  Classifier: Programming Language :: Python :: 3.11
17
17
  Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Programming Language :: Python :: 3.14
18
20
  Classifier: Topic :: Office/Business
19
21
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
20
22
  Requires-Python: >=3.9
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "tabularmapper"
7
- version = "1.0.0"
7
+ version = "1.0.2"
8
8
  description = "Map any spreadsheet (.xlsx) to a schema you define — deterministic column mapping with an optional AI matcher"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -22,6 +22,8 @@ classifiers = [
22
22
  "Programming Language :: Python :: 3.10",
23
23
  "Programming Language :: Python :: 3.11",
24
24
  "Programming Language :: Python :: 3.12",
25
+ "Programming Language :: Python :: 3.13",
26
+ "Programming Language :: Python :: 3.14",
25
27
  "Topic :: Office/Business",
26
28
  "Topic :: Software Development :: Libraries :: Python Modules",
27
29
  ]
@@ -44,7 +44,7 @@ from .schema import (
44
44
  )
45
45
  from .stores import open_store
46
46
 
47
- __version__ = "1.0.0"
47
+ __version__ = "1.0.2"
48
48
 
49
49
  __all__ = [
50
50
  "process_file",
@@ -30,16 +30,27 @@ from __future__ import annotations
30
30
 
31
31
  import os
32
32
  from contextlib import asynccontextmanager
33
+ from enum import Enum
33
34
  from typing import Any, Optional
34
35
 
35
- from fastapi import APIRouter, FastAPI, File, HTTPException, UploadFile
36
+ from fastapi import APIRouter, FastAPI, File, HTTPException, Query, UploadFile
36
37
  from fastapi.concurrency import run_in_threadpool
38
+ from fastapi.responses import Response
37
39
  from pydantic import BaseModel
38
40
 
39
41
  from . import engine # imported as a module so OUTPUT_SCHEMA is read
40
- from .engine import process_stream # dynamically (after configure), never a stale copy
42
+ from .engine import OutputResult, process_stream # dynamically (after configure), never a stale copy
41
43
  from .mapping_cache import MappingCache
42
44
 
45
+ _XLSX_MIME = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
46
+
47
+
48
+ class OutFormat(str, Enum):
49
+ """Response shape for POST /map — rendered as a dropdown in the docs."""
50
+ json = "json" # rows inline (default)
51
+ base64 = "base64" # rows inline + a mapped .xlsx in file_base64
52
+ file = "file" # download the .xlsx directly (binary, no JSON body)
53
+
43
54
 
44
55
  # --------------------------------------------------------------------------
45
56
  # Shared singletons (built once at startup)
@@ -105,6 +116,9 @@ class MapResponse(BaseModel):
105
116
  schema_columns: list[str]
106
117
  columns: list[ColumnMapOut]
107
118
  transactions: list[dict]
119
+ # Populated only when ?format=base64 — a base64-encoded .xlsx of the mapped
120
+ # rows, ready to decode and save client-side. None otherwise.
121
+ file_base64: Optional[str] = None
108
122
 
109
123
 
110
124
  # --------------------------------------------------------------------------
@@ -114,8 +128,22 @@ async def health() -> dict:
114
128
  return {"status": "ok", "ai_enabled": state.matcher is not None}
115
129
 
116
130
 
117
- async def map_statement(file: UploadFile = File(...)) -> MapResponse:
118
- """Upload a spreadsheet (.xlsx); get the standardized mapping + rows."""
131
+ async def map_statement(
132
+ file: UploadFile = File(...),
133
+ format: OutFormat = Query(
134
+ OutFormat.json,
135
+ description="json = rows inline (default); base64 = rows inline + an "
136
+ ".xlsx encoded in file_base64; file = download the .xlsx "
137
+ "directly (binary, no JSON body).",
138
+ ),
139
+ ):
140
+ """Upload a spreadsheet (.xlsx); get the standardized mapping + rows.
141
+
142
+ `format` controls what comes back:
143
+ * json -> MapResponse with the rows in `transactions`
144
+ * base64 -> same MapResponse, plus a mapped .xlsx in `file_base64`
145
+ * file -> the mapped .xlsx as a downloadable attachment
146
+ """
119
147
  name = (file.filename or "").lower()
120
148
  if not name.endswith((".xlsx", ".xls")):
121
149
  raise HTTPException(status_code=400, detail="expected an .xlsx/.xls file")
@@ -132,6 +160,22 @@ async def map_statement(file: UploadFile = File(...)) -> MapResponse:
132
160
  except Exception as exc: # noqa: BLE001
133
161
  raise HTTPException(status_code=422, detail=f"could not process file: {exc}")
134
162
 
163
+ # file -> send the mapped .xlsx straight back as a download (no JSON body).
164
+ if format == "file":
165
+ xlsx = await run_in_threadpool(
166
+ lambda: OutputResult(records=res.records, format="bytes").bytes)
167
+ stem = os.path.splitext(os.path.basename(file.filename or "mapped"))[0]
168
+ return Response(
169
+ content=xlsx,
170
+ media_type=_XLSX_MIME,
171
+ headers={"Content-Disposition": f'attachment; filename="{stem}_mapped.xlsx"'},
172
+ )
173
+
174
+ file_b64 = None
175
+ if format == "base64":
176
+ file_b64 = await run_in_threadpool(
177
+ lambda: OutputResult(records=res.records, format="base64").base64)
178
+
135
179
  return MapResponse(
136
180
  header_index=res.header_index,
137
181
  needs_review=res.needs_review,
@@ -142,6 +186,7 @@ async def map_statement(file: UploadFile = File(...)) -> MapResponse:
142
186
  "field": m.field, "confidence": m.confidence, "method": m.method,
143
187
  }) for m in res.column_maps],
144
188
  transactions=res.records,
189
+ file_base64=file_b64,
145
190
  )
146
191
 
147
192
 
@@ -145,7 +145,7 @@ def _redis_proto_client(url: str, prefer: str = "redis"):
145
145
  return mod.from_url(u) # module-level from_url (both expose it)
146
146
  raise ImportError(
147
147
  "This cache backend needs the 'valkey' or 'redis' package. Install one "
148
- "with: pip install bank-statement-mapper[valkey] (or [redis]). Both "
148
+ "with: pip install tabularmapper[valkey] (or [redis]). Both "
149
149
  "are optional — the default SQLite backend needs nothing extra."
150
150
  ) from last_err
151
151
 
@@ -188,7 +188,7 @@ class PostgresStore:
188
188
  except ImportError as exc:
189
189
  raise ImportError(
190
190
  "The postgres cache backend needs the 'psycopg' package. Install "
191
- "it with: pip install bank-statement-mapper[postgres]. It is "
191
+ "it with: pip install tabularmapper[postgres]. It is "
192
192
  "optional — the default SQLite backend needs nothing extra."
193
193
  ) from exc
194
194
  self._table = table
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tabularmapper
3
- Version: 1.0.0
3
+ Version: 1.0.2
4
4
  Summary: Map any spreadsheet (.xlsx) to a schema you define — deterministic column mapping with an optional AI matcher
5
5
  Author-email: Karthikeyan Duraisamy <karthikeyanduraisamy@kultivateindia.com>
6
6
  License-Expression: MIT
@@ -15,6 +15,8 @@ Classifier: Programming Language :: Python :: 3.9
15
15
  Classifier: Programming Language :: Python :: 3.10
16
16
  Classifier: Programming Language :: Python :: 3.11
17
17
  Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Programming Language :: Python :: 3.14
18
20
  Classifier: Topic :: Office/Business
19
21
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
20
22
  Requires-Python: >=3.9
@@ -59,6 +59,48 @@ def test_map_deterministic(client):
59
59
  assert body["schema_columns"][0] == "Date"
60
60
 
61
61
 
62
+ def test_map_format_base64(client):
63
+ import base64
64
+ with open(os.path.join(FIX, "01_junk_split.xlsx"), "rb") as fh:
65
+ payload = fh.read()
66
+ r = client.post("/mapper/map", params={"format": "base64"},
67
+ files={"file": ("stmt.xlsx", io.BytesIO(payload))})
68
+ assert r.status_code == 200
69
+ body = r.json()
70
+ # rows still inline...
71
+ assert any(t["credit"] == 45000.0 for t in body["transactions"])
72
+ # ...plus a base64 .xlsx that decodes to a real zip (xlsx magic = PK)
73
+ assert body["file_base64"]
74
+ assert base64.b64decode(body["file_base64"])[:2] == b"PK"
75
+
76
+
77
+ def test_map_format_file_download(client):
78
+ with open(os.path.join(FIX, "01_junk_split.xlsx"), "rb") as fh:
79
+ payload = fh.read()
80
+ r = client.post("/mapper/map", params={"format": "file"},
81
+ files={"file": ("statement.xlsx", io.BytesIO(payload))})
82
+ assert r.status_code == 200
83
+ assert r.headers["content-type"].startswith(
84
+ "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
85
+ assert 'attachment; filename="statement_mapped.xlsx"' in r.headers["content-disposition"]
86
+ assert r.content[:2] == b"PK" # real .xlsx bytes
87
+
88
+
89
+ def test_map_default_format_has_no_file_base64(client):
90
+ with open(os.path.join(FIX, "01_junk_split.xlsx"), "rb") as fh:
91
+ payload = fh.read()
92
+ r = client.post("/mapper/map", files={"file": ("stmt.xlsx", io.BytesIO(payload))})
93
+ assert r.json()["file_base64"] is None
94
+
95
+
96
+ def test_map_rejects_bad_format(client):
97
+ with open(os.path.join(FIX, "01_junk_split.xlsx"), "rb") as fh:
98
+ payload = fh.read()
99
+ r = client.post("/mapper/map", params={"format": "pdf"},
100
+ files={"file": ("stmt.xlsx", io.BytesIO(payload))})
101
+ assert r.status_code == 422 # "pdf" is not a valid OutFormat enum value
102
+
103
+
62
104
  def test_map_rejects_non_xlsx(client):
63
105
  r = client.post("/mapper/map",
64
106
  files={"file": ("notes.txt", io.BytesIO(b"hello"), "text/plain")})
File without changes
File without changes
File without changes