tabularmapper 1.0.0__tar.gz → 1.0.1__tar.gz

This diff shows the differences between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in their respective public registries.
Files changed (26)
  1. {tabularmapper-1.0.0/src/tabularmapper.egg-info → tabularmapper-1.0.1}/PKG-INFO +1 -1
  2. {tabularmapper-1.0.0 → tabularmapper-1.0.1}/pyproject.toml +1 -1
  3. {tabularmapper-1.0.0 → tabularmapper-1.0.1}/src/tabularmapper/__init__.py +1 -1
  4. {tabularmapper-1.0.0 → tabularmapper-1.0.1}/src/tabularmapper/api.py +42 -4
  5. {tabularmapper-1.0.0 → tabularmapper-1.0.1/src/tabularmapper.egg-info}/PKG-INFO +1 -1
  6. {tabularmapper-1.0.0 → tabularmapper-1.0.1}/tests/test_api.py +42 -0
  7. {tabularmapper-1.0.0 → tabularmapper-1.0.1}/LICENSE +0 -0
  8. {tabularmapper-1.0.0 → tabularmapper-1.0.1}/README.md +0 -0
  9. {tabularmapper-1.0.0 → tabularmapper-1.0.1}/setup.cfg +0 -0
  10. {tabularmapper-1.0.0 → tabularmapper-1.0.1}/src/tabularmapper/ai_matcher.py +0 -0
  11. {tabularmapper-1.0.0 → tabularmapper-1.0.1}/src/tabularmapper/cli.py +0 -0
  12. {tabularmapper-1.0.0 → tabularmapper-1.0.1}/src/tabularmapper/engine.py +0 -0
  13. {tabularmapper-1.0.0 → tabularmapper-1.0.1}/src/tabularmapper/learn.py +0 -0
  14. {tabularmapper-1.0.0 → tabularmapper-1.0.1}/src/tabularmapper/llm_fallback.py +0 -0
  15. {tabularmapper-1.0.0 → tabularmapper-1.0.1}/src/tabularmapper/mapping_cache.py +0 -0
  16. {tabularmapper-1.0.0 → tabularmapper-1.0.1}/src/tabularmapper/schema.py +0 -0
  17. {tabularmapper-1.0.0 → tabularmapper-1.0.1}/src/tabularmapper/stores.py +0 -0
  18. {tabularmapper-1.0.0 → tabularmapper-1.0.1}/src/tabularmapper.egg-info/SOURCES.txt +0 -0
  19. {tabularmapper-1.0.0 → tabularmapper-1.0.1}/src/tabularmapper.egg-info/dependency_links.txt +0 -0
  20. {tabularmapper-1.0.0 → tabularmapper-1.0.1}/src/tabularmapper.egg-info/entry_points.txt +0 -0
  21. {tabularmapper-1.0.0 → tabularmapper-1.0.1}/src/tabularmapper.egg-info/requires.txt +0 -0
  22. {tabularmapper-1.0.0 → tabularmapper-1.0.1}/src/tabularmapper.egg-info/top_level.txt +0 -0
  23. {tabularmapper-1.0.0 → tabularmapper-1.0.1}/tests/test_learn.py +0 -0
  24. {tabularmapper-1.0.0 → tabularmapper-1.0.1}/tests/test_mapper.py +0 -0
  25. {tabularmapper-1.0.0 → tabularmapper-1.0.1}/tests/test_schema.py +0 -0
  26. {tabularmapper-1.0.0 → tabularmapper-1.0.1}/tests/test_stores.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tabularmapper
3
- Version: 1.0.0
3
+ Version: 1.0.1
4
4
  Summary: Map any spreadsheet (.xlsx) to a schema you define — deterministic column mapping with an optional AI matcher
5
5
  Author-email: Karthikeyan Duraisamy <karthikeyanduraisamy@kultivateindia.com>
6
6
  License-Expression: MIT
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "tabularmapper"
7
- version = "1.0.0"
7
+ version = "1.0.1"
8
8
  description = "Map any spreadsheet (.xlsx) to a schema you define — deterministic column mapping with an optional AI matcher"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -44,7 +44,7 @@ from .schema import (
44
44
  )
45
45
  from .stores import open_store
46
46
 
47
- __version__ = "1.0.0"
47
+ __version__ = "1.0.1"
48
48
 
49
49
  __all__ = [
50
50
  "process_file",
@@ -32,14 +32,17 @@ import os
32
32
  from contextlib import asynccontextmanager
33
33
  from typing import Any, Optional
34
34
 
35
- from fastapi import APIRouter, FastAPI, File, HTTPException, UploadFile
35
+ from fastapi import APIRouter, FastAPI, File, HTTPException, Query, UploadFile
36
36
  from fastapi.concurrency import run_in_threadpool
37
+ from fastapi.responses import Response
37
38
  from pydantic import BaseModel
38
39
 
39
40
  from . import engine # imported as a module so OUTPUT_SCHEMA is read
40
- from .engine import process_stream # dynamically (after configure), never a stale copy
41
+ from .engine import OutputResult, process_stream # dynamically (after configure), never a stale copy
41
42
  from .mapping_cache import MappingCache
42
43
 
44
+ _XLSX_MIME = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
45
+
43
46
 
44
47
  # --------------------------------------------------------------------------
45
48
  # Shared singletons (built once at startup)
@@ -105,6 +108,9 @@ class MapResponse(BaseModel):
105
108
  schema_columns: list[str]
106
109
  columns: list[ColumnMapOut]
107
110
  transactions: list[dict]
111
+ # Populated only when ?format=base64 — a base64-encoded .xlsx of the mapped
112
+ # rows, ready to decode and save client-side. None otherwise.
113
+ file_base64: Optional[str] = None
108
114
 
109
115
 
110
116
  # --------------------------------------------------------------------------
@@ -114,8 +120,23 @@ async def health() -> dict:
114
120
  return {"status": "ok", "ai_enabled": state.matcher is not None}
115
121
 
116
122
 
117
- async def map_statement(file: UploadFile = File(...)) -> MapResponse:
118
- """Upload a spreadsheet (.xlsx); get the standardized mapping + rows."""
123
+ async def map_statement(
124
+ file: UploadFile = File(...),
125
+ format: str = Query(
126
+ "json",
127
+ pattern="^(json|base64|file)$",
128
+ description="json = rows inline (default); base64 = rows inline + an "
129
+ ".xlsx encoded in file_base64; file = download the .xlsx "
130
+ "directly (binary, no JSON body).",
131
+ ),
132
+ ):
133
+ """Upload a spreadsheet (.xlsx); get the standardized mapping + rows.
134
+
135
+ `format` controls what comes back:
136
+ * json -> MapResponse with the rows in `transactions`
137
+ * base64 -> same MapResponse, plus a mapped .xlsx in `file_base64`
138
+ * file -> the mapped .xlsx as a downloadable attachment
139
+ """
119
140
  name = (file.filename or "").lower()
120
141
  if not name.endswith((".xlsx", ".xls")):
121
142
  raise HTTPException(status_code=400, detail="expected an .xlsx/.xls file")
@@ -132,6 +153,22 @@ async def map_statement(file: UploadFile = File(...)) -> MapResponse:
132
153
  except Exception as exc: # noqa: BLE001
133
154
  raise HTTPException(status_code=422, detail=f"could not process file: {exc}")
134
155
 
156
+ # file -> stream the mapped .xlsx straight back as a download (no JSON body).
157
+ if format == "file":
158
+ xlsx = await run_in_threadpool(
159
+ lambda: OutputResult(records=res.records, format="bytes").bytes)
160
+ stem = os.path.splitext(os.path.basename(file.filename or "mapped"))[0]
161
+ return Response(
162
+ content=xlsx,
163
+ media_type=_XLSX_MIME,
164
+ headers={"Content-Disposition": f'attachment; filename="{stem}_mapped.xlsx"'},
165
+ )
166
+
167
+ file_b64 = None
168
+ if format == "base64":
169
+ file_b64 = await run_in_threadpool(
170
+ lambda: OutputResult(records=res.records, format="base64").base64)
171
+
135
172
  return MapResponse(
136
173
  header_index=res.header_index,
137
174
  needs_review=res.needs_review,
@@ -142,6 +179,7 @@ async def map_statement(file: UploadFile = File(...)) -> MapResponse:
142
179
  "field": m.field, "confidence": m.confidence, "method": m.method,
143
180
  }) for m in res.column_maps],
144
181
  transactions=res.records,
182
+ file_base64=file_b64,
145
183
  )
146
184
 
147
185
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tabularmapper
3
- Version: 1.0.0
3
+ Version: 1.0.1
4
4
  Summary: Map any spreadsheet (.xlsx) to a schema you define — deterministic column mapping with an optional AI matcher
5
5
  Author-email: Karthikeyan Duraisamy <karthikeyanduraisamy@kultivateindia.com>
6
6
  License-Expression: MIT
@@ -59,6 +59,48 @@ def test_map_deterministic(client):
59
59
  assert body["schema_columns"][0] == "Date"
60
60
 
61
61
 
62
+ def test_map_format_base64(client):
63
+ import base64
64
+ with open(os.path.join(FIX, "01_junk_split.xlsx"), "rb") as fh:
65
+ payload = fh.read()
66
+ r = client.post("/mapper/map", params={"format": "base64"},
67
+ files={"file": ("stmt.xlsx", io.BytesIO(payload))})
68
+ assert r.status_code == 200
69
+ body = r.json()
70
+ # rows still inline...
71
+ assert any(t["credit"] == 45000.0 for t in body["transactions"])
72
+ # ...plus a base64 .xlsx that decodes to a real zip (xlsx magic = PK)
73
+ assert body["file_base64"]
74
+ assert base64.b64decode(body["file_base64"])[:2] == b"PK"
75
+
76
+
77
+ def test_map_format_file_download(client):
78
+ with open(os.path.join(FIX, "01_junk_split.xlsx"), "rb") as fh:
79
+ payload = fh.read()
80
+ r = client.post("/mapper/map", params={"format": "file"},
81
+ files={"file": ("statement.xlsx", io.BytesIO(payload))})
82
+ assert r.status_code == 200
83
+ assert r.headers["content-type"].startswith(
84
+ "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
85
+ assert 'attachment; filename="statement_mapped.xlsx"' in r.headers["content-disposition"]
86
+ assert r.content[:2] == b"PK" # real .xlsx bytes
87
+
88
+
89
+ def test_map_default_format_has_no_file_base64(client):
90
+ with open(os.path.join(FIX, "01_junk_split.xlsx"), "rb") as fh:
91
+ payload = fh.read()
92
+ r = client.post("/mapper/map", files={"file": ("stmt.xlsx", io.BytesIO(payload))})
93
+ assert r.json()["file_base64"] is None
94
+
95
+
96
+ def test_map_rejects_bad_format(client):
97
+ with open(os.path.join(FIX, "01_junk_split.xlsx"), "rb") as fh:
98
+ payload = fh.read()
99
+ r = client.post("/mapper/map", params={"format": "pdf"},
100
+ files={"file": ("stmt.xlsx", io.BytesIO(payload))})
101
+ assert r.status_code == 422 # fails the regex pattern
102
+
103
+
62
104
  def test_map_rejects_non_xlsx(client):
63
105
  r = client.post("/mapper/map",
64
106
  files={"file": ("notes.txt", io.BytesIO(b"hello"), "text/plain")})
File without changes
File without changes
File without changes