tabularmapper 1.0.0__tar.gz → 1.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tabularmapper-1.0.0/src/tabularmapper.egg-info → tabularmapper-1.0.1}/PKG-INFO +1 -1
- {tabularmapper-1.0.0 → tabularmapper-1.0.1}/pyproject.toml +1 -1
- {tabularmapper-1.0.0 → tabularmapper-1.0.1}/src/tabularmapper/__init__.py +1 -1
- {tabularmapper-1.0.0 → tabularmapper-1.0.1}/src/tabularmapper/api.py +42 -4
- {tabularmapper-1.0.0 → tabularmapper-1.0.1/src/tabularmapper.egg-info}/PKG-INFO +1 -1
- {tabularmapper-1.0.0 → tabularmapper-1.0.1}/tests/test_api.py +42 -0
- {tabularmapper-1.0.0 → tabularmapper-1.0.1}/LICENSE +0 -0
- {tabularmapper-1.0.0 → tabularmapper-1.0.1}/README.md +0 -0
- {tabularmapper-1.0.0 → tabularmapper-1.0.1}/setup.cfg +0 -0
- {tabularmapper-1.0.0 → tabularmapper-1.0.1}/src/tabularmapper/ai_matcher.py +0 -0
- {tabularmapper-1.0.0 → tabularmapper-1.0.1}/src/tabularmapper/cli.py +0 -0
- {tabularmapper-1.0.0 → tabularmapper-1.0.1}/src/tabularmapper/engine.py +0 -0
- {tabularmapper-1.0.0 → tabularmapper-1.0.1}/src/tabularmapper/learn.py +0 -0
- {tabularmapper-1.0.0 → tabularmapper-1.0.1}/src/tabularmapper/llm_fallback.py +0 -0
- {tabularmapper-1.0.0 → tabularmapper-1.0.1}/src/tabularmapper/mapping_cache.py +0 -0
- {tabularmapper-1.0.0 → tabularmapper-1.0.1}/src/tabularmapper/schema.py +0 -0
- {tabularmapper-1.0.0 → tabularmapper-1.0.1}/src/tabularmapper/stores.py +0 -0
- {tabularmapper-1.0.0 → tabularmapper-1.0.1}/src/tabularmapper.egg-info/SOURCES.txt +0 -0
- {tabularmapper-1.0.0 → tabularmapper-1.0.1}/src/tabularmapper.egg-info/dependency_links.txt +0 -0
- {tabularmapper-1.0.0 → tabularmapper-1.0.1}/src/tabularmapper.egg-info/entry_points.txt +0 -0
- {tabularmapper-1.0.0 → tabularmapper-1.0.1}/src/tabularmapper.egg-info/requires.txt +0 -0
- {tabularmapper-1.0.0 → tabularmapper-1.0.1}/src/tabularmapper.egg-info/top_level.txt +0 -0
- {tabularmapper-1.0.0 → tabularmapper-1.0.1}/tests/test_learn.py +0 -0
- {tabularmapper-1.0.0 → tabularmapper-1.0.1}/tests/test_mapper.py +0 -0
- {tabularmapper-1.0.0 → tabularmapper-1.0.1}/tests/test_schema.py +0 -0
- {tabularmapper-1.0.0 → tabularmapper-1.0.1}/tests/test_stores.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: tabularmapper
|
|
3
|
-
Version: 1.0.0
|
|
3
|
+
Version: 1.0.1
|
|
4
4
|
Summary: Map any spreadsheet (.xlsx) to a schema you define — deterministic column mapping with an optional AI matcher
|
|
5
5
|
Author-email: Karthikeyan Duraisamy <karthikeyanduraisamy@kultivateindia.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "tabularmapper"
|
|
7
|
-
version = "1.0.0"
|
|
7
|
+
version = "1.0.1"
|
|
8
8
|
description = "Map any spreadsheet (.xlsx) to a schema you define — deterministic column mapping with an optional AI matcher"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.9"
|
|
@@ -32,14 +32,17 @@ import os
|
|
|
32
32
|
from contextlib import asynccontextmanager
|
|
33
33
|
from typing import Any, Optional
|
|
34
34
|
|
|
35
|
-
from fastapi import APIRouter, FastAPI, File, HTTPException, UploadFile
|
|
35
|
+
from fastapi import APIRouter, FastAPI, File, HTTPException, Query, UploadFile
|
|
36
36
|
from fastapi.concurrency import run_in_threadpool
|
|
37
|
+
from fastapi.responses import Response
|
|
37
38
|
from pydantic import BaseModel
|
|
38
39
|
|
|
39
40
|
from . import engine # imported as a module so OUTPUT_SCHEMA is read
|
|
40
|
-
from .engine import process_stream # dynamically (after configure), never a stale copy
|
|
41
|
+
from .engine import OutputResult, process_stream # dynamically (after configure), never a stale copy
|
|
41
42
|
from .mapping_cache import MappingCache
|
|
42
43
|
|
|
44
|
+
_XLSX_MIME = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
|
45
|
+
|
|
43
46
|
|
|
44
47
|
# --------------------------------------------------------------------------
|
|
45
48
|
# Shared singletons (built once at startup)
|
|
@@ -105,6 +108,9 @@ class MapResponse(BaseModel):
|
|
|
105
108
|
schema_columns: list[str]
|
|
106
109
|
columns: list[ColumnMapOut]
|
|
107
110
|
transactions: list[dict]
|
|
111
|
+
# Populated only when ?format=base64 — a base64-encoded .xlsx of the mapped
|
|
112
|
+
# rows, ready to decode and save client-side. None otherwise.
|
|
113
|
+
file_base64: Optional[str] = None
|
|
108
114
|
|
|
109
115
|
|
|
110
116
|
# --------------------------------------------------------------------------
|
|
@@ -114,8 +120,23 @@ async def health() -> dict:
|
|
|
114
120
|
return {"status": "ok", "ai_enabled": state.matcher is not None}
|
|
115
121
|
|
|
116
122
|
|
|
117
|
-
async def map_statement(file: UploadFile = File(...)) -> MapResponse:
|
|
118
|
-
|
|
123
|
+
async def map_statement(
|
|
124
|
+
file: UploadFile = File(...),
|
|
125
|
+
format: str = Query(
|
|
126
|
+
"json",
|
|
127
|
+
pattern="^(json|base64|file)$",
|
|
128
|
+
description="json = rows inline (default); base64 = rows inline + an "
|
|
129
|
+
".xlsx encoded in file_base64; file = download the .xlsx "
|
|
130
|
+
"directly (binary, no JSON body).",
|
|
131
|
+
),
|
|
132
|
+
):
|
|
133
|
+
"""Upload a spreadsheet (.xlsx); get the standardized mapping + rows.
|
|
134
|
+
|
|
135
|
+
`format` controls what comes back:
|
|
136
|
+
* json -> MapResponse with the rows in `transactions`
|
|
137
|
+
* base64 -> same MapResponse, plus a mapped .xlsx in `file_base64`
|
|
138
|
+
* file -> the mapped .xlsx as a downloadable attachment
|
|
139
|
+
"""
|
|
119
140
|
name = (file.filename or "").lower()
|
|
120
141
|
if not name.endswith((".xlsx", ".xls")):
|
|
121
142
|
raise HTTPException(status_code=400, detail="expected an .xlsx/.xls file")
|
|
@@ -132,6 +153,22 @@ async def map_statement(file: UploadFile = File(...)) -> MapResponse:
|
|
|
132
153
|
except Exception as exc: # noqa: BLE001
|
|
133
154
|
raise HTTPException(status_code=422, detail=f"could not process file: {exc}")
|
|
134
155
|
|
|
156
|
+
# file -> stream the mapped .xlsx straight back as a download (no JSON body).
|
|
157
|
+
if format == "file":
|
|
158
|
+
xlsx = await run_in_threadpool(
|
|
159
|
+
lambda: OutputResult(records=res.records, format="bytes").bytes)
|
|
160
|
+
stem = os.path.splitext(os.path.basename(file.filename or "mapped"))[0]
|
|
161
|
+
return Response(
|
|
162
|
+
content=xlsx,
|
|
163
|
+
media_type=_XLSX_MIME,
|
|
164
|
+
headers={"Content-Disposition": f'attachment; filename="{stem}_mapped.xlsx"'},
|
|
165
|
+
)
|
|
166
|
+
|
|
167
|
+
file_b64 = None
|
|
168
|
+
if format == "base64":
|
|
169
|
+
file_b64 = await run_in_threadpool(
|
|
170
|
+
lambda: OutputResult(records=res.records, format="base64").base64)
|
|
171
|
+
|
|
135
172
|
return MapResponse(
|
|
136
173
|
header_index=res.header_index,
|
|
137
174
|
needs_review=res.needs_review,
|
|
@@ -142,6 +179,7 @@ async def map_statement(file: UploadFile = File(...)) -> MapResponse:
|
|
|
142
179
|
"field": m.field, "confidence": m.confidence, "method": m.method,
|
|
143
180
|
}) for m in res.column_maps],
|
|
144
181
|
transactions=res.records,
|
|
182
|
+
file_base64=file_b64,
|
|
145
183
|
)
|
|
146
184
|
|
|
147
185
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: tabularmapper
|
|
3
|
-
Version: 1.0.0
|
|
3
|
+
Version: 1.0.1
|
|
4
4
|
Summary: Map any spreadsheet (.xlsx) to a schema you define — deterministic column mapping with an optional AI matcher
|
|
5
5
|
Author-email: Karthikeyan Duraisamy <karthikeyanduraisamy@kultivateindia.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -59,6 +59,48 @@ def test_map_deterministic(client):
|
|
|
59
59
|
assert body["schema_columns"][0] == "Date"
|
|
60
60
|
|
|
61
61
|
|
|
62
|
+
def test_map_format_base64(client):
|
|
63
|
+
import base64
|
|
64
|
+
with open(os.path.join(FIX, "01_junk_split.xlsx"), "rb") as fh:
|
|
65
|
+
payload = fh.read()
|
|
66
|
+
r = client.post("/mapper/map", params={"format": "base64"},
|
|
67
|
+
files={"file": ("stmt.xlsx", io.BytesIO(payload))})
|
|
68
|
+
assert r.status_code == 200
|
|
69
|
+
body = r.json()
|
|
70
|
+
# rows still inline...
|
|
71
|
+
assert any(t["credit"] == 45000.0 for t in body["transactions"])
|
|
72
|
+
# ...plus a base64 .xlsx that decodes to a real zip (xlsx magic = PK)
|
|
73
|
+
assert body["file_base64"]
|
|
74
|
+
assert base64.b64decode(body["file_base64"])[:2] == b"PK"
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def test_map_format_file_download(client):
|
|
78
|
+
with open(os.path.join(FIX, "01_junk_split.xlsx"), "rb") as fh:
|
|
79
|
+
payload = fh.read()
|
|
80
|
+
r = client.post("/mapper/map", params={"format": "file"},
|
|
81
|
+
files={"file": ("statement.xlsx", io.BytesIO(payload))})
|
|
82
|
+
assert r.status_code == 200
|
|
83
|
+
assert r.headers["content-type"].startswith(
|
|
84
|
+
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
|
|
85
|
+
assert 'attachment; filename="statement_mapped.xlsx"' in r.headers["content-disposition"]
|
|
86
|
+
assert r.content[:2] == b"PK" # real .xlsx bytes
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def test_map_default_format_has_no_file_base64(client):
|
|
90
|
+
with open(os.path.join(FIX, "01_junk_split.xlsx"), "rb") as fh:
|
|
91
|
+
payload = fh.read()
|
|
92
|
+
r = client.post("/mapper/map", files={"file": ("stmt.xlsx", io.BytesIO(payload))})
|
|
93
|
+
assert r.json()["file_base64"] is None
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def test_map_rejects_bad_format(client):
|
|
97
|
+
with open(os.path.join(FIX, "01_junk_split.xlsx"), "rb") as fh:
|
|
98
|
+
payload = fh.read()
|
|
99
|
+
r = client.post("/mapper/map", params={"format": "pdf"},
|
|
100
|
+
files={"file": ("stmt.xlsx", io.BytesIO(payload))})
|
|
101
|
+
assert r.status_code == 422 # fails the regex pattern
|
|
102
|
+
|
|
103
|
+
|
|
62
104
|
def test_map_rejects_non_xlsx(client):
|
|
63
105
|
r = client.post("/mapper/map",
|
|
64
106
|
files={"file": ("notes.txt", io.BytesIO(b"hello"), "text/plain")})
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|