tabularmapper 1.0.0__tar.gz → 1.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tabularmapper-1.0.0/src/tabularmapper.egg-info → tabularmapper-1.0.2}/PKG-INFO +3 -1
- {tabularmapper-1.0.0 → tabularmapper-1.0.2}/pyproject.toml +3 -1
- {tabularmapper-1.0.0 → tabularmapper-1.0.2}/src/tabularmapper/__init__.py +1 -1
- {tabularmapper-1.0.0 → tabularmapper-1.0.2}/src/tabularmapper/api.py +49 -4
- {tabularmapper-1.0.0 → tabularmapper-1.0.2}/src/tabularmapper/stores.py +2 -2
- {tabularmapper-1.0.0 → tabularmapper-1.0.2/src/tabularmapper.egg-info}/PKG-INFO +3 -1
- {tabularmapper-1.0.0 → tabularmapper-1.0.2}/tests/test_api.py +42 -0
- {tabularmapper-1.0.0 → tabularmapper-1.0.2}/LICENSE +0 -0
- {tabularmapper-1.0.0 → tabularmapper-1.0.2}/README.md +0 -0
- {tabularmapper-1.0.0 → tabularmapper-1.0.2}/setup.cfg +0 -0
- {tabularmapper-1.0.0 → tabularmapper-1.0.2}/src/tabularmapper/ai_matcher.py +0 -0
- {tabularmapper-1.0.0 → tabularmapper-1.0.2}/src/tabularmapper/cli.py +0 -0
- {tabularmapper-1.0.0 → tabularmapper-1.0.2}/src/tabularmapper/engine.py +0 -0
- {tabularmapper-1.0.0 → tabularmapper-1.0.2}/src/tabularmapper/learn.py +0 -0
- {tabularmapper-1.0.0 → tabularmapper-1.0.2}/src/tabularmapper/llm_fallback.py +0 -0
- {tabularmapper-1.0.0 → tabularmapper-1.0.2}/src/tabularmapper/mapping_cache.py +0 -0
- {tabularmapper-1.0.0 → tabularmapper-1.0.2}/src/tabularmapper/schema.py +0 -0
- {tabularmapper-1.0.0 → tabularmapper-1.0.2}/src/tabularmapper.egg-info/SOURCES.txt +0 -0
- {tabularmapper-1.0.0 → tabularmapper-1.0.2}/src/tabularmapper.egg-info/dependency_links.txt +0 -0
- {tabularmapper-1.0.0 → tabularmapper-1.0.2}/src/tabularmapper.egg-info/entry_points.txt +0 -0
- {tabularmapper-1.0.0 → tabularmapper-1.0.2}/src/tabularmapper.egg-info/requires.txt +0 -0
- {tabularmapper-1.0.0 → tabularmapper-1.0.2}/src/tabularmapper.egg-info/top_level.txt +0 -0
- {tabularmapper-1.0.0 → tabularmapper-1.0.2}/tests/test_learn.py +0 -0
- {tabularmapper-1.0.0 → tabularmapper-1.0.2}/tests/test_mapper.py +0 -0
- {tabularmapper-1.0.0 → tabularmapper-1.0.2}/tests/test_schema.py +0 -0
- {tabularmapper-1.0.0 → tabularmapper-1.0.2}/tests/test_stores.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: tabularmapper
|
|
3
|
-
Version: 1.0.0
|
|
3
|
+
Version: 1.0.2
|
|
4
4
|
Summary: Map any spreadsheet (.xlsx) to a schema you define — deterministic column mapping with an optional AI matcher
|
|
5
5
|
Author-email: Karthikeyan Duraisamy <karthikeyanduraisamy@kultivateindia.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -15,6 +15,8 @@ Classifier: Programming Language :: Python :: 3.9
|
|
|
15
15
|
Classifier: Programming Language :: Python :: 3.10
|
|
16
16
|
Classifier: Programming Language :: Python :: 3.11
|
|
17
17
|
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
18
20
|
Classifier: Topic :: Office/Business
|
|
19
21
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
20
22
|
Requires-Python: >=3.9
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "tabularmapper"
|
|
7
|
-
version = "1.0.0"
|
|
7
|
+
version = "1.0.2"
|
|
8
8
|
description = "Map any spreadsheet (.xlsx) to a schema you define — deterministic column mapping with an optional AI matcher"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.9"
|
|
@@ -22,6 +22,8 @@ classifiers = [
|
|
|
22
22
|
"Programming Language :: Python :: 3.10",
|
|
23
23
|
"Programming Language :: Python :: 3.11",
|
|
24
24
|
"Programming Language :: Python :: 3.12",
|
|
25
|
+
"Programming Language :: Python :: 3.13",
|
|
26
|
+
"Programming Language :: Python :: 3.14",
|
|
25
27
|
"Topic :: Office/Business",
|
|
26
28
|
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
27
29
|
]
|
|
@@ -30,16 +30,27 @@ from __future__ import annotations
|
|
|
30
30
|
|
|
31
31
|
import os
|
|
32
32
|
from contextlib import asynccontextmanager
|
|
33
|
+
from enum import Enum
|
|
33
34
|
from typing import Any, Optional
|
|
34
35
|
|
|
35
|
-
from fastapi import APIRouter, FastAPI, File, HTTPException, UploadFile
|
|
36
|
+
from fastapi import APIRouter, FastAPI, File, HTTPException, Query, UploadFile
|
|
36
37
|
from fastapi.concurrency import run_in_threadpool
|
|
38
|
+
from fastapi.responses import Response
|
|
37
39
|
from pydantic import BaseModel
|
|
38
40
|
|
|
39
41
|
from . import engine # imported as a module so OUTPUT_SCHEMA is read
|
|
40
|
-
from .engine import process_stream # dynamically (after configure), never a stale copy
|
|
42
|
+
from .engine import OutputResult, process_stream # dynamically (after configure), never a stale copy
|
|
41
43
|
from .mapping_cache import MappingCache
|
|
42
44
|
|
|
45
|
+
_XLSX_MIME = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class OutFormat(str, Enum):
|
|
49
|
+
"""Response shape for POST /map — rendered as a dropdown in the docs."""
|
|
50
|
+
json = "json" # rows inline (default)
|
|
51
|
+
base64 = "base64" # rows inline + a mapped .xlsx in file_base64
|
|
52
|
+
file = "file" # download the .xlsx directly (binary, no JSON body)
|
|
53
|
+
|
|
43
54
|
|
|
44
55
|
# --------------------------------------------------------------------------
|
|
45
56
|
# Shared singletons (built once at startup)
|
|
@@ -105,6 +116,9 @@ class MapResponse(BaseModel):
|
|
|
105
116
|
schema_columns: list[str]
|
|
106
117
|
columns: list[ColumnMapOut]
|
|
107
118
|
transactions: list[dict]
|
|
119
|
+
# Populated only when ?format=base64 — a base64-encoded .xlsx of the mapped
|
|
120
|
+
# rows, ready to decode and save client-side. None otherwise.
|
|
121
|
+
file_base64: Optional[str] = None
|
|
108
122
|
|
|
109
123
|
|
|
110
124
|
# --------------------------------------------------------------------------
|
|
@@ -114,8 +128,22 @@ async def health() -> dict:
|
|
|
114
128
|
return {"status": "ok", "ai_enabled": state.matcher is not None}
|
|
115
129
|
|
|
116
130
|
|
|
117
|
-
async def map_statement(
|
|
118
|
-
|
|
131
|
+
async def map_statement(
|
|
132
|
+
file: UploadFile = File(...),
|
|
133
|
+
format: OutFormat = Query(
|
|
134
|
+
OutFormat.json,
|
|
135
|
+
description="json = rows inline (default); base64 = rows inline + an "
|
|
136
|
+
".xlsx encoded in file_base64; file = download the .xlsx "
|
|
137
|
+
"directly (binary, no JSON body).",
|
|
138
|
+
),
|
|
139
|
+
):
|
|
140
|
+
"""Upload a spreadsheet (.xlsx); get the standardized mapping + rows.
|
|
141
|
+
|
|
142
|
+
`format` controls what comes back:
|
|
143
|
+
* json -> MapResponse with the rows in `transactions`
|
|
144
|
+
* base64 -> same MapResponse, plus a mapped .xlsx in `file_base64`
|
|
145
|
+
* file -> the mapped .xlsx as a downloadable attachment
|
|
146
|
+
"""
|
|
119
147
|
name = (file.filename or "").lower()
|
|
120
148
|
if not name.endswith((".xlsx", ".xls")):
|
|
121
149
|
raise HTTPException(status_code=400, detail="expected an .xlsx/.xls file")
|
|
@@ -132,6 +160,22 @@ async def map_statement(file: UploadFile = File(...)) -> MapResponse:
|
|
|
132
160
|
except Exception as exc: # noqa: BLE001
|
|
133
161
|
raise HTTPException(status_code=422, detail=f"could not process file: {exc}")
|
|
134
162
|
|
|
163
|
+
# file -> stream the mapped .xlsx straight back as a download (no JSON body).
|
|
164
|
+
if format == "file":
|
|
165
|
+
xlsx = await run_in_threadpool(
|
|
166
|
+
lambda: OutputResult(records=res.records, format="bytes").bytes)
|
|
167
|
+
stem = os.path.splitext(os.path.basename(file.filename or "mapped"))[0]
|
|
168
|
+
return Response(
|
|
169
|
+
content=xlsx,
|
|
170
|
+
media_type=_XLSX_MIME,
|
|
171
|
+
headers={"Content-Disposition": f'attachment; filename="{stem}_mapped.xlsx"'},
|
|
172
|
+
)
|
|
173
|
+
|
|
174
|
+
file_b64 = None
|
|
175
|
+
if format == "base64":
|
|
176
|
+
file_b64 = await run_in_threadpool(
|
|
177
|
+
lambda: OutputResult(records=res.records, format="base64").base64)
|
|
178
|
+
|
|
135
179
|
return MapResponse(
|
|
136
180
|
header_index=res.header_index,
|
|
137
181
|
needs_review=res.needs_review,
|
|
@@ -142,6 +186,7 @@ async def map_statement(file: UploadFile = File(...)) -> MapResponse:
|
|
|
142
186
|
"field": m.field, "confidence": m.confidence, "method": m.method,
|
|
143
187
|
}) for m in res.column_maps],
|
|
144
188
|
transactions=res.records,
|
|
189
|
+
file_base64=file_b64,
|
|
145
190
|
)
|
|
146
191
|
|
|
147
192
|
|
|
@@ -145,7 +145,7 @@ def _redis_proto_client(url: str, prefer: str = "redis"):
|
|
|
145
145
|
return mod.from_url(u) # module-level from_url (both expose it)
|
|
146
146
|
raise ImportError(
|
|
147
147
|
"This cache backend needs the 'valkey' or 'redis' package. Install one "
|
|
148
|
-
"with: pip install
|
|
148
|
+
"with: pip install tabularmapper[valkey] (or [redis]). Both "
|
|
149
149
|
"are optional — the default SQLite backend needs nothing extra."
|
|
150
150
|
) from last_err
|
|
151
151
|
|
|
@@ -188,7 +188,7 @@ class PostgresStore:
|
|
|
188
188
|
except ImportError as exc:
|
|
189
189
|
raise ImportError(
|
|
190
190
|
"The postgres cache backend needs the 'psycopg' package. Install "
|
|
191
|
-
"it with: pip install
|
|
191
|
+
"it with: pip install tabularmapper[postgres]. It is "
|
|
192
192
|
"optional — the default SQLite backend needs nothing extra."
|
|
193
193
|
) from exc
|
|
194
194
|
self._table = table
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: tabularmapper
|
|
3
|
-
Version: 1.0.0
|
|
3
|
+
Version: 1.0.2
|
|
4
4
|
Summary: Map any spreadsheet (.xlsx) to a schema you define — deterministic column mapping with an optional AI matcher
|
|
5
5
|
Author-email: Karthikeyan Duraisamy <karthikeyanduraisamy@kultivateindia.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -15,6 +15,8 @@ Classifier: Programming Language :: Python :: 3.9
|
|
|
15
15
|
Classifier: Programming Language :: Python :: 3.10
|
|
16
16
|
Classifier: Programming Language :: Python :: 3.11
|
|
17
17
|
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
18
20
|
Classifier: Topic :: Office/Business
|
|
19
21
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
20
22
|
Requires-Python: >=3.9
|
|
@@ -59,6 +59,48 @@ def test_map_deterministic(client):
|
|
|
59
59
|
assert body["schema_columns"][0] == "Date"
|
|
60
60
|
|
|
61
61
|
|
|
62
|
+
def test_map_format_base64(client):
|
|
63
|
+
import base64
|
|
64
|
+
with open(os.path.join(FIX, "01_junk_split.xlsx"), "rb") as fh:
|
|
65
|
+
payload = fh.read()
|
|
66
|
+
r = client.post("/mapper/map", params={"format": "base64"},
|
|
67
|
+
files={"file": ("stmt.xlsx", io.BytesIO(payload))})
|
|
68
|
+
assert r.status_code == 200
|
|
69
|
+
body = r.json()
|
|
70
|
+
# rows still inline...
|
|
71
|
+
assert any(t["credit"] == 45000.0 for t in body["transactions"])
|
|
72
|
+
# ...plus a base64 .xlsx that decodes to a real zip (xlsx magic = PK)
|
|
73
|
+
assert body["file_base64"]
|
|
74
|
+
assert base64.b64decode(body["file_base64"])[:2] == b"PK"
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def test_map_format_file_download(client):
|
|
78
|
+
with open(os.path.join(FIX, "01_junk_split.xlsx"), "rb") as fh:
|
|
79
|
+
payload = fh.read()
|
|
80
|
+
r = client.post("/mapper/map", params={"format": "file"},
|
|
81
|
+
files={"file": ("statement.xlsx", io.BytesIO(payload))})
|
|
82
|
+
assert r.status_code == 200
|
|
83
|
+
assert r.headers["content-type"].startswith(
|
|
84
|
+
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
|
|
85
|
+
assert 'attachment; filename="statement_mapped.xlsx"' in r.headers["content-disposition"]
|
|
86
|
+
assert r.content[:2] == b"PK" # real .xlsx bytes
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def test_map_default_format_has_no_file_base64(client):
|
|
90
|
+
with open(os.path.join(FIX, "01_junk_split.xlsx"), "rb") as fh:
|
|
91
|
+
payload = fh.read()
|
|
92
|
+
r = client.post("/mapper/map", files={"file": ("stmt.xlsx", io.BytesIO(payload))})
|
|
93
|
+
assert r.json()["file_base64"] is None
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def test_map_rejects_bad_format(client):
|
|
97
|
+
with open(os.path.join(FIX, "01_junk_split.xlsx"), "rb") as fh:
|
|
98
|
+
payload = fh.read()
|
|
99
|
+
r = client.post("/mapper/map", params={"format": "pdf"},
|
|
100
|
+
files={"file": ("stmt.xlsx", io.BytesIO(payload))})
|
|
101
|
+
assert r.status_code == 422 # "pdf" is not a valid OutFormat enum value
|
|
102
|
+
|
|
103
|
+
|
|
62
104
|
def test_map_rejects_non_xlsx(client):
|
|
63
105
|
r = client.post("/mapper/map",
|
|
64
106
|
files={"file": ("notes.txt", io.BytesIO(b"hello"), "text/plain")})
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|