tabularmapper 1.0.2__tar.gz → 1.0.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tabularmapper-1.0.2/src/tabularmapper.egg-info → tabularmapper-1.0.4}/PKG-INFO +15 -1
- {tabularmapper-1.0.2 → tabularmapper-1.0.4}/README.md +14 -0
- {tabularmapper-1.0.2 → tabularmapper-1.0.4}/pyproject.toml +6 -1
- {tabularmapper-1.0.2 → tabularmapper-1.0.4}/src/tabularmapper/__init__.py +1 -1
- {tabularmapper-1.0.2 → tabularmapper-1.0.4}/src/tabularmapper/api.py +39 -2
- {tabularmapper-1.0.2 → tabularmapper-1.0.4}/src/tabularmapper/engine.py +4 -1
- {tabularmapper-1.0.2 → tabularmapper-1.0.4}/src/tabularmapper/schema.py +1 -1
- tabularmapper-1.0.4/src/tabularmapper/static/index.html +565 -0
- {tabularmapper-1.0.2 → tabularmapper-1.0.4/src/tabularmapper.egg-info}/PKG-INFO +15 -1
- {tabularmapper-1.0.2 → tabularmapper-1.0.4}/src/tabularmapper.egg-info/SOURCES.txt +1 -0
- {tabularmapper-1.0.2 → tabularmapper-1.0.4}/tests/test_api.py +52 -1
- {tabularmapper-1.0.2 → tabularmapper-1.0.4}/LICENSE +0 -0
- {tabularmapper-1.0.2 → tabularmapper-1.0.4}/setup.cfg +0 -0
- {tabularmapper-1.0.2 → tabularmapper-1.0.4}/src/tabularmapper/ai_matcher.py +0 -0
- {tabularmapper-1.0.2 → tabularmapper-1.0.4}/src/tabularmapper/cli.py +0 -0
- {tabularmapper-1.0.2 → tabularmapper-1.0.4}/src/tabularmapper/learn.py +0 -0
- {tabularmapper-1.0.2 → tabularmapper-1.0.4}/src/tabularmapper/llm_fallback.py +0 -0
- {tabularmapper-1.0.2 → tabularmapper-1.0.4}/src/tabularmapper/mapping_cache.py +0 -0
- {tabularmapper-1.0.2 → tabularmapper-1.0.4}/src/tabularmapper/stores.py +0 -0
- {tabularmapper-1.0.2 → tabularmapper-1.0.4}/src/tabularmapper.egg-info/dependency_links.txt +0 -0
- {tabularmapper-1.0.2 → tabularmapper-1.0.4}/src/tabularmapper.egg-info/entry_points.txt +0 -0
- {tabularmapper-1.0.2 → tabularmapper-1.0.4}/src/tabularmapper.egg-info/requires.txt +0 -0
- {tabularmapper-1.0.2 → tabularmapper-1.0.4}/src/tabularmapper.egg-info/top_level.txt +0 -0
- {tabularmapper-1.0.2 → tabularmapper-1.0.4}/tests/test_learn.py +0 -0
- {tabularmapper-1.0.2 → tabularmapper-1.0.4}/tests/test_mapper.py +0 -0
- {tabularmapper-1.0.2 → tabularmapper-1.0.4}/tests/test_schema.py +0 -0
- {tabularmapper-1.0.2 → tabularmapper-1.0.4}/tests/test_stores.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: tabularmapper
|
|
3
|
-
Version: 1.0.2
|
|
3
|
+
Version: 1.0.4
|
|
4
4
|
Summary: Map any spreadsheet (.xlsx) to a schema you define — deterministic column mapping with an optional AI matcher
|
|
5
5
|
Author-email: Karthikeyan Duraisamy <karthikeyanduraisamy@kultivateindia.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -165,6 +165,7 @@ All are optional; sensible defaults apply.
|
|
|
165
165
|
| `TABULARMAPPER_LEARN_STORE` | `memory://` (no files) | where self-learned header synonyms live |
|
|
166
166
|
| `TABULARMAPPER_CONFIG` | *(none — required)* | output template + synonyms JSON (file / `https://` / `s3://`) |
|
|
167
167
|
| `TABULARMAPPER_ROUTE_PREFIX` | `/mapper` | FastAPI router path prefix |
|
|
168
|
+
| `TABULARMAPPER_THRESHOLD` | `80` | fuzzy-accept gate (0–100); raise it to push borderline fuzzy matches to the AI matcher |
|
|
168
169
|
| `OPENAI_API_KEY` | *(unset → AI off)* | enables the AI column matcher |
|
|
169
170
|
| `OPENAI_BASE_URL` | `https://api.openai.com/v1` | any OpenAI-compatible endpoint |
|
|
170
171
|
| `OPENAI_MODEL` | `gpt-4o-mini` | model name |
|
|
@@ -272,6 +273,7 @@ app.include_router(router)
|
|
|
272
273
|
|---|---|---|
|
|
273
274
|
| `POST` | `/mapper/map` | upload an `.xlsx`, get the mapping + rows (JSON) |
|
|
274
275
|
| `GET` | `/mapper/health` | `{status, ai_enabled}` |
|
|
276
|
+
| `GET` | `/mapper/config` | config-builder web page — design a schema, export `config.json` |
|
|
275
277
|
| `GET` | `/mapper/learn/pending` | debit/credit synonyms awaiting approval |
|
|
276
278
|
| `POST` | `/mapper/learn/approve` | approve a pending synonym (`?phrase=&field=`) |
|
|
277
279
|
| `POST` | `/mapper/learn/reject` | reject a pending synonym |
|
|
@@ -280,6 +282,18 @@ app.include_router(router)
|
|
|
280
282
|
blocking work in a threadpool. Store the original file to S3 in your own endpoint
|
|
281
283
|
if you need it — the mapper stays out of AWS.
|
|
282
284
|
|
|
285
|
+
Two query params shape the request:
|
|
286
|
+
|
|
287
|
+
```bash
|
|
288
|
+
curl -F file=@f.xlsx "http://localhost:8000/mapper/map?format=base64" # json + a mapped .xlsx in file_base64
|
|
289
|
+
curl -F file=@f.xlsx "http://localhost:8000/mapper/map?format=file" -OJ # download the mapped .xlsx
|
|
290
|
+
curl -F file=@f.xlsx "http://localhost:8000/mapper/map?threshold=90" # stricter fuzzy gate for this call
|
|
291
|
+
```
|
|
292
|
+
|
|
293
|
+
`format` is `json` (default) / `base64` / `file`. `threshold` (0–100) overrides
|
|
294
|
+
`TABULARMAPPER_THRESHOLD` for one request — raise it to send borderline fuzzy
|
|
295
|
+
matches to the AI matcher instead of trusting them.
|
|
296
|
+
|
|
283
297
|
The `/mapper` prefix is configurable (this is a general table→schema mapper, not
|
|
284
298
|
just banks): set `TABULARMAPPER_ROUTE_PREFIX`, or build the router yourself:
|
|
285
299
|
|
|
@@ -125,6 +125,7 @@ All are optional; sensible defaults apply.
|
|
|
125
125
|
| `TABULARMAPPER_LEARN_STORE` | `memory://` (no files) | where self-learned header synonyms live |
|
|
126
126
|
| `TABULARMAPPER_CONFIG` | *(none — required)* | output template + synonyms JSON (file / `https://` / `s3://`) |
|
|
127
127
|
| `TABULARMAPPER_ROUTE_PREFIX` | `/mapper` | FastAPI router path prefix |
|
|
128
|
+
| `TABULARMAPPER_THRESHOLD` | `80` | fuzzy-accept gate (0–100); raise it to push borderline fuzzy matches to the AI matcher |
|
|
128
129
|
| `OPENAI_API_KEY` | *(unset → AI off)* | enables the AI column matcher |
|
|
129
130
|
| `OPENAI_BASE_URL` | `https://api.openai.com/v1` | any OpenAI-compatible endpoint |
|
|
130
131
|
| `OPENAI_MODEL` | `gpt-4o-mini` | model name |
|
|
@@ -232,6 +233,7 @@ app.include_router(router)
|
|
|
232
233
|
|---|---|---|
|
|
233
234
|
| `POST` | `/mapper/map` | upload an `.xlsx`, get the mapping + rows (JSON) |
|
|
234
235
|
| `GET` | `/mapper/health` | `{status, ai_enabled}` |
|
|
236
|
+
| `GET` | `/mapper/config` | config-builder web page — design a schema, export `config.json` |
|
|
235
237
|
| `GET` | `/mapper/learn/pending` | debit/credit synonyms awaiting approval |
|
|
236
238
|
| `POST` | `/mapper/learn/approve` | approve a pending synonym (`?phrase=&field=`) |
|
|
237
239
|
| `POST` | `/mapper/learn/reject` | reject a pending synonym |
|
|
@@ -240,6 +242,18 @@ app.include_router(router)
|
|
|
240
242
|
blocking work in a threadpool. Store the original file to S3 in your own endpoint
|
|
241
243
|
if you need it — the mapper stays out of AWS.
|
|
242
244
|
|
|
245
|
+
Two query params shape the request:
|
|
246
|
+
|
|
247
|
+
```bash
|
|
248
|
+
curl -F file=@f.xlsx "http://localhost:8000/mapper/map?format=base64" # json + a mapped .xlsx in file_base64
|
|
249
|
+
curl -F file=@f.xlsx "http://localhost:8000/mapper/map?format=file" -OJ # download the mapped .xlsx
|
|
250
|
+
curl -F file=@f.xlsx "http://localhost:8000/mapper/map?threshold=90" # stricter fuzzy gate for this call
|
|
251
|
+
```
|
|
252
|
+
|
|
253
|
+
`format` is `json` (default) / `base64` / `file`. `threshold` (0–100) overrides
|
|
254
|
+
`TABULARMAPPER_THRESHOLD` for one request — raise it to send borderline fuzzy
|
|
255
|
+
matches to the AI matcher instead of trusting them.
|
|
256
|
+
|
|
243
257
|
The `/mapper` prefix is configurable (this is a general table→schema mapper, not
|
|
244
258
|
just banks): set `TABULARMAPPER_ROUTE_PREFIX`, or build the router yourself:
|
|
245
259
|
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "tabularmapper"
|
|
7
|
-
version = "1.0.2"
|
|
7
|
+
version = "1.0.4"
|
|
8
8
|
description = "Map any spreadsheet (.xlsx) to a schema you define — deterministic column mapping with an optional AI matcher"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.9"
|
|
@@ -55,6 +55,11 @@ tabularmapper = "tabularmapper.cli:main"
|
|
|
55
55
|
[tool.setuptools.packages.find]
|
|
56
56
|
where = ["src"]
|
|
57
57
|
|
|
58
|
+
# Ship the config-builder page inside the wheel so GET /mapper/config works
|
|
59
|
+
# from a pip-installed package (not just a source checkout).
|
|
60
|
+
[tool.setuptools.package-data]
|
|
61
|
+
tabularmapper = ["static/*.html"]
|
|
62
|
+
|
|
58
63
|
[tool.pytest.ini_options]
|
|
59
64
|
testpaths = ["tests"]
|
|
60
65
|
pythonpath = ["src"]
|
|
@@ -31,11 +31,12 @@ from __future__ import annotations
|
|
|
31
31
|
import os
|
|
32
32
|
from contextlib import asynccontextmanager
|
|
33
33
|
from enum import Enum
|
|
34
|
+
from importlib.resources import as_file, files
|
|
34
35
|
from typing import Any, Optional
|
|
35
36
|
|
|
36
37
|
from fastapi import APIRouter, FastAPI, File, HTTPException, Query, UploadFile
|
|
37
38
|
from fastapi.concurrency import run_in_threadpool
|
|
38
|
-
from fastapi.responses import Response
|
|
39
|
+
from fastapi.responses import HTMLResponse, Response
|
|
39
40
|
from pydantic import BaseModel
|
|
40
41
|
|
|
41
42
|
from . import engine # imported as a module so OUTPUT_SCHEMA is read
|
|
@@ -45,6 +46,17 @@ from .mapping_cache import MappingCache
|
|
|
45
46
|
_XLSX_MIME = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
|
46
47
|
|
|
47
48
|
|
|
49
|
+
def _default_threshold() -> int:
|
|
50
|
+
"""The fuzzy-accept gate (0-100). Below this, a column is left unmapped and,
|
|
51
|
+
if it's a critical field, the AI matcher is asked to fill it. Raise it to
|
|
52
|
+
push borderline fuzzy matches to the AI instead of trusting them. Read from
|
|
53
|
+
TABULARMAPPER_THRESHOLD at request time; falls back to 80."""
|
|
54
|
+
try:
|
|
55
|
+
return max(0, min(100, int(os.getenv("TABULARMAPPER_THRESHOLD", "80"))))
|
|
56
|
+
except (TypeError, ValueError):
|
|
57
|
+
return 80
|
|
58
|
+
|
|
59
|
+
|
|
48
60
|
class OutFormat(str, Enum):
|
|
49
61
|
"""Response shape for POST /map — rendered as a dropdown in the docs."""
|
|
50
62
|
json = "json" # rows inline (default)
|
|
@@ -128,6 +140,18 @@ async def health() -> dict:
|
|
|
128
140
|
return {"status": "ok", "ai_enabled": state.matcher is not None}
|
|
129
141
|
|
|
130
142
|
|
|
143
|
+
async def config_page() -> HTMLResponse:
|
|
144
|
+
"""Serve the self-contained config-builder page bundled at
|
|
145
|
+
tabularmapper/static/index.html. Uses importlib.resources so it works even
|
|
146
|
+
when the package is imported from a zip/egg, not only an unpacked wheel."""
|
|
147
|
+
try:
|
|
148
|
+
resource = files("tabularmapper").joinpath("static", "index.html")
|
|
149
|
+
with as_file(resource) as path:
|
|
150
|
+
return HTMLResponse(path.read_text(encoding="utf-8"))
|
|
151
|
+
except (FileNotFoundError, ModuleNotFoundError):
|
|
152
|
+
raise HTTPException(status_code=404, detail="config builder page not found")
|
|
153
|
+
|
|
154
|
+
|
|
131
155
|
async def map_statement(
|
|
132
156
|
file: UploadFile = File(...),
|
|
133
157
|
format: OutFormat = Query(
|
|
@@ -136,6 +160,13 @@ async def map_statement(
|
|
|
136
160
|
".xlsx encoded in file_base64; file = download the .xlsx "
|
|
137
161
|
"directly (binary, no JSON body).",
|
|
138
162
|
),
|
|
163
|
+
threshold: Optional[int] = Query(
|
|
164
|
+
None,
|
|
165
|
+
ge=0, le=100,
|
|
166
|
+
description="Fuzzy-accept gate 0-100. Overrides TABULARMAPPER_THRESHOLD "
|
|
167
|
+
"(default 80) for this request. Raise it to send borderline "
|
|
168
|
+
"fuzzy matches to the AI matcher instead of trusting them.",
|
|
169
|
+
),
|
|
139
170
|
):
|
|
140
171
|
"""Upload a spreadsheet (.xlsx); get the standardized mapping + rows.
|
|
141
172
|
|
|
@@ -143,18 +174,22 @@ async def map_statement(
|
|
|
143
174
|
* json -> MapResponse with the rows in `transactions`
|
|
144
175
|
* base64 -> same MapResponse, plus a mapped .xlsx in `file_base64`
|
|
145
176
|
* file -> the mapped .xlsx as a downloadable attachment
|
|
177
|
+
|
|
178
|
+
`threshold` (query) overrides the fuzzy gate for this one call; otherwise the
|
|
179
|
+
server default (TABULARMAPPER_THRESHOLD, else 80) is used.
|
|
146
180
|
"""
|
|
147
181
|
name = (file.filename or "").lower()
|
|
148
182
|
if not name.endswith((".xlsx", ".xls")):
|
|
149
183
|
raise HTTPException(status_code=400, detail="expected an .xlsx/.xls file")
|
|
150
184
|
|
|
185
|
+
gate = threshold if threshold is not None else _default_threshold()
|
|
151
186
|
data = await file.read() # raw bytes, parsed in memory (never hits disk)
|
|
152
187
|
try:
|
|
153
188
|
# blocking work -> threadpool; process_stream reads straight from bytes
|
|
154
189
|
res = await run_in_threadpool(
|
|
155
190
|
process_stream, data,
|
|
156
191
|
table_matcher=state.matcher, cache=state.cache,
|
|
157
|
-
learn_store=state.learn,
|
|
192
|
+
learn_store=state.learn, threshold=gate,
|
|
158
193
|
source_label=file.filename or "<upload>",
|
|
159
194
|
)
|
|
160
195
|
except Exception as exc: # noqa: BLE001
|
|
@@ -216,6 +251,8 @@ def make_router(prefix: Optional[str] = None, tags: Optional[list] = None) -> AP
|
|
|
216
251
|
prefix = os.getenv("TABULARMAPPER_ROUTE_PREFIX", "/mapper")
|
|
217
252
|
r = APIRouter(prefix=prefix.rstrip("/"), tags=tags or ["mapper"])
|
|
218
253
|
r.add_api_route("/health", health, methods=["GET"])
|
|
254
|
+
r.add_api_route("/config", config_page, methods=["GET"],
|
|
255
|
+
response_class=HTMLResponse, include_in_schema=False)
|
|
219
256
|
r.add_api_route("/map", map_statement, methods=["POST"], response_model=MapResponse)
|
|
220
257
|
r.add_api_route("/learn/pending", learn_pending, methods=["GET"])
|
|
221
258
|
r.add_api_route("/learn/approve", learn_approve, methods=["POST"])
|
|
@@ -810,7 +810,10 @@ def _run(rows: list[list], source_label: str, out_path, llm_fallback,
|
|
|
810
810
|
|
|
811
811
|
from_cache = False
|
|
812
812
|
col_maps = None
|
|
813
|
-
|
|
813
|
+
# Scope the cache to the active schema AND the fuzzy gate: a different
|
|
814
|
+
# threshold can change which columns map, so it must not reuse a mapping
|
|
815
|
+
# computed at another threshold.
|
|
816
|
+
schema_sig = f"{_schema_signature()}:t{threshold}"
|
|
814
817
|
if cache is not None:
|
|
815
818
|
cached = cache.get(header, namespace=schema_sig)
|
|
816
819
|
if cached is not None:
|
|
@@ -198,7 +198,7 @@ def _infer_type(field_key: str) -> str:
|
|
|
198
198
|
def default_config() -> Config:
|
|
199
199
|
"""The built-in default: EMPTY. This is a general mapper, so with no config
|
|
200
200
|
it maps nothing — you must provide an output_schema + synonyms (a file/URL via
|
|
201
|
-
|
|
201
|
+
TABULARMAPPER_CONFIG, a dict, or configure()). Use `bank_preset()` for the
|
|
202
202
|
ready-made bank-statement schema."""
|
|
203
203
|
return Config(output_schema=[], synonyms={}, critical_fields=[])
|
|
204
204
|
|
|
@@ -0,0 +1,565 @@
|
|
|
1
|
+
<!DOCTYPE html>
|
|
2
|
+
<html lang="en">
|
|
3
|
+
<head>
|
|
4
|
+
<meta charset="UTF-8">
|
|
5
|
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
6
|
+
<title>Tabular Mapper</title>
|
|
7
|
+
<script src="https://cdn.tailwindcss.com"></script>
|
|
8
|
+
<script>
|
|
9
|
+
tailwind.config = {
|
|
10
|
+
theme: {
|
|
11
|
+
extend: {
|
|
12
|
+
colors: {
|
|
13
|
+
brand: {
|
|
14
|
+
DEFAULT: '#ED0E4C',
|
|
15
|
+
dark: '#C90B40',
|
|
16
|
+
tint: '#FDE7EE',
|
|
17
|
+
},
|
|
18
|
+
},
|
|
19
|
+
},
|
|
20
|
+
},
|
|
21
|
+
};
|
|
22
|
+
</script>
|
|
23
|
+
<link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css" rel="stylesheet">
|
|
24
|
+
<style>
|
|
25
|
+
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');
|
|
26
|
+
|
|
27
|
+
body {
|
|
28
|
+
font-family: 'Inter', system-ui, sans-serif;
|
|
29
|
+
background-color: #f7f8fa;
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
.code-font {
|
|
33
|
+
font-family: ui-monospace, 'SF Mono', SFMono-Regular, Menlo, Consolas, monospace;
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
::-webkit-scrollbar {
|
|
37
|
+
width: 8px;
|
|
38
|
+
height: 8px;
|
|
39
|
+
}
|
|
40
|
+
::-webkit-scrollbar-track {
|
|
41
|
+
background: transparent;
|
|
42
|
+
}
|
|
43
|
+
::-webkit-scrollbar-thumb {
|
|
44
|
+
background: #d4d7dd;
|
|
45
|
+
border-radius: 4px;
|
|
46
|
+
}
|
|
47
|
+
::-webkit-scrollbar-thumb:hover {
|
|
48
|
+
background: #b9bdc6;
|
|
49
|
+
}
|
|
50
|
+
.code-panel ::-webkit-scrollbar-thumb {
|
|
51
|
+
background: #2a2f3a;
|
|
52
|
+
}
|
|
53
|
+
.code-panel ::-webkit-scrollbar-thumb:hover {
|
|
54
|
+
background: #3a404d;
|
|
55
|
+
}
|
|
56
|
+
</style>
|
|
57
|
+
</head>
|
|
58
|
+
<body class="h-screen flex flex-col overflow-hidden text-slate-800">
|
|
59
|
+
|
|
60
|
+
<!-- Header -->
|
|
61
|
+
<header class="bg-white border-b border-slate-200 h-14 flex items-center justify-between px-6 shrink-0">
|
|
62
|
+
<div class="flex items-center gap-2.5">
|
|
63
|
+
<div class="bg-brand text-white w-8 h-8 flex items-center justify-center rounded-lg">
|
|
64
|
+
<i class="fa-solid fa-layer-group text-sm"></i>
|
|
65
|
+
</div>
|
|
66
|
+
<div>
|
|
67
|
+
<h1 class="font-semibold text-[15px] leading-tight text-slate-900">Tabular Mapper</h1>
|
|
68
|
+
<p class="text-[11px] text-slate-400 leading-tight">xlsx mapping config</p>
|
|
69
|
+
</div>
|
|
70
|
+
</div>
|
|
71
|
+
<div class="flex items-center gap-2">
|
|
72
|
+
<input type="file" id="import-file" accept=".json,application/json" class="hidden" onchange="app.importFromFile(event)">
|
|
73
|
+
<button onclick="app.loadSample()" class="px-3 py-1.5 text-[13px] font-medium text-slate-600 border border-slate-200 rounded-lg hover:bg-slate-50 transition-colors">
|
|
74
|
+
<i class="fa-solid fa-table-list mr-1.5 text-xs"></i>Load sample
|
|
75
|
+
</button>
|
|
76
|
+
<button onclick="document.getElementById('import-file').click()" class="px-3 py-1.5 text-[13px] font-medium text-slate-600 border border-slate-200 rounded-lg hover:bg-slate-50 transition-colors">
|
|
77
|
+
<i class="fa-solid fa-upload mr-1.5 text-xs"></i>Import
|
|
78
|
+
</button>
|
|
79
|
+
<button onclick="app.exportToFile()" class="px-3 py-1.5 text-[13px] font-medium text-white bg-brand hover:bg-brand-dark rounded-lg transition-colors">
|
|
80
|
+
<i class="fa-solid fa-download mr-1.5 text-xs"></i>Export
|
|
81
|
+
</button>
|
|
82
|
+
</div>
|
|
83
|
+
</header>
|
|
84
|
+
|
|
85
|
+
<!-- Main Content -->
|
|
86
|
+
<div class="flex flex-1 overflow-hidden">
|
|
87
|
+
|
|
88
|
+
<!-- Left Panel: Editor -->
|
|
89
|
+
<div class="flex-1 flex flex-col min-w-0 overflow-y-auto">
|
|
90
|
+
<div class="p-6 max-w-4xl mx-auto w-full space-y-6 pb-16">
|
|
91
|
+
|
|
92
|
+
<!-- Fields -->
|
|
93
|
+
<section class="bg-white rounded-xl border border-slate-200 shadow-sm overflow-hidden">
|
|
94
|
+
<div class="px-5 py-3.5 border-b border-slate-100 flex justify-between items-center">
|
|
95
|
+
<div>
|
|
96
|
+
<h2 class="font-semibold text-sm text-slate-800">Fields</h2>
|
|
97
|
+
<p class="text-xs text-slate-400 mt-0.5">Header, type, description, synonyms, and the critical flag — all in one place.</p>
|
|
98
|
+
</div>
|
|
99
|
+
<button onclick="app.addField()" class="text-xs font-medium text-slate-600 border border-slate-200 px-2.5 py-1.5 rounded-lg hover:bg-slate-50 hover:text-brand hover:border-brand/40 transition-colors">
|
|
100
|
+
<i class="fa-solid fa-plus mr-1 text-[10px]"></i>Add field
|
|
101
|
+
</button>
|
|
102
|
+
</div>
|
|
103
|
+
|
|
104
|
+
<div class="p-5 space-y-3" id="fields-container"></div>
|
|
105
|
+
|
|
106
|
+
<div id="add-field-footer" class="hidden px-5 pb-5">
|
|
107
|
+
<button onclick="app.addField()" class="w-full py-2 border border-dashed border-slate-300 rounded-lg text-slate-500 text-[13px] font-medium hover:border-brand/50 hover:text-brand transition-colors">
|
|
108
|
+
<i class="fa-solid fa-plus mr-1 text-[10px]"></i>Add field
|
|
109
|
+
</button>
|
|
110
|
+
</div>
|
|
111
|
+
|
|
112
|
+
<div id="empty-state-schema" class="hidden py-14 text-center">
|
|
113
|
+
<div class="inline-flex items-center justify-center w-10 h-10 rounded-full bg-slate-100 mb-3">
|
|
114
|
+
<i class="fa-solid fa-table-list text-slate-400 text-sm"></i>
|
|
115
|
+
</div>
|
|
116
|
+
<p class="text-slate-500 text-sm mb-4">No schema loaded yet.</p>
|
|
117
|
+
<div class="flex items-center justify-center gap-2">
|
|
118
|
+
<button onclick="app.loadSample()" class="text-[13px] font-medium bg-brand hover:bg-brand-dark text-white px-3.5 py-1.5 rounded-lg transition-colors">Load sample data</button>
|
|
119
|
+
<button onclick="document.getElementById('import-file').click()" class="text-[13px] font-medium text-slate-600 border border-slate-200 px-3.5 py-1.5 rounded-lg hover:bg-slate-50 transition-colors">Import schema.json</button>
|
|
120
|
+
<button onclick="app.addField()" class="text-[13px] font-medium text-slate-600 border border-slate-200 px-3.5 py-1.5 rounded-lg hover:bg-slate-50 transition-colors">Start blank</button>
|
|
121
|
+
</div>
|
|
122
|
+
</div>
|
|
123
|
+
</section>
|
|
124
|
+
|
|
125
|
+
<!-- Require Any -->
|
|
126
|
+
<section class="bg-white rounded-xl border border-slate-200 shadow-sm overflow-hidden">
|
|
127
|
+
<div class="px-5 py-3.5 border-b border-slate-100">
|
|
128
|
+
<h2 class="font-semibold text-sm text-slate-800">Require any</h2>
|
|
129
|
+
<p class="text-xs text-slate-400 mt-0.5">At least one field from each group must exist in the output.</p>
|
|
130
|
+
</div>
|
|
131
|
+
<div class="p-5 space-y-3" id="require-any-container"></div>
|
|
132
|
+
<div class="px-5 pb-5">
|
|
133
|
+
<button onclick="app.addRequireAnyGroup()" class="w-full py-2 border border-dashed border-slate-300 rounded-lg text-slate-500 text-[13px] font-medium hover:border-brand/50 hover:text-brand transition-colors">
|
|
134
|
+
<i class="fa-solid fa-plus mr-1 text-[10px]"></i>Add group
|
|
135
|
+
</button>
|
|
136
|
+
</div>
|
|
137
|
+
</section>
|
|
138
|
+
|
|
139
|
+
</div>
|
|
140
|
+
</div>
|
|
141
|
+
|
|
142
|
+
<!-- Right Panel: Preview -->
|
|
143
|
+
<div class="code-panel w-[420px] bg-[#0d1117] flex flex-col shrink-0 border-l border-slate-200">
|
|
144
|
+
<div class="px-4 py-2.5 border-b border-white/10 flex justify-between items-center">
|
|
145
|
+
<span class="text-xs font-mono text-slate-400 code-font">schema.json</span>
|
|
146
|
+
<button onclick="app.copyToClipboard()" class="text-xs font-medium text-slate-300 border border-white/15 px-2.5 py-1 rounded-md hover:bg-white/10 transition-colors">
|
|
147
|
+
<i class="fa-regular fa-copy mr-1.5 text-[10px]"></i>Copy
|
|
148
|
+
</button>
|
|
149
|
+
</div>
|
|
150
|
+
<div class="flex-1 overflow-auto p-4">
|
|
151
|
+
<pre class="code-font text-[13px] leading-relaxed"><code id="json-preview" class="text-slate-200"></code></pre>
|
|
152
|
+
</div>
|
|
153
|
+
<div class="px-4 py-2 border-t border-white/10 text-xs text-slate-500 flex justify-between code-font">
|
|
154
|
+
<span>live preview</span>
|
|
155
|
+
<span id="char-count">0 chars</span>
|
|
156
|
+
</div>
|
|
157
|
+
</div>
|
|
158
|
+
|
|
159
|
+
</div>
|
|
160
|
+
|
|
161
|
+
<!-- Toast Notification -->
|
|
162
|
+
<div id="toast" class="fixed bottom-6 right-6 bg-slate-800 text-white px-4 py-2.5 rounded-lg shadow-lg transform translate-y-20 opacity-0 transition-all duration-300 flex items-center gap-2.5 z-50 text-sm">
|
|
163
|
+
<i class="fa-solid fa-circle-check text-brand"></i>
|
|
164
|
+
<span id="toast-message">Action Successful</span>
|
|
165
|
+
</div>
|
|
166
|
+
|
|
167
|
+
<script>
|
|
168
|
+
const DEFAULT_DATA = {
|
|
169
|
+
"output_schema": [
|
|
170
|
+
{
|
|
171
|
+
"field": "date",
|
|
172
|
+
"header": "Date",
|
|
173
|
+
"type": "date",
|
|
174
|
+
"description": "The date the transaction was posted or executed. May appear as transaction date, value date, or posting date in source data — these can differ by 1-2 days for the same transaction; prefer transaction/posting date over value date unless the source only provides one."
|
|
175
|
+
},
|
|
176
|
+
{
|
|
177
|
+
"field": "narration",
|
|
178
|
+
"header": "Narration",
|
|
179
|
+
"type": "string",
|
|
180
|
+
"description": "Free-text description of the transaction, such as the payee, purpose, or transaction type (e.g., 'NEFT TRANSFER TO XYZ', 'ATM WITHDRAWAL'). Often the longest and most variably formatted field in the source; do not truncate or attempt to parse structured data out of it unless separately instructed."
|
|
181
|
+
},
|
|
182
|
+
{
|
|
183
|
+
"field": "reference_number",
|
|
184
|
+
"header": "Reference Number",
|
|
185
|
+
"type": "string",
|
|
186
|
+
"description": "A unique identifier for the transaction, such as a cheque number, UTR, RRN, or transaction ID. Frequently blank for cash, UPI, or POS transactions — absence is normal and should not be treated as a mapping failure."
|
|
187
|
+
},
|
|
188
|
+
{
|
|
189
|
+
"field": "debit",
|
|
190
|
+
"header": "Debit",
|
|
191
|
+
"type": "currency",
|
|
192
|
+
"description": "Amount withdrawn or paid out in this transaction. Should be blank or zero if the transaction is a credit. Strip currency symbols and thousand separators before parsing to numeric."
|
|
193
|
+
},
|
|
194
|
+
{
|
|
195
|
+
"field": "credit",
|
|
196
|
+
"header": "Credit",
|
|
197
|
+
"type": "currency",
|
|
198
|
+
"description": "Amount deposited or received in this transaction. Should be blank or zero if the transaction is a debit. Strip currency symbols and thousand separators before parsing to numeric."
|
|
199
|
+
}
|
|
200
|
+
],
|
|
201
|
+
"synonyms": {
|
|
202
|
+
"date": ["date", "txn date", "transaction date", "value date", "posting date", "entry date", "book date", "date of transaction", "trans date", "value dt", "txn dt", "date/time", "date & time"],
|
|
203
|
+
"narration": ["narration", "description", "particulars", "transaction details", "transaction description", "details", "remarks", "memo", "transaction narration", "particulars/remarks", "desc", "transaction particulars", "purpose", "comments", "notes"],
|
|
204
|
+
"reference_number": ["reference number", "ref no", "ref number", "reference no", "reference", "cheque no", "cheque number", "chq no", "chq number", "instrument no", "instrument number", "transaction id", "txn id", "transaction reference", "transaction ref no", "utr", "utr no", "utr number", "rrn", "rrn number", "cheque/ref no", "cheque/ref number", "doc no", "document number", "voucher no", "voucher number", "receipt no", "receipt number", "transaction reference number"],
|
|
205
|
+
"debit": ["debit", "debit amount", "debit amt", "withdrawal", "withdrawal amount", "withdrawal amt", "dr", "dr amount", "amount debited", "amount withdrawn", "paid out", "debit (dr)", "debit(dr)", "outflow", "money out", "payment", "debits"],
|
|
206
|
+
"credit": ["credit", "credit amount", "credit amt", "deposit", "deposit amount", "deposit amt", "cr", "cr amount", "amount credited", "amount deposited", "paid in", "credit (cr)", "credit(cr)", "inflow", "money in", "receipt", "credits"]
|
|
207
|
+
},
|
|
208
|
+
"critical_fields": ["date", "narration"],
|
|
209
|
+
"require_any": [
|
|
210
|
+
["debit", "credit"]
|
|
211
|
+
]
|
|
212
|
+
};
|
|
213
|
+
|
|
214
|
+
class SchemaApp {
|
|
215
|
+
constructor() {
|
|
216
|
+
this.data = SchemaApp.emptyData();
|
|
217
|
+
this.init();
|
|
218
|
+
}
|
|
219
|
+
|
|
220
|
+
static emptyData() {
|
|
221
|
+
return { output_schema: [], synonyms: {}, critical_fields: [], require_any: [] };
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
static normalize(obj) {
|
|
225
|
+
obj = obj || {};
|
|
226
|
+
return {
|
|
227
|
+
output_schema: Array.isArray(obj.output_schema) ? obj.output_schema : [],
|
|
228
|
+
synonyms: (obj.synonyms && typeof obj.synonyms === 'object' && !Array.isArray(obj.synonyms)) ? obj.synonyms : {},
|
|
229
|
+
critical_fields: Array.isArray(obj.critical_fields) ? obj.critical_fields : [],
|
|
230
|
+
require_any: Array.isArray(obj.require_any) ? obj.require_any : [],
|
|
231
|
+
};
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
init() {
|
|
235
|
+
this.renderFields();
|
|
236
|
+
this.renderRequireAny();
|
|
237
|
+
this.updatePreview();
|
|
238
|
+
}
|
|
239
|
+
|
|
240
|
+
// --- Core Logic ---
|
|
241
|
+
|
|
242
|
+
getJSON() {
|
|
243
|
+
return this.data;
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
updatePreview() {
|
|
247
|
+
const json = JSON.stringify(this.data, null, 2);
|
|
248
|
+
document.getElementById('json-preview').textContent = json;
|
|
249
|
+
document.getElementById('char-count').textContent = `${json.length} chars`;
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
showToast(msg) {
|
|
253
|
+
const toast = document.getElementById('toast');
|
|
254
|
+
document.getElementById('toast-message').textContent = msg;
|
|
255
|
+
toast.classList.remove('translate-y-20', 'opacity-0');
|
|
256
|
+
setTimeout(() => {
|
|
257
|
+
toast.classList.add('translate-y-20', 'opacity-0');
|
|
258
|
+
}, 3000);
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
loadSample() {
|
|
262
|
+
if (this.data.output_schema.length && !confirm("Load sample schema? This replaces the current schema.")) return;
|
|
263
|
+
this.data = JSON.parse(JSON.stringify(DEFAULT_DATA));
|
|
264
|
+
this.init();
|
|
265
|
+
this.showToast("Sample schema loaded");
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
importFromFile(event) {
|
|
269
|
+
const file = event.target.files[0];
|
|
270
|
+
if (!file) return;
|
|
271
|
+
const reader = new FileReader();
|
|
272
|
+
reader.onload = (e) => {
|
|
273
|
+
try {
|
|
274
|
+
this.data = SchemaApp.normalize(JSON.parse(e.target.result));
|
|
275
|
+
this.init();
|
|
276
|
+
this.showToast(`Imported ${file.name}`);
|
|
277
|
+
} catch (err) {
|
|
278
|
+
this.showToast("Could not import — not valid JSON");
|
|
279
|
+
}
|
|
280
|
+
};
|
|
281
|
+
reader.readAsText(file);
|
|
282
|
+
event.target.value = '';
|
|
283
|
+
}
|
|
284
|
+
|
|
285
|
+
exportToFile() {
|
|
286
|
+
const json = JSON.stringify(this.data, null, 2);
|
|
287
|
+
const blob = new Blob([json], { type: 'application/json' });
|
|
288
|
+
const url = URL.createObjectURL(blob);
|
|
289
|
+
const a = document.createElement('a');
|
|
290
|
+
a.href = url;
|
|
291
|
+
a.download = 'schema.json';
|
|
292
|
+
document.body.appendChild(a);
|
|
293
|
+
a.click();
|
|
294
|
+
document.body.removeChild(a);
|
|
295
|
+
URL.revokeObjectURL(url);
|
|
296
|
+
this.showToast("schema.json exported");
|
|
297
|
+
}
|
|
298
|
+
|
|
299
|
+
copyToClipboard() {
|
|
300
|
+
navigator.clipboard.writeText(JSON.stringify(this.data, null, 2)).then(() => {
|
|
301
|
+
this.showToast("JSON copied to clipboard");
|
|
302
|
+
});
|
|
303
|
+
}
|
|
304
|
+
|
|
305
|
+
// --- Fields (schema + synonyms + critical, unified) ---
|
|
306
|
+
|
|
307
|
+
renderFields() {
|
|
308
|
+
const container = document.getElementById('fields-container');
|
|
309
|
+
const empty = document.getElementById('empty-state-schema');
|
|
310
|
+
const footer = document.getElementById('add-field-footer');
|
|
311
|
+
container.innerHTML = '';
|
|
312
|
+
|
|
313
|
+
if (this.data.output_schema.length === 0) {
|
|
314
|
+
empty.classList.remove('hidden');
|
|
315
|
+
container.classList.add('hidden');
|
|
316
|
+
footer.classList.add('hidden');
|
|
317
|
+
return;
|
|
318
|
+
}
|
|
319
|
+
empty.classList.add('hidden');
|
|
320
|
+
container.classList.remove('hidden');
|
|
321
|
+
footer.classList.remove('hidden');
|
|
322
|
+
|
|
323
|
+
this.data.output_schema.forEach((field, index) => {
|
|
324
|
+
const name = field.field;
|
|
325
|
+
if (!this.data.synonyms[name]) this.data.synonyms[name] = [];
|
|
326
|
+
const syns = this.data.synonyms[name];
|
|
327
|
+
const isCritical = this.data.critical_fields.includes(name);
|
|
328
|
+
|
|
329
|
+
const synTags = syns.map((syn, i) => `
|
|
330
|
+
<span class="inline-flex items-center bg-white border border-slate-200 text-slate-600 text-xs px-2 py-0.5 rounded-md mr-1.5 mb-1.5">
|
|
331
|
+
${syn}
|
|
332
|
+
<button onclick="app.removeSynonym('${name}', ${i})" class="ml-1.5 text-slate-300 hover:text-brand focus:outline-none">
|
|
333
|
+
<i class="fa-solid fa-xmark text-[10px]"></i>
|
|
334
|
+
</button>
|
|
335
|
+
</span>`).join('');
|
|
336
|
+
|
|
337
|
+
const card = document.createElement('div');
|
|
338
|
+
card.className = "bg-slate-50 rounded-lg border border-slate-200 p-3.5";
|
|
339
|
+
card.innerHTML = `
|
|
340
|
+
<div class="flex flex-wrap items-center gap-2">
|
|
341
|
+
<span class="text-xs text-slate-300 tabular-nums w-4 text-center shrink-0">${index + 1}</span>
|
|
342
|
+
<input type="text" value="${field.field}" placeholder="field_id"
|
|
343
|
+
onchange="app.updateField(${index}, 'field', this.value)"
|
|
344
|
+
class="flex-1 min-w-[140px] bg-white border border-slate-300 rounded-md px-2.5 py-1.5 text-[13px] font-medium text-slate-700 outline-none focus:border-brand focus:ring-1 focus:ring-brand/20 transition-colors">
|
|
345
|
+
<input type="text" value="${field.header}" placeholder="Header"
|
|
346
|
+
onchange="app.updateField(${index}, 'header', this.value)"
|
|
347
|
+
class="flex-1 min-w-[140px] bg-white border border-slate-300 rounded-md px-2.5 py-1.5 text-[13px] text-slate-600 outline-none focus:border-brand focus:ring-1 focus:ring-brand/20 transition-colors">
|
|
348
|
+
<select onchange="app.updateField(${index}, 'type', this.value)"
|
|
349
|
+
class="w-28 shrink-0 bg-white border border-slate-300 rounded-md px-2 py-1.5 text-[13px] text-slate-600 outline-none focus:border-brand focus:ring-1 focus:ring-brand/20 cursor-pointer">
|
|
350
|
+
<option value="string" ${field.type === 'string' ? 'selected' : ''}>string</option>
|
|
351
|
+
<option value="date" ${field.type === 'date' ? 'selected' : ''}>date</option>
|
|
352
|
+
<option value="currency" ${field.type === 'currency' ? 'selected' : ''}>currency</option>
|
|
353
|
+
<option value="number" ${field.type === 'number' ? 'selected' : ''}>number</option>
|
|
354
|
+
<option value="boolean" ${field.type === 'boolean' ? 'selected' : ''}>boolean</option>
|
|
355
|
+
</select>
|
|
356
|
+
<button onclick="app.toggleCritical('${name}')" title="Critical: must be present in the output"
|
|
357
|
+
class="shrink-0 border text-[11px] font-medium px-2 py-1.5 rounded-md transition-colors ${isCritical ? 'bg-brand-tint text-brand border-brand/30' : 'bg-white text-slate-400 border-slate-200 hover:text-slate-600'}">
|
|
358
|
+
<i class="fa-${isCritical ? 'solid' : 'regular'} fa-star text-[10px] mr-1"></i>Critical
|
|
359
|
+
</button>
|
|
360
|
+
<button onclick="app.removeField(${index})" title="Remove field"
|
|
361
|
+
class="shrink-0 text-slate-300 hover:text-brand transition-colors p-1.5 rounded">
|
|
362
|
+
<i class="fa-solid fa-trash-can text-xs"></i>
|
|
363
|
+
</button>
|
|
364
|
+
</div>
|
|
365
|
+
<textarea onchange="app.updateField(${index}, 'description', this.value)"
|
|
366
|
+
placeholder="Description — what this field means and how to map it"
|
|
367
|
+
class="w-full mt-2 bg-white border border-slate-200 rounded-md px-2.5 py-1.5 text-[13px] text-slate-500 outline-none focus:border-brand focus:ring-1 focus:ring-brand/20 transition-colors resize-none overflow-hidden"
|
|
368
|
+
oninput="this.style.height=''; this.style.height=this.scrollHeight+'px'">${field.description || ''}</textarea>
|
|
369
|
+
<div class="mt-2.5">
|
|
370
|
+
<div class="flex items-center justify-between mb-1.5">
|
|
371
|
+
<span class="text-[11px] font-semibold uppercase tracking-wide text-slate-400">Synonyms</span>
|
|
372
|
+
<span class="text-[11px] text-slate-400 code-font">${syns.length}</span>
|
|
373
|
+
</div>
|
|
374
|
+
<div class="flex flex-wrap">${synTags}</div>
|
|
375
|
+
<div class="flex gap-2 mt-0.5">
|
|
376
|
+
<input type="text" id="synonym-input-${name}" placeholder="Add synonym, press Enter..."
|
|
377
|
+
class="flex-1 bg-white border border-slate-300 text-slate-700 text-[13px] rounded-md focus:outline-none focus:border-brand focus:ring-1 focus:ring-brand/20 px-2.5 py-1.5"
|
|
378
|
+
onkeypress="if(event.key==='Enter') app.addSynonym('${name}')">
|
|
379
|
+
<button onclick="app.addSynonym('${name}')"
|
|
380
|
+
class="bg-white border border-slate-300 hover:bg-slate-100 hover:text-brand text-slate-600 rounded-md text-[13px] px-3 py-1.5 transition-colors">
|
|
381
|
+
<i class="fa-solid fa-plus text-xs"></i>
|
|
382
|
+
</button>
|
|
383
|
+
</div>
|
|
384
|
+
</div>
|
|
385
|
+
`;
|
|
386
|
+
container.appendChild(card);
|
|
387
|
+
});
|
|
388
|
+
|
|
389
|
+
// Auto-size description textareas to fit their content
|
|
390
|
+
container.querySelectorAll('textarea').forEach(ta => {
|
|
391
|
+
ta.style.height = '';
|
|
392
|
+
ta.style.height = ta.scrollHeight + 'px';
|
|
393
|
+
});
|
|
394
|
+
}
|
|
395
|
+
|
|
396
|
+
addField() {
|
|
397
|
+
this.data.output_schema.push({ field: "new_field", header: "New Field", type: "string", description: "" });
|
|
398
|
+
this.renderFields();
|
|
399
|
+
this.renderRequireAny();
|
|
400
|
+
this.updatePreview();
|
|
401
|
+
// Focus the new field's id for immediate typing
|
|
402
|
+
const container = document.getElementById('fields-container');
|
|
403
|
+
const last = container.lastElementChild;
|
|
404
|
+
if (last) {
|
|
405
|
+
last.scrollIntoView({ behavior: 'smooth', block: 'nearest' });
|
|
406
|
+
const input = last.querySelector('input');
|
|
407
|
+
if (input) { input.focus(); input.select(); }
|
|
408
|
+
}
|
|
409
|
+
}
|
|
410
|
+
|
|
411
|
+
removeField(index) {
|
|
412
|
+
const name = this.data.output_schema[index].field;
|
|
413
|
+
this.data.output_schema.splice(index, 1);
|
|
414
|
+
delete this.data.synonyms[name];
|
|
415
|
+
this.data.critical_fields = this.data.critical_fields.filter(f => f !== name);
|
|
416
|
+
this.data.require_any = this.data.require_any.map(g => g.filter(f => f !== name)).filter(g => g.length > 0);
|
|
417
|
+
this.renderFields();
|
|
418
|
+
this.renderRequireAny();
|
|
419
|
+
this.updatePreview();
|
|
420
|
+
}
|
|
421
|
+
|
|
422
|
+
updateField(index, key, value) {
|
|
423
|
+
const oldName = this.data.output_schema[index].field;
|
|
424
|
+
this.data.output_schema[index][key] = value;
|
|
425
|
+
|
|
426
|
+
if (key === 'field' && oldName !== value) {
|
|
427
|
+
if (this.data.synonyms[oldName]) {
|
|
428
|
+
this.data.synonyms[value] = this.data.synonyms[oldName];
|
|
429
|
+
delete this.data.synonyms[oldName];
|
|
430
|
+
}
|
|
431
|
+
this.data.critical_fields = this.data.critical_fields.map(f => f === oldName ? value : f);
|
|
432
|
+
this.data.require_any = this.data.require_any.map(g => g.map(f => f === oldName ? value : f));
|
|
433
|
+
this.renderFields();
|
|
434
|
+
this.renderRequireAny();
|
|
435
|
+
}
|
|
436
|
+
this.updatePreview();
|
|
437
|
+
}
|
|
438
|
+
|
|
439
|
+
toggleCritical(field) {
|
|
440
|
+
const i = this.data.critical_fields.indexOf(field);
|
|
441
|
+
if (i === -1) this.data.critical_fields.push(field);
|
|
442
|
+
else this.data.critical_fields.splice(i, 1);
|
|
443
|
+
this.renderFields();
|
|
444
|
+
this.updatePreview();
|
|
445
|
+
}
|
|
446
|
+
|
|
447
|
+
// --- Synonyms ---
|
|
448
|
+
|
|
449
|
+
addSynonym(field) {
|
|
450
|
+
const input = document.getElementById(`synonym-input-${field}`);
|
|
451
|
+
if (!input) return;
|
|
452
|
+
const value = input.value.trim().toLowerCase();
|
|
453
|
+
if (!value) return;
|
|
454
|
+
if (!this.data.synonyms[field]) this.data.synonyms[field] = [];
|
|
455
|
+
if (this.data.synonyms[field].includes(value)) {
|
|
456
|
+
this.showToast("Synonym already exists");
|
|
457
|
+
return;
|
|
458
|
+
}
|
|
459
|
+
this.data.synonyms[field].push(value);
|
|
460
|
+
this.renderFields();
|
|
461
|
+
this.updatePreview();
|
|
462
|
+
// Refocus the same input for rapid entry
|
|
463
|
+
const again = document.getElementById(`synonym-input-${field}`);
|
|
464
|
+
if (again) again.focus();
|
|
465
|
+
}
|
|
466
|
+
|
|
467
|
+
removeSynonym(field, index) {
|
|
468
|
+
this.data.synonyms[field].splice(index, 1);
|
|
469
|
+
this.renderFields();
|
|
470
|
+
this.updatePreview();
|
|
471
|
+
}
|
|
472
|
+
|
|
473
|
+
// --- Require Any ---
|
|
474
|
+
|
|
475
|
+
renderRequireAny() {
|
|
476
|
+
const container = document.getElementById('require-any-container');
|
|
477
|
+
container.innerHTML = '';
|
|
478
|
+
|
|
479
|
+
const currentFields = this.data.output_schema.map(f => f.field);
|
|
480
|
+
|
|
481
|
+
if (this.data.require_any.length === 0) {
|
|
482
|
+
container.innerHTML = '<p class="text-xs text-slate-400">No groups yet. Add one to require at least one of a set of fields.</p>';
|
|
483
|
+
return;
|
|
484
|
+
}
|
|
485
|
+
|
|
486
|
+
this.data.require_any.forEach((group, groupIndex) => {
|
|
487
|
+
const groupDiv = document.createElement('div');
|
|
488
|
+
groupDiv.className = "bg-slate-50 border border-slate-200 rounded-lg p-3 relative group";
|
|
489
|
+
|
|
490
|
+
let tagsHtml = group.map((field, i) => {
|
|
491
|
+
if (!currentFields.includes(field)) return '';
|
|
492
|
+
return `
|
|
493
|
+
<span class="inline-flex items-center bg-slate-100 text-slate-600 text-xs px-2 py-0.5 rounded-md border border-slate-200 mr-1.5 mb-1.5">
|
|
494
|
+
${field}
|
|
495
|
+
<button onclick="app.removeRequireAnyField(${groupIndex}, ${i})" class="ml-1.5 text-slate-400 hover:text-brand">
|
|
496
|
+
<i class="fa-solid fa-xmark text-[10px]"></i>
|
|
497
|
+
</button>
|
|
498
|
+
</span>
|
|
499
|
+
`}).join('');
|
|
500
|
+
|
|
501
|
+
let availableFields = currentFields.filter(f => !group.includes(f));
|
|
502
|
+
let optionsHtml = availableFields.map(f => `<option value="${f}">${f}</option>`).join('');
|
|
503
|
+
|
|
504
|
+
groupDiv.innerHTML = `
|
|
505
|
+
<div class="absolute top-2 right-2 opacity-0 group-hover:opacity-100 transition-opacity">
|
|
506
|
+
<button onclick="app.removeRequireAnyGroup(${groupIndex})" class="text-slate-400 hover:text-brand p-1">
|
|
507
|
+
<i class="fa-solid fa-trash-can text-xs"></i>
|
|
508
|
+
</button>
|
|
509
|
+
</div>
|
|
510
|
+
<div class="text-[11px] font-semibold text-slate-400 mb-2 uppercase tracking-wide">Group ${groupIndex + 1}</div>
|
|
511
|
+
<div class="flex flex-wrap mb-1.5 min-h-[1.75rem]">
|
|
512
|
+
${tagsHtml}
|
|
513
|
+
</div>
|
|
514
|
+
<div class="flex gap-2 max-w-md">
|
|
515
|
+
<select id="require-any-select-${groupIndex}" class="flex-1 bg-white border border-slate-300 text-slate-700 text-[13px] rounded-lg focus:outline-none focus:border-brand focus:ring-1 focus:ring-brand/20 block px-2.5 py-1.5">
|
|
516
|
+
<option value="">Add field...</option>
|
|
517
|
+
${optionsHtml}
|
|
518
|
+
</select>
|
|
519
|
+
<button onclick="app.addRequireAnyField(${groupIndex})" class="bg-white border border-slate-300 hover:bg-slate-50 hover:text-brand text-slate-600 text-[13px] px-3 py-1.5 rounded-lg transition-colors">
|
|
520
|
+
<i class="fa-solid fa-plus text-xs"></i>
|
|
521
|
+
</button>
|
|
522
|
+
</div>
|
|
523
|
+
`;
|
|
524
|
+
container.appendChild(groupDiv);
|
|
525
|
+
});
|
|
526
|
+
}
|
|
527
|
+
|
|
528
|
+
addRequireAnyGroup() {
|
|
529
|
+
this.data.require_any.push([]);
|
|
530
|
+
this.renderRequireAny();
|
|
531
|
+
this.updatePreview();
|
|
532
|
+
}
|
|
533
|
+
|
|
534
|
+
removeRequireAnyGroup(index) {
|
|
535
|
+
this.data.require_any.splice(index, 1);
|
|
536
|
+
this.renderRequireAny();
|
|
537
|
+
this.updatePreview();
|
|
538
|
+
}
|
|
539
|
+
|
|
540
|
+
addRequireAnyField(groupIndex) {
|
|
541
|
+
const select = document.getElementById(`require-any-select-${groupIndex}`);
|
|
542
|
+
const value = select.value;
|
|
543
|
+
if (value) {
|
|
544
|
+
this.data.require_any[groupIndex].push(value);
|
|
545
|
+
this.renderRequireAny();
|
|
546
|
+
this.updatePreview();
|
|
547
|
+
}
|
|
548
|
+
}
|
|
549
|
+
|
|
550
|
+
removeRequireAnyField(groupIndex, fieldIndex) {
|
|
551
|
+
this.data.require_any[groupIndex].splice(fieldIndex, 1);
|
|
552
|
+
if (this.data.require_any[groupIndex].length === 0) {
|
|
553
|
+
this.data.require_any.splice(groupIndex, 1);
|
|
554
|
+
}
|
|
555
|
+
this.renderRequireAny();
|
|
556
|
+
this.updatePreview();
|
|
557
|
+
}
|
|
558
|
+
}
|
|
559
|
+
|
|
560
|
+
// Initialize App
|
|
561
|
+
const app = new SchemaApp();
|
|
562
|
+
|
|
563
|
+
</script>
|
|
564
|
+
</body>
|
|
565
|
+
</html>
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: tabularmapper
|
|
3
|
-
Version: 1.0.2
|
|
3
|
+
Version: 1.0.4
|
|
4
4
|
Summary: Map any spreadsheet (.xlsx) to a schema you define — deterministic column mapping with an optional AI matcher
|
|
5
5
|
Author-email: Karthikeyan Duraisamy <karthikeyanduraisamy@kultivateindia.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -165,6 +165,7 @@ All are optional; sensible defaults apply.
|
|
|
165
165
|
| `TABULARMAPPER_LEARN_STORE` | `memory://` (no files) | where self-learned header synonyms live |
|
|
166
166
|
| `TABULARMAPPER_CONFIG` | *(none — required)* | output template + synonyms JSON (file / `https://` / `s3://`) |
|
|
167
167
|
| `TABULARMAPPER_ROUTE_PREFIX` | `/mapper` | FastAPI router path prefix |
|
|
168
|
+
| `TABULARMAPPER_THRESHOLD` | `80` | fuzzy-accept gate (0–100); raise it to push borderline fuzzy matches to the AI matcher |
|
|
168
169
|
| `OPENAI_API_KEY` | *(unset → AI off)* | enables the AI column matcher |
|
|
169
170
|
| `OPENAI_BASE_URL` | `https://api.openai.com/v1` | any OpenAI-compatible endpoint |
|
|
170
171
|
| `OPENAI_MODEL` | `gpt-4o-mini` | model name |
|
|
@@ -272,6 +273,7 @@ app.include_router(router)
|
|
|
272
273
|
|---|---|---|
|
|
273
274
|
| `POST` | `/mapper/map` | upload an `.xlsx`, get the mapping + rows (JSON) |
|
|
274
275
|
| `GET` | `/mapper/health` | `{status, ai_enabled}` |
|
|
276
|
+
| `GET` | `/mapper/config` | config-builder web page — design a schema, export `config.json` |
|
|
275
277
|
| `GET` | `/mapper/learn/pending` | debit/credit synonyms awaiting approval |
|
|
276
278
|
| `POST` | `/mapper/learn/approve` | approve a pending synonym (`?phrase=&field=`) |
|
|
277
279
|
| `POST` | `/mapper/learn/reject` | reject a pending synonym |
|
|
@@ -280,6 +282,18 @@ app.include_router(router)
|
|
|
280
282
|
blocking work in a threadpool. Store the original file to S3 in your own endpoint
|
|
281
283
|
if you need it — the mapper stays out of AWS.
|
|
282
284
|
|
|
285
|
+
Two query params shape the request:
|
|
286
|
+
|
|
287
|
+
```bash
|
|
288
|
+
curl -F file=@f.xlsx "http://localhost:8000/mapper/map?format=base64" # json + a mapped .xlsx in file_base64
|
|
289
|
+
curl -F file=@f.xlsx "http://localhost:8000/mapper/map?format=file" -OJ # download the mapped .xlsx
|
|
290
|
+
curl -F file=@f.xlsx "http://localhost:8000/mapper/map?threshold=90" # stricter fuzzy gate for this call
|
|
291
|
+
```
|
|
292
|
+
|
|
293
|
+
`format` is `json` (default) / `base64` / `file`. `threshold` (0–100) overrides
|
|
294
|
+
`TABULARMAPPER_THRESHOLD` for one request — raise it to send borderline fuzzy
|
|
295
|
+
matches to the AI matcher instead of trusting them.
|
|
296
|
+
|
|
283
297
|
The `/mapper` prefix is configurable (this is a general table→schema mapper, not
|
|
284
298
|
just banks): set `TABULARMAPPER_ROUTE_PREFIX`, or build the router yourself:
|
|
285
299
|
|
|
@@ -17,6 +17,7 @@ src/tabularmapper.egg-info/dependency_links.txt
|
|
|
17
17
|
src/tabularmapper.egg-info/entry_points.txt
|
|
18
18
|
src/tabularmapper.egg-info/requires.txt
|
|
19
19
|
src/tabularmapper.egg-info/top_level.txt
|
|
20
|
+
src/tabularmapper/static/index.html
|
|
20
21
|
tests/test_api.py
|
|
21
22
|
tests/test_learn.py
|
|
22
23
|
tests/test_mapper.py
|
|
@@ -107,10 +107,61 @@ def test_map_rejects_non_xlsx(client):
|
|
|
107
107
|
assert r.status_code == 400
|
|
108
108
|
|
|
109
109
|
|
|
110
|
+
def _fuzzy_xlsx_bytes():
    """Return the bytes of a tiny bank sheet with a misspelled 'Descriptn' header.

    Per the tests that use it, that header only fuzzy-matches (score 90).
    """
    from openpyxl import Workbook

    workbook = Workbook()
    sheet = workbook.active
    rows = (
        ["Date", "Descriptn", "Debit", "Credit"],
        ["01-06-2026", "Coffee", "150", ""],
        ["02-06-2026", "Salary", "", "45000"],
    )
    for row in rows:
        sheet.append(row)

    # Serialise to memory so no file is written.
    stream = io.BytesIO()
    workbook.save(stream)
    return stream.getvalue()
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def test_map_threshold_query_changes_mapping(client):
    """The `threshold` query parameter changes how the 'Descriptn' column is mapped."""
    workbook_bytes = _fuzzy_xlsx_bytes()

    def columns_by_header(response):
        # Index the response's column entries by their raw header text.
        return {col["raw_header"]: col for col in response.json()["columns"]}

    # default gate (80): 'Descriptn' (score 90) is accepted as fuzzy
    default_response = client.post(
        "/mapper/map",
        files={"file": ("s.xlsx", io.BytesIO(workbook_bytes))},
    )
    descriptn = columns_by_header(default_response)["Descriptn"]
    assert descriptn["field"] == "description"
    assert descriptn["method"] == "fuzzy"

    # raise the gate above 90: the same column now falls through -> unmapped
    strict_response = client.post(
        "/mapper/map",
        params={"threshold": 95},
        files={"file": ("s.xlsx", io.BytesIO(workbook_bytes))},
    )
    assert columns_by_header(strict_response)["Descriptn"]["field"] is None
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def test_map_threshold_out_of_range(client):
    """Thresholds outside 0-100 are rejected with HTTP 422."""
    workbook_bytes = _fuzzy_xlsx_bytes()
    out_of_range = (150, -1)

    for threshold in out_of_range:
        upload = {"file": ("s.xlsx", io.BytesIO(workbook_bytes))}
        response = client.post(
            "/mapper/map",
            params={"threshold": threshold},
            files=upload,
        )
        # ge=0 / le=100 validation
        assert response.status_code == 422
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def test_default_threshold_reads_env(monkeypatch):
    """`_default_threshold()` follows TABULARMAPPER_THRESHOLD and falls back to 80."""
    import tabularmapper.api as api

    env_var = "TABULARMAPPER_THRESHOLD"
    # (raw env value, expected threshold); the invalid value falls back to 80.
    cases = (("90", 90), ("banana", 80))
    for raw_value, expected in cases:
        monkeypatch.setenv(env_var, raw_value)
        assert api._default_threshold() == expected

    # With the variable unset the fallback applies as well.
    monkeypatch.delenv(env_var, raising=False)
    assert api._default_threshold() == 80
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def test_config_page_served(client):
    """GET /mapper/config answers 200 with an HTML document."""
    response = client.get("/mapper/config")
    assert response.status_code == 200

    content_type = response.headers["content-type"]
    assert content_type.startswith("text/html")

    # Accept either an explicit doctype or any <html tag, case-insensitively.
    body = response.text
    assert "<!DOCTYPE html>" in body or "<html" in body.lower()
|
|
159
|
+
|
|
160
|
+
|
|
110
161
|
def test_router_prefix_default_and_custom():
|
|
111
162
|
import tabularmapper.api as api
|
|
112
163
|
assert {r.path for r in api.router.routes} == {
|
|
113
|
-
"/mapper/health", "/mapper/map",
|
|
164
|
+
"/mapper/health", "/mapper/config", "/mapper/map",
|
|
114
165
|
"/mapper/learn/pending", "/mapper/learn/approve", "/mapper/learn/reject"}
|
|
115
166
|
custom = api.make_router("/catalog/")
|
|
116
167
|
assert "/catalog/map" in {r.path for r in custom.routes}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|