tabularmapper 1.0.2__tar.gz → 1.0.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. {tabularmapper-1.0.2/src/tabularmapper.egg-info → tabularmapper-1.0.4}/PKG-INFO +15 -1
  2. {tabularmapper-1.0.2 → tabularmapper-1.0.4}/README.md +14 -0
  3. {tabularmapper-1.0.2 → tabularmapper-1.0.4}/pyproject.toml +6 -1
  4. {tabularmapper-1.0.2 → tabularmapper-1.0.4}/src/tabularmapper/__init__.py +1 -1
  5. {tabularmapper-1.0.2 → tabularmapper-1.0.4}/src/tabularmapper/api.py +39 -2
  6. {tabularmapper-1.0.2 → tabularmapper-1.0.4}/src/tabularmapper/engine.py +4 -1
  7. {tabularmapper-1.0.2 → tabularmapper-1.0.4}/src/tabularmapper/schema.py +1 -1
  8. tabularmapper-1.0.4/src/tabularmapper/static/index.html +565 -0
  9. {tabularmapper-1.0.2 → tabularmapper-1.0.4/src/tabularmapper.egg-info}/PKG-INFO +15 -1
  10. {tabularmapper-1.0.2 → tabularmapper-1.0.4}/src/tabularmapper.egg-info/SOURCES.txt +1 -0
  11. {tabularmapper-1.0.2 → tabularmapper-1.0.4}/tests/test_api.py +52 -1
  12. {tabularmapper-1.0.2 → tabularmapper-1.0.4}/LICENSE +0 -0
  13. {tabularmapper-1.0.2 → tabularmapper-1.0.4}/setup.cfg +0 -0
  14. {tabularmapper-1.0.2 → tabularmapper-1.0.4}/src/tabularmapper/ai_matcher.py +0 -0
  15. {tabularmapper-1.0.2 → tabularmapper-1.0.4}/src/tabularmapper/cli.py +0 -0
  16. {tabularmapper-1.0.2 → tabularmapper-1.0.4}/src/tabularmapper/learn.py +0 -0
  17. {tabularmapper-1.0.2 → tabularmapper-1.0.4}/src/tabularmapper/llm_fallback.py +0 -0
  18. {tabularmapper-1.0.2 → tabularmapper-1.0.4}/src/tabularmapper/mapping_cache.py +0 -0
  19. {tabularmapper-1.0.2 → tabularmapper-1.0.4}/src/tabularmapper/stores.py +0 -0
  20. {tabularmapper-1.0.2 → tabularmapper-1.0.4}/src/tabularmapper.egg-info/dependency_links.txt +0 -0
  21. {tabularmapper-1.0.2 → tabularmapper-1.0.4}/src/tabularmapper.egg-info/entry_points.txt +0 -0
  22. {tabularmapper-1.0.2 → tabularmapper-1.0.4}/src/tabularmapper.egg-info/requires.txt +0 -0
  23. {tabularmapper-1.0.2 → tabularmapper-1.0.4}/src/tabularmapper.egg-info/top_level.txt +0 -0
  24. {tabularmapper-1.0.2 → tabularmapper-1.0.4}/tests/test_learn.py +0 -0
  25. {tabularmapper-1.0.2 → tabularmapper-1.0.4}/tests/test_mapper.py +0 -0
  26. {tabularmapper-1.0.2 → tabularmapper-1.0.4}/tests/test_schema.py +0 -0
  27. {tabularmapper-1.0.2 → tabularmapper-1.0.4}/tests/test_stores.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tabularmapper
3
- Version: 1.0.2
3
+ Version: 1.0.4
4
4
  Summary: Map any spreadsheet (.xlsx) to a schema you define — deterministic column mapping with an optional AI matcher
5
5
  Author-email: Karthikeyan Duraisamy <karthikeyanduraisamy@kultivateindia.com>
6
6
  License-Expression: MIT
@@ -165,6 +165,7 @@ All are optional; sensible defaults apply.
165
165
  | `TABULARMAPPER_LEARN_STORE` | `memory://` (no files) | where self-learned header synonyms live |
166
166
  | `TABULARMAPPER_CONFIG` | *(none — required)* | output template + synonyms JSON (file / `https://` / `s3://`) |
167
167
  | `TABULARMAPPER_ROUTE_PREFIX` | `/mapper` | FastAPI router path prefix |
168
+ | `TABULARMAPPER_THRESHOLD` | `80` | fuzzy-accept gate (0–100); raise it to push borderline fuzzy matches to the AI matcher |
168
169
  | `OPENAI_API_KEY` | *(unset → AI off)* | enables the AI column matcher |
169
170
  | `OPENAI_BASE_URL` | `https://api.openai.com/v1` | any OpenAI-compatible endpoint |
170
171
  | `OPENAI_MODEL` | `gpt-4o-mini` | model name |
@@ -272,6 +273,7 @@ app.include_router(router)
272
273
  |---|---|---|
273
274
  | `POST` | `/mapper/map` | upload an `.xlsx`, get the mapping + rows (JSON) |
274
275
  | `GET` | `/mapper/health` | `{status, ai_enabled}` |
276
+ | `GET` | `/mapper/config` | config-builder web page — design a schema, export it as `schema.json` |
275
277
  | `GET` | `/mapper/learn/pending` | debit/credit synonyms awaiting approval |
276
278
  | `POST` | `/mapper/learn/approve` | approve a pending synonym (`?phrase=&field=`) |
277
279
  | `POST` | `/mapper/learn/reject` | reject a pending synonym |
@@ -280,6 +282,18 @@ app.include_router(router)
280
282
  blocking work in a threadpool. Store the original file to S3 in your own endpoint
281
283
  if you need it — the mapper stays out of AWS.
282
284
 
285
+ Two query params shape the request:
286
+
287
+ ```bash
288
+ curl -F file=@f.xlsx "http://localhost:8000/mapper/map?format=base64" # json + a mapped .xlsx in file_base64
289
+ curl -F file=@f.xlsx "http://localhost:8000/mapper/map?format=file" -OJ # download the mapped .xlsx
290
+ curl -F file=@f.xlsx "http://localhost:8000/mapper/map?threshold=90" # stricter fuzzy gate for this call
291
+ ```
292
+
293
+ `format` is `json` (default) / `base64` / `file`. `threshold` (0–100) overrides
294
+ `TABULARMAPPER_THRESHOLD` for one request — raise it to send borderline fuzzy
295
+ matches to the AI matcher instead of trusting them.
296
+
283
297
  The `/mapper` prefix is configurable (this is a general table→schema mapper, not
284
298
  just banks): set `TABULARMAPPER_ROUTE_PREFIX`, or build the router yourself:
285
299
 
@@ -125,6 +125,7 @@ All are optional; sensible defaults apply.
125
125
  | `TABULARMAPPER_LEARN_STORE` | `memory://` (no files) | where self-learned header synonyms live |
126
126
  | `TABULARMAPPER_CONFIG` | *(none — required)* | output template + synonyms JSON (file / `https://` / `s3://`) |
127
127
  | `TABULARMAPPER_ROUTE_PREFIX` | `/mapper` | FastAPI router path prefix |
128
+ | `TABULARMAPPER_THRESHOLD` | `80` | fuzzy-accept gate (0–100); raise it to push borderline fuzzy matches to the AI matcher |
128
129
  | `OPENAI_API_KEY` | *(unset → AI off)* | enables the AI column matcher |
129
130
  | `OPENAI_BASE_URL` | `https://api.openai.com/v1` | any OpenAI-compatible endpoint |
130
131
  | `OPENAI_MODEL` | `gpt-4o-mini` | model name |
@@ -232,6 +233,7 @@ app.include_router(router)
232
233
  |---|---|---|
233
234
  | `POST` | `/mapper/map` | upload an `.xlsx`, get the mapping + rows (JSON) |
234
235
  | `GET` | `/mapper/health` | `{status, ai_enabled}` |
236
+ | `GET` | `/mapper/config` | config-builder web page — design a schema, export it as `schema.json` |
235
237
  | `GET` | `/mapper/learn/pending` | debit/credit synonyms awaiting approval |
236
238
  | `POST` | `/mapper/learn/approve` | approve a pending synonym (`?phrase=&field=`) |
237
239
  | `POST` | `/mapper/learn/reject` | reject a pending synonym |
@@ -240,6 +242,18 @@ app.include_router(router)
240
242
  blocking work in a threadpool. Store the original file to S3 in your own endpoint
241
243
  if you need it — the mapper stays out of AWS.
242
244
 
245
+ Two query params shape the request:
246
+
247
+ ```bash
248
+ curl -F file=@f.xlsx "http://localhost:8000/mapper/map?format=base64" # json + a mapped .xlsx in file_base64
249
+ curl -F file=@f.xlsx "http://localhost:8000/mapper/map?format=file" -OJ # download the mapped .xlsx
250
+ curl -F file=@f.xlsx "http://localhost:8000/mapper/map?threshold=90" # stricter fuzzy gate for this call
251
+ ```
252
+
253
+ `format` is `json` (default) / `base64` / `file`. `threshold` (0–100) overrides
254
+ `TABULARMAPPER_THRESHOLD` for one request — raise it to send borderline fuzzy
255
+ matches to the AI matcher instead of trusting them.
256
+
243
257
  The `/mapper` prefix is configurable (this is a general table→schema mapper, not
244
258
  just banks): set `TABULARMAPPER_ROUTE_PREFIX`, or build the router yourself:
245
259
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "tabularmapper"
7
- version = "1.0.2"
7
+ version = "1.0.4"
8
8
  description = "Map any spreadsheet (.xlsx) to a schema you define — deterministic column mapping with an optional AI matcher"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -55,6 +55,11 @@ tabularmapper = "tabularmapper.cli:main"
55
55
  [tool.setuptools.packages.find]
56
56
  where = ["src"]
57
57
 
58
+ # Ship the config-builder page inside the wheel so GET /mapper/config works
59
+ # from a pip-installed package (not just a source checkout).
60
+ [tool.setuptools.package-data]
61
+ tabularmapper = ["static/*.html"]
62
+
58
63
  [tool.pytest.ini_options]
59
64
  testpaths = ["tests"]
60
65
  pythonpath = ["src"]
@@ -44,7 +44,7 @@ from .schema import (
44
44
  )
45
45
  from .stores import open_store
46
46
 
47
- __version__ = "1.0.2"
47
+ __version__ = "1.0.4"
48
48
 
49
49
  __all__ = [
50
50
  "process_file",
@@ -31,11 +31,12 @@ from __future__ import annotations
31
31
  import os
32
32
  from contextlib import asynccontextmanager
33
33
  from enum import Enum
34
+ from importlib.resources import as_file, files
34
35
  from typing import Any, Optional
35
36
 
36
37
  from fastapi import APIRouter, FastAPI, File, HTTPException, Query, UploadFile
37
38
  from fastapi.concurrency import run_in_threadpool
38
- from fastapi.responses import Response
39
+ from fastapi.responses import HTMLResponse, Response
39
40
  from pydantic import BaseModel
40
41
 
41
42
  from . import engine # imported as a module so OUTPUT_SCHEMA is read
@@ -45,6 +46,17 @@ from .mapping_cache import MappingCache
45
46
  _XLSX_MIME = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
46
47
 
47
48
 
49
+ def _default_threshold() -> int:
50
+ """The fuzzy-accept gate (0-100). Below this, a column is left unmapped and,
51
+ if it's a critical field, the AI matcher is asked to fill it. Raise it to
52
+ push borderline fuzzy matches to the AI instead of trusting them. Read from
53
+ TABULARMAPPER_THRESHOLD at request time; falls back to 80."""
54
+ try:
55
+ return max(0, min(100, int(os.getenv("TABULARMAPPER_THRESHOLD", "80"))))
56
+ except (TypeError, ValueError):
57
+ return 80
58
+
59
+
48
60
  class OutFormat(str, Enum):
49
61
  """Response shape for POST /map — rendered as a dropdown in the docs."""
50
62
  json = "json" # rows inline (default)
@@ -128,6 +140,18 @@ async def health() -> dict:
128
140
  return {"status": "ok", "ai_enabled": state.matcher is not None}
129
141
 
130
142
 
143
+ async def config_page() -> HTMLResponse:
144
+ """Serve the self-contained config-builder page bundled at
145
+ tabularmapper/static/index.html. Uses importlib.resources so it works even
146
+ when the package is imported from a zip/egg, not only an unpacked wheel."""
147
+ try:
148
+ resource = files("tabularmapper").joinpath("static", "index.html")
149
+ with as_file(resource) as path:
150
+ return HTMLResponse(path.read_text(encoding="utf-8"))
151
+ except (FileNotFoundError, ModuleNotFoundError):
152
+ raise HTTPException(status_code=404, detail="config builder page not found")
153
+
154
+
131
155
  async def map_statement(
132
156
  file: UploadFile = File(...),
133
157
  format: OutFormat = Query(
@@ -136,6 +160,13 @@ async def map_statement(
136
160
  ".xlsx encoded in file_base64; file = download the .xlsx "
137
161
  "directly (binary, no JSON body).",
138
162
  ),
163
+ threshold: Optional[int] = Query(
164
+ None,
165
+ ge=0, le=100,
166
+ description="Fuzzy-accept gate 0-100. Overrides TABULARMAPPER_THRESHOLD "
167
+ "(default 80) for this request. Raise it to send borderline "
168
+ "fuzzy matches to the AI matcher instead of trusting them.",
169
+ ),
139
170
  ):
140
171
  """Upload a spreadsheet (.xlsx); get the standardized mapping + rows.
141
172
 
@@ -143,18 +174,22 @@ async def map_statement(
143
174
  * json -> MapResponse with the rows in `transactions`
144
175
  * base64 -> same MapResponse, plus a mapped .xlsx in `file_base64`
145
176
  * file -> the mapped .xlsx as a downloadable attachment
177
+
178
+ `threshold` (query) overrides the fuzzy gate for this one call; otherwise the
179
+ server default (TABULARMAPPER_THRESHOLD, else 80) is used.
146
180
  """
147
181
  name = (file.filename or "").lower()
148
182
  if not name.endswith((".xlsx", ".xls")):
149
183
  raise HTTPException(status_code=400, detail="expected an .xlsx/.xls file")
150
184
 
185
+ gate = threshold if threshold is not None else _default_threshold()
151
186
  data = await file.read() # raw bytes, parsed in memory (never hits disk)
152
187
  try:
153
188
  # blocking work -> threadpool; process_stream reads straight from bytes
154
189
  res = await run_in_threadpool(
155
190
  process_stream, data,
156
191
  table_matcher=state.matcher, cache=state.cache,
157
- learn_store=state.learn,
192
+ learn_store=state.learn, threshold=gate,
158
193
  source_label=file.filename or "<upload>",
159
194
  )
160
195
  except Exception as exc: # noqa: BLE001
@@ -216,6 +251,8 @@ def make_router(prefix: Optional[str] = None, tags: Optional[list] = None) -> AP
216
251
  prefix = os.getenv("TABULARMAPPER_ROUTE_PREFIX", "/mapper")
217
252
  r = APIRouter(prefix=prefix.rstrip("/"), tags=tags or ["mapper"])
218
253
  r.add_api_route("/health", health, methods=["GET"])
254
+ r.add_api_route("/config", config_page, methods=["GET"],
255
+ response_class=HTMLResponse, include_in_schema=False)
219
256
  r.add_api_route("/map", map_statement, methods=["POST"], response_model=MapResponse)
220
257
  r.add_api_route("/learn/pending", learn_pending, methods=["GET"])
221
258
  r.add_api_route("/learn/approve", learn_approve, methods=["POST"])
@@ -810,7 +810,10 @@ def _run(rows: list[list], source_label: str, out_path, llm_fallback,
810
810
 
811
811
  from_cache = False
812
812
  col_maps = None
813
- schema_sig = _schema_signature() # scope the cache to the active schema
813
+ # Scope the cache to the active schema AND the fuzzy gate: a different
814
+ # threshold can change which columns map, so it must not reuse a mapping
815
+ # computed at another threshold.
816
+ schema_sig = f"{_schema_signature()}:t{threshold}"
814
817
  if cache is not None:
815
818
  cached = cache.get(header, namespace=schema_sig)
816
819
  if cached is not None:
@@ -198,7 +198,7 @@ def _infer_type(field_key: str) -> str:
198
198
  def default_config() -> Config:
199
199
  """The built-in default: EMPTY. This is a general mapper, so with no config
200
200
  it maps nothing — you must provide an output_schema + synonyms (a file/URL via
201
- BANK_MAPPER_CONFIG, a dict, or configure()). Use `bank_preset()` for the
201
+ TABULARMAPPER_CONFIG, a dict, or configure()). Use `bank_preset()` for the
202
202
  ready-made bank-statement schema."""
203
203
  return Config(output_schema=[], synonyms={}, critical_fields=[])
204
204
 
@@ -0,0 +1,565 @@
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Tabular Mapper</title>
7
+ <script src="https://cdn.tailwindcss.com"></script>
8
+ <script>
9
+ tailwind.config = {
10
+ theme: {
11
+ extend: {
12
+ colors: {
13
+ brand: {
14
+ DEFAULT: '#ED0E4C',
15
+ dark: '#C90B40',
16
+ tint: '#FDE7EE',
17
+ },
18
+ },
19
+ },
20
+ },
21
+ };
22
+ </script>
23
+ <link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css" rel="stylesheet">
24
+ <style>
25
+ @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');
26
+
27
+ body {
28
+ font-family: 'Inter', system-ui, sans-serif;
29
+ background-color: #f7f8fa;
30
+ }
31
+
32
+ .code-font {
33
+ font-family: ui-monospace, 'SF Mono', SFMono-Regular, Menlo, Consolas, monospace;
34
+ }
35
+
36
+ ::-webkit-scrollbar {
37
+ width: 8px;
38
+ height: 8px;
39
+ }
40
+ ::-webkit-scrollbar-track {
41
+ background: transparent;
42
+ }
43
+ ::-webkit-scrollbar-thumb {
44
+ background: #d4d7dd;
45
+ border-radius: 4px;
46
+ }
47
+ ::-webkit-scrollbar-thumb:hover {
48
+ background: #b9bdc6;
49
+ }
50
+ .code-panel ::-webkit-scrollbar-thumb {
51
+ background: #2a2f3a;
52
+ }
53
+ .code-panel ::-webkit-scrollbar-thumb:hover {
54
+ background: #3a404d;
55
+ }
56
+ </style>
57
+ </head>
58
+ <body class="h-screen flex flex-col overflow-hidden text-slate-800">
59
+
60
+ <!-- Header -->
61
+ <header class="bg-white border-b border-slate-200 h-14 flex items-center justify-between px-6 shrink-0">
62
+ <div class="flex items-center gap-2.5">
63
+ <div class="bg-brand text-white w-8 h-8 flex items-center justify-center rounded-lg">
64
+ <i class="fa-solid fa-layer-group text-sm"></i>
65
+ </div>
66
+ <div>
67
+ <h1 class="font-semibold text-[15px] leading-tight text-slate-900">Tabular Mapper</h1>
68
+ <p class="text-[11px] text-slate-400 leading-tight">xlsx mapping config</p>
69
+ </div>
70
+ </div>
71
+ <div class="flex items-center gap-2">
72
+ <input type="file" id="import-file" accept=".json,application/json" class="hidden" onchange="app.importFromFile(event)">
73
+ <button onclick="app.loadSample()" class="px-3 py-1.5 text-[13px] font-medium text-slate-600 border border-slate-200 rounded-lg hover:bg-slate-50 transition-colors">
74
+ <i class="fa-solid fa-table-list mr-1.5 text-xs"></i>Load sample
75
+ </button>
76
+ <button onclick="document.getElementById('import-file').click()" class="px-3 py-1.5 text-[13px] font-medium text-slate-600 border border-slate-200 rounded-lg hover:bg-slate-50 transition-colors">
77
+ <i class="fa-solid fa-upload mr-1.5 text-xs"></i>Import
78
+ </button>
79
+ <button onclick="app.exportToFile()" class="px-3 py-1.5 text-[13px] font-medium text-white bg-brand hover:bg-brand-dark rounded-lg transition-colors">
80
+ <i class="fa-solid fa-download mr-1.5 text-xs"></i>Export
81
+ </button>
82
+ </div>
83
+ </header>
84
+
85
+ <!-- Main Content -->
86
+ <div class="flex flex-1 overflow-hidden">
87
+
88
+ <!-- Left Panel: Editor -->
89
+ <div class="flex-1 flex flex-col min-w-0 overflow-y-auto">
90
+ <div class="p-6 max-w-4xl mx-auto w-full space-y-6 pb-16">
91
+
92
+ <!-- Fields -->
93
+ <section class="bg-white rounded-xl border border-slate-200 shadow-sm overflow-hidden">
94
+ <div class="px-5 py-3.5 border-b border-slate-100 flex justify-between items-center">
95
+ <div>
96
+ <h2 class="font-semibold text-sm text-slate-800">Fields</h2>
97
+ <p class="text-xs text-slate-400 mt-0.5">Header, type, description, synonyms, and the critical flag — all in one place.</p>
98
+ </div>
99
+ <button onclick="app.addField()" class="text-xs font-medium text-slate-600 border border-slate-200 px-2.5 py-1.5 rounded-lg hover:bg-slate-50 hover:text-brand hover:border-brand/40 transition-colors">
100
+ <i class="fa-solid fa-plus mr-1 text-[10px]"></i>Add field
101
+ </button>
102
+ </div>
103
+
104
+ <div class="p-5 space-y-3" id="fields-container"></div>
105
+
106
+ <div id="add-field-footer" class="hidden px-5 pb-5">
107
+ <button onclick="app.addField()" class="w-full py-2 border border-dashed border-slate-300 rounded-lg text-slate-500 text-[13px] font-medium hover:border-brand/50 hover:text-brand transition-colors">
108
+ <i class="fa-solid fa-plus mr-1 text-[10px]"></i>Add field
109
+ </button>
110
+ </div>
111
+
112
+ <div id="empty-state-schema" class="hidden py-14 text-center">
113
+ <div class="inline-flex items-center justify-center w-10 h-10 rounded-full bg-slate-100 mb-3">
114
+ <i class="fa-solid fa-table-list text-slate-400 text-sm"></i>
115
+ </div>
116
+ <p class="text-slate-500 text-sm mb-4">No schema loaded yet.</p>
117
+ <div class="flex items-center justify-center gap-2">
118
+ <button onclick="app.loadSample()" class="text-[13px] font-medium bg-brand hover:bg-brand-dark text-white px-3.5 py-1.5 rounded-lg transition-colors">Load sample data</button>
119
+ <button onclick="document.getElementById('import-file').click()" class="text-[13px] font-medium text-slate-600 border border-slate-200 px-3.5 py-1.5 rounded-lg hover:bg-slate-50 transition-colors">Import schema.json</button>
120
+ <button onclick="app.addField()" class="text-[13px] font-medium text-slate-600 border border-slate-200 px-3.5 py-1.5 rounded-lg hover:bg-slate-50 transition-colors">Start blank</button>
121
+ </div>
122
+ </div>
123
+ </section>
124
+
125
+ <!-- Require Any -->
126
+ <section class="bg-white rounded-xl border border-slate-200 shadow-sm overflow-hidden">
127
+ <div class="px-5 py-3.5 border-b border-slate-100">
128
+ <h2 class="font-semibold text-sm text-slate-800">Require any</h2>
129
+ <p class="text-xs text-slate-400 mt-0.5">At least one field from each group must exist in the output.</p>
130
+ </div>
131
+ <div class="p-5 space-y-3" id="require-any-container"></div>
132
+ <div class="px-5 pb-5">
133
+ <button onclick="app.addRequireAnyGroup()" class="w-full py-2 border border-dashed border-slate-300 rounded-lg text-slate-500 text-[13px] font-medium hover:border-brand/50 hover:text-brand transition-colors">
134
+ <i class="fa-solid fa-plus mr-1 text-[10px]"></i>Add group
135
+ </button>
136
+ </div>
137
+ </section>
138
+
139
+ </div>
140
+ </div>
141
+
142
+ <!-- Right Panel: Preview -->
143
+ <div class="code-panel w-[420px] bg-[#0d1117] flex flex-col shrink-0 border-l border-slate-200">
144
+ <div class="px-4 py-2.5 border-b border-white/10 flex justify-between items-center">
145
+ <span class="text-xs font-mono text-slate-400 code-font">schema.json</span>
146
+ <button onclick="app.copyToClipboard()" class="text-xs font-medium text-slate-300 border border-white/15 px-2.5 py-1 rounded-md hover:bg-white/10 transition-colors">
147
+ <i class="fa-regular fa-copy mr-1.5 text-[10px]"></i>Copy
148
+ </button>
149
+ </div>
150
+ <div class="flex-1 overflow-auto p-4">
151
+ <pre class="code-font text-[13px] leading-relaxed"><code id="json-preview" class="text-slate-200"></code></pre>
152
+ </div>
153
+ <div class="px-4 py-2 border-t border-white/10 text-xs text-slate-500 flex justify-between code-font">
154
+ <span>live preview</span>
155
+ <span id="char-count">0 chars</span>
156
+ </div>
157
+ </div>
158
+
159
+ </div>
160
+
161
+ <!-- Toast Notification -->
162
+ <div id="toast" class="fixed bottom-6 right-6 bg-slate-800 text-white px-4 py-2.5 rounded-lg shadow-lg transform translate-y-20 opacity-0 transition-all duration-300 flex items-center gap-2.5 z-50 text-sm">
163
+ <i class="fa-solid fa-circle-check text-brand"></i>
164
+ <span id="toast-message">Action Successful</span>
165
+ </div>
166
+
167
+ <script>
168
+ const DEFAULT_DATA = {
169
+ "output_schema": [
170
+ {
171
+ "field": "date",
172
+ "header": "Date",
173
+ "type": "date",
174
+ "description": "The date the transaction was posted or executed. May appear as transaction date, value date, or posting date in source data — these can differ by 1-2 days for the same transaction; prefer transaction/posting date over value date unless the source only provides one."
175
+ },
176
+ {
177
+ "field": "narration",
178
+ "header": "Narration",
179
+ "type": "string",
180
+ "description": "Free-text description of the transaction, such as the payee, purpose, or transaction type (e.g., 'NEFT TRANSFER TO XYZ', 'ATM WITHDRAWAL'). Often the longest and most variably formatted field in the source; do not truncate or attempt to parse structured data out of it unless separately instructed."
181
+ },
182
+ {
183
+ "field": "reference_number",
184
+ "header": "Reference Number",
185
+ "type": "string",
186
+ "description": "A unique identifier for the transaction, such as a cheque number, UTR, RRN, or transaction ID. Frequently blank for cash, UPI, or POS transactions — absence is normal and should not be treated as a mapping failure."
187
+ },
188
+ {
189
+ "field": "debit",
190
+ "header": "Debit",
191
+ "type": "currency",
192
+ "description": "Amount withdrawn or paid out in this transaction. Should be blank or zero if the transaction is a credit. Strip currency symbols and thousand separators before parsing to numeric."
193
+ },
194
+ {
195
+ "field": "credit",
196
+ "header": "Credit",
197
+ "type": "currency",
198
+ "description": "Amount deposited or received in this transaction. Should be blank or zero if the transaction is a debit. Strip currency symbols and thousand separators before parsing to numeric."
199
+ }
200
+ ],
201
+ "synonyms": {
202
+ "date": ["date", "txn date", "transaction date", "value date", "posting date", "entry date", "book date", "date of transaction", "trans date", "value dt", "txn dt", "date/time", "date & time"],
203
+ "narration": ["narration", "description", "particulars", "transaction details", "transaction description", "details", "remarks", "memo", "transaction narration", "particulars/remarks", "desc", "transaction particulars", "purpose", "comments", "notes"],
204
+ "reference_number": ["reference number", "ref no", "ref number", "reference no", "reference", "cheque no", "cheque number", "chq no", "chq number", "instrument no", "instrument number", "transaction id", "txn id", "transaction reference", "transaction ref no", "utr", "utr no", "utr number", "rrn", "rrn number", "cheque/ref no", "cheque/ref number", "doc no", "document number", "voucher no", "voucher number", "receipt no", "receipt number", "transaction reference number"],
205
+ "debit": ["debit", "debit amount", "debit amt", "withdrawal", "withdrawal amount", "withdrawal amt", "dr", "dr amount", "amount debited", "amount withdrawn", "paid out", "debit (dr)", "debit(dr)", "outflow", "money out", "payment", "debits"],
206
+ "credit": ["credit", "credit amount", "credit amt", "deposit", "deposit amount", "deposit amt", "cr", "cr amount", "amount credited", "amount deposited", "paid in", "credit (cr)", "credit(cr)", "inflow", "money in", "receipt", "credits"]
207
+ },
208
+ "critical_fields": ["date", "narration"],
209
+ "require_any": [
210
+ ["debit", "credit"]
211
+ ]
212
+ };
213
+
214
+ class SchemaApp {
215
+ constructor() {
216
+ this.data = SchemaApp.emptyData();
217
+ this.init();
218
+ }
219
+
220
+ static emptyData() {
221
+ return { output_schema: [], synonyms: {}, critical_fields: [], require_any: [] };
222
+ }
223
+
224
+ static normalize(obj) {
225
+ obj = obj || {};
226
+ return {
227
+ output_schema: Array.isArray(obj.output_schema) ? obj.output_schema : [],
228
+ synonyms: (obj.synonyms && typeof obj.synonyms === 'object' && !Array.isArray(obj.synonyms)) ? obj.synonyms : {},
229
+ critical_fields: Array.isArray(obj.critical_fields) ? obj.critical_fields : [],
230
+ require_any: Array.isArray(obj.require_any) ? obj.require_any : [],
231
+ };
232
+ }
233
+
234
+ init() {
235
+ this.renderFields();
236
+ this.renderRequireAny();
237
+ this.updatePreview();
238
+ }
239
+
240
+ // --- Core Logic ---
241
+
242
+ getJSON() {
243
+ return this.data;
244
+ }
245
+
246
+ updatePreview() {
247
+ const json = JSON.stringify(this.data, null, 2);
248
+ document.getElementById('json-preview').textContent = json;
249
+ document.getElementById('char-count').textContent = `${json.length} chars`;
250
+ }
251
+
252
+ showToast(msg) {
253
+ const toast = document.getElementById('toast');
254
+ document.getElementById('toast-message').textContent = msg;
255
+ toast.classList.remove('translate-y-20', 'opacity-0');
256
+ setTimeout(() => {
257
+ toast.classList.add('translate-y-20', 'opacity-0');
258
+ }, 3000);
259
+ }
260
+
261
+ loadSample() {
262
+ if (this.data.output_schema.length && !confirm("Load sample schema? This replaces the current schema.")) return;
263
+ this.data = JSON.parse(JSON.stringify(DEFAULT_DATA));
264
+ this.init();
265
+ this.showToast("Sample schema loaded");
266
+ }
267
+
268
+ importFromFile(event) {
269
+ const file = event.target.files[0];
270
+ if (!file) return;
271
+ const reader = new FileReader();
272
+ reader.onload = (e) => {
273
+ try {
274
+ this.data = SchemaApp.normalize(JSON.parse(e.target.result));
275
+ this.init();
276
+ this.showToast(`Imported ${file.name}`);
277
+ } catch (err) {
278
+ this.showToast("Could not import — not valid JSON");
279
+ }
280
+ };
281
+ reader.readAsText(file);
282
+ event.target.value = '';
283
+ }
284
+
285
+ exportToFile() {
286
+ const json = JSON.stringify(this.data, null, 2);
287
+ const blob = new Blob([json], { type: 'application/json' });
288
+ const url = URL.createObjectURL(blob);
289
+ const a = document.createElement('a');
290
+ a.href = url;
291
+ a.download = 'schema.json';
292
+ document.body.appendChild(a);
293
+ a.click();
294
+ document.body.removeChild(a);
295
+ URL.revokeObjectURL(url);
296
+ this.showToast("schema.json exported");
297
+ }
298
+
299
+ copyToClipboard() {
300
+ navigator.clipboard.writeText(JSON.stringify(this.data, null, 2)).then(() => {
301
+ this.showToast("JSON copied to clipboard");
302
+ });
303
+ }
304
+
305
+ // --- Fields (schema + synonyms + critical, unified) ---
306
+
307
+ renderFields() {
308
+ const container = document.getElementById('fields-container');
309
+ const empty = document.getElementById('empty-state-schema');
310
+ const footer = document.getElementById('add-field-footer');
311
+ container.innerHTML = '';
312
+
313
+ if (this.data.output_schema.length === 0) {
314
+ empty.classList.remove('hidden');
315
+ container.classList.add('hidden');
316
+ footer.classList.add('hidden');
317
+ return;
318
+ }
319
+ empty.classList.add('hidden');
320
+ container.classList.remove('hidden');
321
+ footer.classList.remove('hidden');
322
+
323
+ this.data.output_schema.forEach((field, index) => {
324
+ const name = field.field;
325
+ if (!this.data.synonyms[name]) this.data.synonyms[name] = [];
326
+ const syns = this.data.synonyms[name];
327
+ const isCritical = this.data.critical_fields.includes(name);
328
+
329
+ const synTags = syns.map((syn, i) => `
330
+ <span class="inline-flex items-center bg-white border border-slate-200 text-slate-600 text-xs px-2 py-0.5 rounded-md mr-1.5 mb-1.5">
331
+ ${syn}
332
+ <button onclick="app.removeSynonym('${name}', ${i})" class="ml-1.5 text-slate-300 hover:text-brand focus:outline-none">
333
+ <i class="fa-solid fa-xmark text-[10px]"></i>
334
+ </button>
335
+ </span>`).join('');
336
+
337
+ const card = document.createElement('div');
338
+ card.className = "bg-slate-50 rounded-lg border border-slate-200 p-3.5";
339
+ card.innerHTML = `
340
+ <div class="flex flex-wrap items-center gap-2">
341
+ <span class="text-xs text-slate-300 tabular-nums w-4 text-center shrink-0">${index + 1}</span>
342
+ <input type="text" value="${field.field}" placeholder="field_id"
343
+ onchange="app.updateField(${index}, 'field', this.value)"
344
+ class="flex-1 min-w-[140px] bg-white border border-slate-300 rounded-md px-2.5 py-1.5 text-[13px] font-medium text-slate-700 outline-none focus:border-brand focus:ring-1 focus:ring-brand/20 transition-colors">
345
+ <input type="text" value="${field.header}" placeholder="Header"
346
+ onchange="app.updateField(${index}, 'header', this.value)"
347
+ class="flex-1 min-w-[140px] bg-white border border-slate-300 rounded-md px-2.5 py-1.5 text-[13px] text-slate-600 outline-none focus:border-brand focus:ring-1 focus:ring-brand/20 transition-colors">
348
+ <select onchange="app.updateField(${index}, 'type', this.value)"
349
+ class="w-28 shrink-0 bg-white border border-slate-300 rounded-md px-2 py-1.5 text-[13px] text-slate-600 outline-none focus:border-brand focus:ring-1 focus:ring-brand/20 cursor-pointer">
350
+ <option value="string" ${field.type === 'string' ? 'selected' : ''}>string</option>
351
+ <option value="date" ${field.type === 'date' ? 'selected' : ''}>date</option>
352
+ <option value="currency" ${field.type === 'currency' ? 'selected' : ''}>currency</option>
353
+ <option value="number" ${field.type === 'number' ? 'selected' : ''}>number</option>
354
+ <option value="boolean" ${field.type === 'boolean' ? 'selected' : ''}>boolean</option>
355
+ </select>
356
+ <button onclick="app.toggleCritical('${name}')" title="Critical: must be present in the output"
357
+ class="shrink-0 border text-[11px] font-medium px-2 py-1.5 rounded-md transition-colors ${isCritical ? 'bg-brand-tint text-brand border-brand/30' : 'bg-white text-slate-400 border-slate-200 hover:text-slate-600'}">
358
+ <i class="fa-${isCritical ? 'solid' : 'regular'} fa-star text-[10px] mr-1"></i>Critical
359
+ </button>
360
+ <button onclick="app.removeField(${index})" title="Remove field"
361
+ class="shrink-0 text-slate-300 hover:text-brand transition-colors p-1.5 rounded">
362
+ <i class="fa-solid fa-trash-can text-xs"></i>
363
+ </button>
364
+ </div>
365
+ <textarea onchange="app.updateField(${index}, 'description', this.value)"
366
+ placeholder="Description — what this field means and how to map it"
367
+ class="w-full mt-2 bg-white border border-slate-200 rounded-md px-2.5 py-1.5 text-[13px] text-slate-500 outline-none focus:border-brand focus:ring-1 focus:ring-brand/20 transition-colors resize-none overflow-hidden"
368
+ oninput="this.style.height=''; this.style.height=this.scrollHeight+'px'">${field.description || ''}</textarea>
369
+ <div class="mt-2.5">
370
+ <div class="flex items-center justify-between mb-1.5">
371
+ <span class="text-[11px] font-semibold uppercase tracking-wide text-slate-400">Synonyms</span>
372
+ <span class="text-[11px] text-slate-400 code-font">${syns.length}</span>
373
+ </div>
374
+ <div class="flex flex-wrap">${synTags}</div>
375
+ <div class="flex gap-2 mt-0.5">
376
+ <input type="text" id="synonym-input-${name}" placeholder="Add synonym, press Enter..."
377
+ class="flex-1 bg-white border border-slate-300 text-slate-700 text-[13px] rounded-md focus:outline-none focus:border-brand focus:ring-1 focus:ring-brand/20 px-2.5 py-1.5"
378
+ onkeypress="if(event.key==='Enter') app.addSynonym('${name}')">
379
+ <button onclick="app.addSynonym('${name}')"
380
+ class="bg-white border border-slate-300 hover:bg-slate-100 hover:text-brand text-slate-600 rounded-md text-[13px] px-3 py-1.5 transition-colors">
381
+ <i class="fa-solid fa-plus text-xs"></i>
382
+ </button>
383
+ </div>
384
+ </div>
385
+ `;
386
+ container.appendChild(card);
387
+ });
388
+
389
+ // Auto-size description textareas to fit their content
390
+ container.querySelectorAll('textarea').forEach(ta => {
391
+ ta.style.height = '';
392
+ ta.style.height = ta.scrollHeight + 'px';
393
+ });
394
+ }
395
+
396
+ addField() {
397
+ this.data.output_schema.push({ field: "new_field", header: "New Field", type: "string", description: "" });
398
+ this.renderFields();
399
+ this.renderRequireAny();
400
+ this.updatePreview();
401
+ // Focus the new field's id for immediate typing
402
+ const container = document.getElementById('fields-container');
403
+ const last = container.lastElementChild;
404
+ if (last) {
405
+ last.scrollIntoView({ behavior: 'smooth', block: 'nearest' });
406
+ const input = last.querySelector('input');
407
+ if (input) { input.focus(); input.select(); }
408
+ }
409
+ }
410
+
411
+ removeField(index) {
412
+ const name = this.data.output_schema[index].field;
413
+ this.data.output_schema.splice(index, 1);
414
+ delete this.data.synonyms[name];
415
+ this.data.critical_fields = this.data.critical_fields.filter(f => f !== name);
416
+ this.data.require_any = this.data.require_any.map(g => g.filter(f => f !== name)).filter(g => g.length > 0);
417
+ this.renderFields();
418
+ this.renderRequireAny();
419
+ this.updatePreview();
420
+ }
421
+
422
+ updateField(index, key, value) {
423
+ const oldName = this.data.output_schema[index].field;
424
+ this.data.output_schema[index][key] = value;
425
+
426
+ if (key === 'field' && oldName !== value) {
427
+ if (this.data.synonyms[oldName]) {
428
+ this.data.synonyms[value] = this.data.synonyms[oldName];
429
+ delete this.data.synonyms[oldName];
430
+ }
431
+ this.data.critical_fields = this.data.critical_fields.map(f => f === oldName ? value : f);
432
+ this.data.require_any = this.data.require_any.map(g => g.map(f => f === oldName ? value : f));
433
+ this.renderFields();
434
+ this.renderRequireAny();
435
+ }
436
+ this.updatePreview();
437
+ }
438
+
439
+ toggleCritical(field) {
440
+ const i = this.data.critical_fields.indexOf(field);
441
+ if (i === -1) this.data.critical_fields.push(field);
442
+ else this.data.critical_fields.splice(i, 1);
443
+ this.renderFields();
444
+ this.updatePreview();
445
+ }
446
+
447
+ // --- Synonyms ---
448
+
449
+ addSynonym(field) {
450
+ const input = document.getElementById(`synonym-input-${field}`);
451
+ if (!input) return;
452
+ const value = input.value.trim().toLowerCase();
453
+ if (!value) return;
454
+ if (!this.data.synonyms[field]) this.data.synonyms[field] = [];
455
+ if (this.data.synonyms[field].includes(value)) {
456
+ this.showToast("Synonym already exists");
457
+ return;
458
+ }
459
+ this.data.synonyms[field].push(value);
460
+ this.renderFields();
461
+ this.updatePreview();
462
+ // Refocus the same input for rapid entry
463
+ const again = document.getElementById(`synonym-input-${field}`);
464
+ if (again) again.focus();
465
+ }
466
+
467
+ removeSynonym(field, index) {
468
+ this.data.synonyms[field].splice(index, 1);
469
+ this.renderFields();
470
+ this.updatePreview();
471
+ }
472
+
473
+ // --- Require Any ---
474
+
475
+ renderRequireAny() {
476
+ const container = document.getElementById('require-any-container');
477
+ container.innerHTML = '';
478
+
479
+ const currentFields = this.data.output_schema.map(f => f.field);
480
+
481
+ if (this.data.require_any.length === 0) {
482
+ container.innerHTML = '<p class="text-xs text-slate-400">No groups yet. Add one to require at least one of a set of fields.</p>';
483
+ return;
484
+ }
485
+
486
+ this.data.require_any.forEach((group, groupIndex) => {
487
+ const groupDiv = document.createElement('div');
488
+ groupDiv.className = "bg-slate-50 border border-slate-200 rounded-lg p-3 relative group";
489
+
490
+ let tagsHtml = group.map((field, i) => {
491
+ if (!currentFields.includes(field)) return '';
492
+ return `
493
+ <span class="inline-flex items-center bg-slate-100 text-slate-600 text-xs px-2 py-0.5 rounded-md border border-slate-200 mr-1.5 mb-1.5">
494
+ ${field}
495
+ <button onclick="app.removeRequireAnyField(${groupIndex}, ${i})" class="ml-1.5 text-slate-400 hover:text-brand">
496
+ <i class="fa-solid fa-xmark text-[10px]"></i>
497
+ </button>
498
+ </span>
499
+ `}).join('');
500
+
501
+ let availableFields = currentFields.filter(f => !group.includes(f));
502
+ let optionsHtml = availableFields.map(f => `<option value="${f}">${f}</option>`).join('');
503
+
504
+ groupDiv.innerHTML = `
505
+ <div class="absolute top-2 right-2 opacity-0 group-hover:opacity-100 transition-opacity">
506
+ <button onclick="app.removeRequireAnyGroup(${groupIndex})" class="text-slate-400 hover:text-brand p-1">
507
+ <i class="fa-solid fa-trash-can text-xs"></i>
508
+ </button>
509
+ </div>
510
+ <div class="text-[11px] font-semibold text-slate-400 mb-2 uppercase tracking-wide">Group ${groupIndex + 1}</div>
511
+ <div class="flex flex-wrap mb-1.5 min-h-[1.75rem]">
512
+ ${tagsHtml}
513
+ </div>
514
+ <div class="flex gap-2 max-w-md">
515
+ <select id="require-any-select-${groupIndex}" class="flex-1 bg-white border border-slate-300 text-slate-700 text-[13px] rounded-lg focus:outline-none focus:border-brand focus:ring-1 focus:ring-brand/20 block px-2.5 py-1.5">
516
+ <option value="">Add field...</option>
517
+ ${optionsHtml}
518
+ </select>
519
+ <button onclick="app.addRequireAnyField(${groupIndex})" class="bg-white border border-slate-300 hover:bg-slate-50 hover:text-brand text-slate-600 text-[13px] px-3 py-1.5 rounded-lg transition-colors">
520
+ <i class="fa-solid fa-plus text-xs"></i>
521
+ </button>
522
+ </div>
523
+ `;
524
+ container.appendChild(groupDiv);
525
+ });
526
+ }
527
+
528
+ addRequireAnyGroup() {
529
+ this.data.require_any.push([]);
530
+ this.renderRequireAny();
531
+ this.updatePreview();
532
+ }
533
+
534
+ removeRequireAnyGroup(index) {
535
+ this.data.require_any.splice(index, 1);
536
+ this.renderRequireAny();
537
+ this.updatePreview();
538
+ }
539
+
540
+ addRequireAnyField(groupIndex) {
541
+ const select = document.getElementById(`require-any-select-${groupIndex}`);
542
+ const value = select.value;
543
+ if (value) {
544
+ this.data.require_any[groupIndex].push(value);
545
+ this.renderRequireAny();
546
+ this.updatePreview();
547
+ }
548
+ }
549
+
550
+ removeRequireAnyField(groupIndex, fieldIndex) {
551
+ this.data.require_any[groupIndex].splice(fieldIndex, 1);
552
+ if (this.data.require_any[groupIndex].length === 0) {
553
+ this.data.require_any.splice(groupIndex, 1);
554
+ }
555
+ this.renderRequireAny();
556
+ this.updatePreview();
557
+ }
558
+ }
559
+
560
+ // Initialize App
561
+ const app = new SchemaApp();
562
+
563
+ </script>
564
+ </body>
565
+ </html>
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tabularmapper
3
- Version: 1.0.2
3
+ Version: 1.0.4
4
4
  Summary: Map any spreadsheet (.xlsx) to a schema you define — deterministic column mapping with an optional AI matcher
5
5
  Author-email: Karthikeyan Duraisamy <karthikeyanduraisamy@kultivateindia.com>
6
6
  License-Expression: MIT
@@ -165,6 +165,7 @@ All are optional; sensible defaults apply.
165
165
  | `TABULARMAPPER_LEARN_STORE` | `memory://` (no files) | where self-learned header synonyms live |
166
166
  | `TABULARMAPPER_CONFIG` | *(none — required)* | output template + synonyms JSON (file / `https://` / `s3://`) |
167
167
  | `TABULARMAPPER_ROUTE_PREFIX` | `/mapper` | FastAPI router path prefix |
168
+ | `TABULARMAPPER_THRESHOLD` | `80` | fuzzy-accept gate (0–100); raise it to push borderline fuzzy matches to the AI matcher |
168
169
  | `OPENAI_API_KEY` | *(unset → AI off)* | enables the AI column matcher |
169
170
  | `OPENAI_BASE_URL` | `https://api.openai.com/v1` | any OpenAI-compatible endpoint |
170
171
  | `OPENAI_MODEL` | `gpt-4o-mini` | model name |
@@ -272,6 +273,7 @@ app.include_router(router)
272
273
  |---|---|---|
273
274
  | `POST` | `/mapper/map` | upload an `.xlsx`, get the mapping + rows (JSON) |
274
275
  | `GET` | `/mapper/health` | `{status, ai_enabled}` |
276
+ | `GET` | `/mapper/config` | config-builder web page — design a schema, export `config.json` |
275
277
  | `GET` | `/mapper/learn/pending` | debit/credit synonyms awaiting approval |
276
278
  | `POST` | `/mapper/learn/approve` | approve a pending synonym (`?phrase=&field=`) |
277
279
  | `POST` | `/mapper/learn/reject` | reject a pending synonym |
@@ -280,6 +282,18 @@ app.include_router(router)
280
282
  blocking work in a threadpool. Store the original file to S3 in your own endpoint
281
283
  if you need it — the mapper stays out of AWS.
282
284
 
285
+ Two query params shape the request:
286
+
287
+ ```bash
288
+ curl -F file=@f.xlsx "http://localhost:8000/mapper/map?format=base64" # json + a mapped .xlsx in file_base64
289
+ curl -F file=@f.xlsx "http://localhost:8000/mapper/map?format=file" -OJ # download the mapped .xlsx
290
+ curl -F file=@f.xlsx "http://localhost:8000/mapper/map?threshold=90" # stricter fuzzy gate for this call
291
+ ```
292
+
293
+ `format` is `json` (default) / `base64` / `file`. `threshold` (0–100) overrides
294
+ `TABULARMAPPER_THRESHOLD` for one request — raise it to send borderline fuzzy
295
+ matches to the AI matcher instead of trusting them.
296
+
283
297
  The `/mapper` prefix is configurable (this is a general table→schema mapper, not
284
298
  just banks): set `TABULARMAPPER_ROUTE_PREFIX`, or build the router yourself:
285
299
 
@@ -17,6 +17,7 @@ src/tabularmapper.egg-info/dependency_links.txt
17
17
  src/tabularmapper.egg-info/entry_points.txt
18
18
  src/tabularmapper.egg-info/requires.txt
19
19
  src/tabularmapper.egg-info/top_level.txt
20
+ src/tabularmapper/static/index.html
20
21
  tests/test_api.py
21
22
  tests/test_learn.py
22
23
  tests/test_mapper.py
@@ -107,10 +107,61 @@ def test_map_rejects_non_xlsx(client):
107
107
  assert r.status_code == 400
108
108
 
109
109
 
110
def _fuzzy_xlsx_bytes():
    """Return the bytes of a tiny bank sheet whose 'Descriptn' header only fuzzy-matches (score 90)."""
    from openpyxl import Workbook

    rows = (
        ["Date", "Descriptn", "Debit", "Credit"],
        ["01-06-2026", "Coffee", "150", ""],
        ["02-06-2026", "Salary", "", "45000"],
    )

    workbook = Workbook()
    sheet = workbook.active
    for row in rows:
        sheet.append(row)

    # Serialise to memory so the test never touches the filesystem.
    stream = io.BytesIO()
    workbook.save(stream)
    return stream.getvalue()
118
+
119
+
120
def test_map_threshold_query_changes_mapping(client):
    """A per-request ``threshold`` changes how the fuzzy 'Descriptn' header maps."""
    payload = _fuzzy_xlsx_bytes()

    # default gate (80): 'Descriptn' (score 90) is accepted as fuzzy
    r = client.post("/mapper/map", files={"file": ("s.xlsx", io.BytesIO(payload))})
    # Check the status first so a failed request is reported as such rather
    # than as a KeyError on the response body below.
    assert r.status_code == 200
    cols = {c["raw_header"]: c for c in r.json()["columns"]}
    assert cols["Descriptn"]["field"] == "description"
    assert cols["Descriptn"]["method"] == "fuzzy"

    # raise the gate above 90: the same column now falls through -> unmapped
    r = client.post("/mapper/map", params={"threshold": 95},
                    files={"file": ("s.xlsx", io.BytesIO(payload))})
    assert r.status_code == 200
    cols = {c["raw_header"]: c for c in r.json()["columns"]}
    assert cols["Descriptn"]["field"] is None
134
+
135
+
136
def test_map_threshold_out_of_range(client):
    """Thresholds above 100 or below 0 are rejected with a 422."""
    workbook_bytes = _fuzzy_xlsx_bytes()
    for out_of_range in (150, -1):
        upload = {"file": ("s.xlsx", io.BytesIO(workbook_bytes))}
        response = client.post(
            "/mapper/map",
            params={"threshold": out_of_range},
            files=upload,
        )
        assert response.status_code == 422  # ge=0 / le=100 validation
142
+
143
+
144
def test_default_threshold_reads_env(monkeypatch):
    """``_default_threshold`` honours the env var and falls back to 80."""
    import tabularmapper.api as api

    env_var = "TABULARMAPPER_THRESHOLD"

    # A valid integer is used as-is.
    monkeypatch.setenv(env_var, "90")
    assert api._default_threshold() == 90

    # invalid -> falls back
    monkeypatch.setenv(env_var, "banana")
    assert api._default_threshold() == 80

    # Unset behaves like the invalid case.
    monkeypatch.delenv(env_var, raising=False)
    assert api._default_threshold() == 80
152
+
153
+
154
def test_config_page_served(client):
    """GET /mapper/config answers 200 with an HTML document."""
    response = client.get("/mapper/config")
    assert response.status_code == 200

    content_type = response.headers["content-type"]
    assert content_type.startswith("text/html")

    body = response.text
    assert "<!DOCTYPE html>" in body or "<html" in body.lower()
159
+
160
+
110
161
  def test_router_prefix_default_and_custom():
111
162
  import tabularmapper.api as api
112
163
  assert {r.path for r in api.router.routes} == {
113
- "/mapper/health", "/mapper/map",
164
+ "/mapper/health", "/mapper/config", "/mapper/map",
114
165
  "/mapper/learn/pending", "/mapper/learn/approve", "/mapper/learn/reject"}
115
166
  custom = api.make_router("/catalog/")
116
167
  assert "/catalog/map" in {r.path for r in custom.routes}
File without changes
File without changes