tabularmapper 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,341 @@
1
+ """
2
+ schema.py — externalized, loadable configuration for the mapper.
3
+
4
+ Everything that used to be a hardcoded constant in `engine.py` — the output
5
+ template (`OUTPUT_SCHEMA`), the header vocabulary (`SYNONYMS`), and the critical
6
+ fields — lives here as data, and can be loaded from a JSON file, an HTTP(S) URL,
7
+ an S3 object, or an in-memory dict. Change the template by editing JSON in a
8
+ bucket; no code change, no redeploy.
9
+
10
+ Config JSON shape (all keys optional; missing keys fall back to the defaults):
11
+
12
+ {
13
+ "version": 1,
14
+ "output_schema": [
15
+ {"field": "date", "header": "Date", "type": "date"},
16
+ {"field": "description", "header": "Narration", "type": "text"},
17
+ {"field": "debit", "header": "Debit", "type": "money"},
18
+ {"field": "credit", "header": "Credit", "type": "money"}
19
+ ],
20
+ "critical_fields": ["date"],
21
+ "synonyms": { "date": ["date", "txn date"], "debit": ["withdrawal"] }
22
+ }
23
+
24
+ `type` ∈ {"date", "money", "text"} drives generic extraction, so adding a NEW
25
+ column is a config-only change. The field keys `debit`, `credit`, `amount` keep
26
+ their special money-reconciliation behavior (a single signed `amount` column is
27
+ split into debit/credit).
28
+ """
29
+
30
+ from __future__ import annotations
31
+
32
+ import json
33
+ import logging
34
+ import os
35
+ import urllib.request
36
+ from dataclasses import dataclass, field as _field
37
+ from typing import Optional, Union
38
+
39
_log = logging.getLogger("engine.schema")

# Type names a config may use for a field, bucketed by parsing strategy.
# Several spellings are accepted per bucket ("string", "integer",
# "currency", ...) so that hand-written configs read naturally.
INTEGER_TYPES = {"integer", "int"}  # numeric values coerced to int when whole
DATE_TYPES = {"date", "datetime"}
TEXT_TYPES = {"text", "string", "str"}
NUMERIC_TYPES = INTEGER_TYPES | {
    "money", "number", "currency", "numeric", "decimal", "float",
}
# Every type name the engine recognizes.
VALID_TYPES = DATE_TYPES.union(NUMERIC_TYPES, TEXT_TYPES)
49
+
50
# --------------------------------------------------------------------------
# Bank preset data. Per the original note these values were copied verbatim
# from the former engine.py constants, so they must not be edited casually.
# --------------------------------------------------------------------------
BANK_SCHEMA: list[dict] = [
    dict(field="date", header="Date", type="date",
         description="the transaction date (post/value/booking date)"),
    dict(field="description", header="Narration", type="text",
         description="free-text narration / particulars / details of the transaction"),
    dict(field="reference", header="Reference Number", type="text",
         description="reference or cheque/UTR/instrument number identifying the entry"),
    dict(field="debit", header="Debit", type="money",
         description="money leaving the account (withdrawal / paid out); a debit-only column"),
    dict(field="credit", header="Credit", type="money",
         description="money entering the account (deposit / paid in); a credit-only column"),
    dict(field="balance", header="Balance", type="money",
         description="running account balance after the transaction"),
]

BANK_CRITICAL_FIELDS: list[str] = ["date"]

# --- Bank preset behavior (expressed as data, not engine logic) -----------
# reconcile: one signed `amount` column is split into debit (negative) and
# credit (positive); separate debit/credit columns are taken as positive.
BANK_RECONCILE: dict = {
    "signed": "amount",
    "negative": "debit",
    "positive": "credit",
}
# require_any: every group needs at least one mapped field, otherwise the
# statement is flagged.
BANK_REQUIRE_ANY: list = [["debit", "credit", "amount"]]
# row_keep_if_any: a row counts as a real record when at least one of these
# fields has a value.
BANK_ROW_KEEP_IF_ANY: list = ["date", "debit", "credit"]
# continuation_field: a row carrying only this field is folded into the row
# above it.
BANK_CONTINUATION_FIELD: Optional[str] = "description"
# Descriptions for fields the AI matcher may see that are not output columns.
BANK_FIELD_DESCRIPTIONS: dict = {
    "amount": "a SINGLE signed amount column (one column, +credit / -debit)",
}

# Header vocabulary: field key -> header spellings recognized for that field.
BANK_SYNONYMS: dict[str, list[str]] = {
    "date": [
        "date", "txn date", "transaction date", "value date",
        "posting date", "post date", "tran date", "date of transaction",
        "trans date", "dt", "booking date", "entry date",
    ],
    "description": [
        "description", "narration", "particulars", "details",
        "remarks", "transaction details", "transaction remarks",
        "narrative", "memo", "transaction description",
        "txn description", "notes", "purpose",
    ],
    "reference": [
        "reference", "reference number", "reference no", "ref no",
        "ref no.", "ref no./cheque no", "ref no./cheque no.",
        "cheque no", "cheque no.", "chq no", "chq no.", "ref",
        "reference id", "utr", "utr no", "instrument no",
        "cheque/ref no", "chq/ref no", "transaction id",
        "ref/cheque no",
    ],
    "debit": [
        "debit", "withdrawal", "withdrawals", "withdrawal amt",
        "withdrawal amount", "withdrawal (dr)", "dr", "dr amount",
        "debit amount", "debit amt", "paid out", "payments",
        "money out", "amount debited", "outflow", "debit(dr)",
        "withdrawal amt.",
    ],
    "credit": [
        "credit", "deposit", "deposits", "deposit amt",
        "deposit amount", "deposit (cr)", "cr", "cr amount",
        "credit amount", "credit amt", "paid in", "receipts",
        "money in", "amount credited", "inflow", "credit(cr)",
        "deposit amt.",
    ],
    "balance": [
        "balance", "closing balance", "running balance",
        "available balance", "balance amount", "bal", "closing bal",
        "ledger balance", "book balance", "balance (inr)",
    ],
    "amount": [
        "amount", "transaction amount", "txn amount", "amt", "value",
        "signed amount", "amount (inr)", "amount(dr/cr)",
        "transaction amt",
    ],
}
126
+
127
+
128
+ # --------------------------------------------------------------------------
129
+ # Data classes
130
+ # --------------------------------------------------------------------------
131
@dataclass
class FieldSpec:
    """One column of the output template.

    Attributes:
        field: internal key for the column (e.g. ``date``, ``debit``).
        header: display name written to the output file.
        type: type name that selects how values are parsed; ``"text"``
            when not given.
        description: optional free-text description; used by the AI matcher.
    """

    field: str
    header: str
    type: str = "text"
    description: str = ""
137
+
138
+
139
@dataclass
class Config:
    """Everything the mapper needs, held as plain data.

    The three required attributes describe the output template, the header
    vocabulary and the critical fields. The remaining attributes switch on
    optional domain behavior and default to empty.
    """

    output_schema: list[FieldSpec]
    synonyms: dict[str, list[str]]
    critical_fields: list[str]
    # Optional, data-driven domain behavior.
    reconcile: dict = _field(default_factory=dict)  # keys: signed / negative / positive
    require_any: list = _field(default_factory=list)  # list of field groups
    row_keep_if_any: list = _field(default_factory=list)  # row kept if any has a value
    continuation_field: Optional[str] = None  # target for folded multi-line rows
    extra_field_descriptions: dict = _field(default_factory=dict)  # non-output fields

    # -- read-only views derived from the attributes above --
    @property
    def fields(self) -> list[str]:
        """Field keys of the output schema, in schema order."""
        keys = []
        for spec in self.output_schema:
            keys.append(spec.field)
        return keys

    @property
    def headers(self) -> list[tuple[str, str]]:
        """Back-compat shape: list of (field_key, display_header)."""
        pairs = []
        for spec in self.output_schema:
            pairs.append((spec.field, spec.header))
        return pairs

    @property
    def field_types(self) -> dict[str, str]:
        """Mapping of field key to its declared type name."""
        types = {}
        for spec in self.output_schema:
            types[spec.field] = spec.type
        return types

    @property
    def field_descriptions(self) -> dict[str, str]:
        """{field: description} for output fields plus the extra fields.

        An output field without a description falls back to its own key;
        entries in ``extra_field_descriptions`` override on key clashes.
        """
        described = {}
        for spec in self.output_schema:
            described[spec.field] = spec.description or spec.field
        described.update(self.extra_field_descriptions)
        return described

    @property
    def reconcile_fields(self) -> list[str]:
        """Fields named by the reconcile mapping, in signed/negative/positive order."""
        mapping = self.reconcile or {}
        involved = []
        for role in ("signed", "negative", "positive"):
            if mapping.get(role):
                involved.append(mapping[role])
        return involved

    @property
    def allowed_fields(self) -> list[str]:
        """Output fields followed by any extra or reconcile field not already listed."""
        allowed = list(self.fields)
        candidates = [*self.extra_field_descriptions, *self.reconcile_fields]
        for name in candidates:
            if name in allowed:
                continue
            allowed.append(name)
        return allowed
185
+
186
+
187
+ # --------------------------------------------------------------------------
188
+ # Builders
189
+ # --------------------------------------------------------------------------
190
def _infer_type(field_key: str) -> str:
    """Guess a type name from a field key.

    Returns ``"date"`` for the key ``date``, ``"money"`` for ``debit``,
    ``credit``, ``balance`` and ``amount``, and ``"text"`` for any other key.
    """
    if field_key == "date":
        return "date"
    is_money = field_key in {"debit", "credit", "balance", "amount"}
    return "money" if is_money else "text"
196
+
197
+
198
def default_config() -> Config:
    """Return the built-in default configuration, which is EMPTY.

    This is a general mapper, so with no configuration it maps nothing: the
    output schema, the synonyms and the critical fields are all empty. Supply
    an output_schema and synonyms through a file or URL named by the
    ``TABULARMAPPER_CONFIG`` environment variable, through a dict, or through
    ``configure()``. Use ``bank_preset()`` for the ready-made bank-statement
    schema.
    """
    return Config(output_schema=[], synonyms={}, critical_fields=[])
204
+
205
+
206
def bank_preset() -> Config:
    """Build the ready-made bank-statement configuration.

    Columns are Date, Narration, Reference Number, Debit, Credit and Balance,
    with debit/credit reconciliation enabled. Also in config.example.json.
    Every container is copied from the module-level ``BANK_*`` constants, so
    changing the returned Config does not alter those constants.

        from tabularmapper import bank_preset, configure
        configure(config=bank_preset())
    """
    schema = [FieldSpec(**entry) for entry in BANK_SCHEMA]
    vocabulary = {name: list(words) for name, words in BANK_SYNONYMS.items()}
    required_groups = [list(group) for group in BANK_REQUIRE_ANY]
    return Config(
        output_schema=schema,
        synonyms=vocabulary,
        critical_fields=list(BANK_CRITICAL_FIELDS),
        reconcile=dict(BANK_RECONCILE),
        require_any=required_groups,
        row_keep_if_any=list(BANK_ROW_KEEP_IF_ANY),
        continuation_field=BANK_CONTINUATION_FIELD,
        extra_field_descriptions=dict(BANK_FIELD_DESCRIPTIONS),
    )
223
+
224
+
225
def _listify(value) -> list:
    """Coerce a config value to a list, treating a bare string as ONE item.

    ``list("txn date")`` would explode the string into single characters, so
    a lone string is wrapped instead. ``None`` becomes an empty list.
    """
    if value is None:
        return []
    if isinstance(value, str):
        return [value]
    return list(value)


def config_from_dict(d: dict, _origin: str = "<dict>") -> Config:
    """Build a Config from a parsed JSON dict.

    This is the GENERIC path: nothing bank-specific is assumed and no preset
    synonyms are merged in.

    Args:
        d: the parsed configuration. All keys are optional.
        _origin: label for the source of ``d``; only used in log messages.

    Returns:
        The Config described by ``d``. ``output_schema`` entries may be dicts
        (``field`` required; ``header``, ``type``, ``description`` optional)
        or ``[field, header]`` pairs; entries of any other shape are skipped.

    Raises:
        KeyError: a dict entry in ``output_schema`` has no ``"field"`` key.
    """
    schema_items = d.get("output_schema") or []
    if not schema_items:
        _log.warning(
            "config %s has no non-empty 'output_schema' — nothing will be mapped. "
            "Provide output_schema (or use bank_preset() for the bank layout).",
            _origin)

    specs: list[FieldSpec] = []
    for item in schema_items:
        if isinstance(item, dict):
            key = item["field"]
            spec = FieldSpec(
                field=key,
                header=item.get("header", key),
                type=item.get("type") or _infer_type(key),
                description=item.get("description", ""),
            )
        elif isinstance(item, (list, tuple)) and len(item) >= 2:
            spec = FieldSpec(field=item[0], header=item[1],
                             type=_infer_type(item[0]))
        else:
            continue

        # Accept "Money" / " DATE " as well as the canonical lower-case names.
        declared = spec.type
        normalized = declared.strip().lower() if isinstance(declared, str) else declared
        if normalized in VALID_TYPES:
            spec.type = normalized
        else:
            # Unknown type: fall back to inference, but say so rather than
            # silently changing how the column is parsed.
            spec.type = _infer_type(spec.field)
            _log.warning(
                "config %s: field %r declares unknown type %r — using %r instead",
                _origin, spec.field, declared, spec.type)
        specs.append(spec)

    # Synonyms are exactly what you declare — no bank defaults are merged in.
    synonyms = {key: _listify(words)
                for key, words in (d.get("synonyms") or {}).items()}
    return Config(
        output_schema=specs,
        synonyms=synonyms,
        critical_fields=_listify(d.get("critical_fields")),
        reconcile=dict(d.get("reconcile") or {}),
        require_any=[_listify(group) for group in (d.get("require_any") or [])],
        row_keep_if_any=_listify(d.get("row_keep_if_any")),
        continuation_field=d.get("continuation_field"),
        extra_field_descriptions=dict(d.get("field_descriptions") or {}),
    )
262
+
263
+
264
+ # --------------------------------------------------------------------------
265
+ # Loading — file / http(s) / s3 / dict, with a fail-safe to defaults
266
+ # --------------------------------------------------------------------------
267
def _read_source(source: str, timeout: float = 10.0) -> bytes:
    """Return the raw bytes behind a config location.

    ``s3://`` locations are delegated to ``_read_s3``; ``http://`` and
    ``https://`` locations are fetched with ``urllib`` using ``timeout``.
    Anything else is opened as a local file, after dropping a leading
    ``file://`` if present.
    """
    if source.startswith("s3://"):
        return _read_s3(source)

    is_web = source.startswith("http://") or source.startswith("https://")
    if is_web:
        with urllib.request.urlopen(source, timeout=timeout) as response:
            return response.read()

    file_scheme = "file://"
    local_path = source[len(file_scheme):] if source.startswith(file_scheme) else source
    with open(local_path, "rb") as handle:
        return handle.read()
277
+
278
+
279
def _read_s3(uri: str) -> bytes:
    """Download the object named by an ``s3://bucket/key`` URI.

    The bucket is the URI's network location and the key is its path without
    the leading slash. ``boto3`` is imported here, not at module import time,
    so it stays an optional dependency.

    Raises:
        ImportError: ``boto3`` is not installed.
    """
    from urllib.parse import urlparse
    try:
        import boto3  # optional; only for s3:// sources — or use a presigned https URL
    except ImportError as exc:
        raise ImportError(
            "Loading config from s3:// needs the 'boto3' package (pip install "
            "boto3), or pass a presigned https:// URL instead (no dependency)."
        ) from exc

    parsed = urlparse(uri)
    bucket = parsed.netloc
    key = parsed.path.lstrip("/")
    response = boto3.client("s3").get_object(Bucket=bucket, Key=key)
    return response["Body"].read()
292
+
293
+
294
def load_config(source: Optional[Union[str, dict]] = None,
                strict: bool = False) -> Config:
    """Load configuration.

    source:
      * None  -> the location in env TABULARMAPPER_CONFIG when that variable
                 is set to a non-blank value, else the built-in defaults
      * dict  -> used directly
      * "s3://…"      -> S3 object (needs boto3) OR use a presigned https URL
      * "http(s)://…" -> fetched via stdlib urllib
      * path / "file://…" -> read from the local filesystem

    On any load/parse error, falls back to the defaults (so a bad or
    unreachable config never takes the service down) unless `strict=True`,
    in which case the original exception propagates. A dict source is not
    covered by this fallback: errors while building from it always propagate.
    """
    if source is None:
        env_value = os.getenv("TABULARMAPPER_CONFIG")
        # An exported-but-empty variable means "not configured"; treating it
        # as a path would try to open "" and fail on every start-up.
        if env_value is None or not env_value.strip():
            return default_config()
        source = env_value
    if isinstance(source, dict):
        return config_from_dict(source)
    try:
        raw = _read_source(str(source))
        return config_from_dict(json.loads(raw), _origin=str(source))
    except Exception as exc:
        # Deliberate catch-all boundary: see the fail-safe note above.
        if strict:
            raise
        _log.warning(
            "TABULARMAPPER config %r failed to load (%s: %s) — falling back to "
            "built-in defaults", source, type(exc).__name__, exc)
        return default_config()
323
+
324
+
325
def config_to_dict(cfg: Config) -> dict:
    """Turn a Config back into the JSON-friendly dict shape (for saving/harvest).

    Containers are copied, so the result shares no lists or dicts with
    ``cfg``. A schema entry's ``description`` key is emitted only when the
    description is non-empty.
    """
    schema_rows = []
    for spec in cfg.output_schema:
        row = {"field": spec.field, "header": spec.header, "type": spec.type}
        if spec.description:
            row["description"] = spec.description
        schema_rows.append(row)

    payload = {"version": 1, "output_schema": schema_rows}
    payload["critical_fields"] = list(cfg.critical_fields)
    payload["reconcile"] = dict(cfg.reconcile)
    payload["require_any"] = [list(group) for group in cfg.require_any]
    payload["row_keep_if_any"] = list(cfg.row_keep_if_any)
    payload["continuation_field"] = cfg.continuation_field
    payload["field_descriptions"] = dict(cfg.extra_field_descriptions)
    payload["synonyms"] = {name: list(words) for name, words in cfg.synonyms.items()}
    return payload
@@ -0,0 +1,238 @@
1
+ """
2
+ stores.py — pluggable key/value backends behind one URL convention.
3
+
4
+ Every persistent store in the package (the mapping cache today, the learned
5
+ synonyms next) is a `KeyValueStore`. You pick the backend with a URL, exactly
6
+ like SQLAlchemy / Celery — swap it with an env var, no code change:
7
+
8
+ memory:// in-process dict (tests, single worker)
9
+ ./mapping_cache.db / sqlite:///mapping_cache.db
10
+ SQLite file — no server, concurrency-safe (DEFAULT)
11
+ ./mapping_cache.json / file://... legacy JSON file (NOT multi-worker safe)
12
+ redis://host:6379/0 Redis (pip install ...[redis])
13
+ valkey://host:6379/0 Valkey (pip install ...[valkey])
14
+ postgresql://user@host/db Postgres (pip install ...[postgres])
15
+
16
+ Escape hatch: any object with get()/put() works — pass your own to open_store's
17
+ consumers directly if you have a backend we don't ship.
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import json
23
+ import os
24
+ import threading
25
+ from typing import Optional
26
+
27
+ try: # typing only; Protocol may be absent on very old pythons
28
+ from typing import Protocol
29
+ except ImportError: # pragma: no cover
30
+ Protocol = object # type: ignore
31
+
32
+
33
class KeyValueStore(Protocol):
    """Structural interface shared by every backend in this module."""

    def get(self, key: str) -> Optional[dict]:
        """Look up the value stored under ``key``."""
        ...

    def put(self, key: str, value: dict) -> None:
        """Store ``value`` under ``key``."""
        ...

    def close(self) -> None:
        """Release whatever the backend holds open."""
        ...
37
+
38
+
39
+ # --------------------------------------------------------------------------
40
+ # In-memory
41
+ # --------------------------------------------------------------------------
42
class MemoryStore:
    """Backend that keeps entries in a plain dict on the instance.

    Values are stored by reference: ``get`` hands back the same object that
    was passed to ``put``.
    """

    def __init__(self) -> None:
        self._d: dict[str, dict] = {}

    def get(self, key: str) -> Optional[dict]:
        """Return the value for ``key``, or None when the key is unknown."""
        try:
            return self._d[key]
        except KeyError:
            return None

    def put(self, key: str, value: dict) -> None:
        """Store ``value`` under ``key``, replacing any earlier value."""
        self._d.update({key: value})

    def close(self) -> None:
        """Do nothing; there is no resource to release."""
        return None
54
+
55
+
56
+ # --------------------------------------------------------------------------
57
+ # JSON file (legacy default; whole-file rewrite, NOT multi-worker safe)
58
+ # --------------------------------------------------------------------------
59
class JsonFileStore:
    """Legacy backend that keeps every entry in one JSON file.

    The whole file is rewritten on each ``put``. The lock serializes writers
    within this process only, so the store is NOT multi-worker safe.
    """

    def __init__(self, path: str) -> None:
        """Load ``path`` if it exists; an unreadable file yields an empty store.

        "Unreadable" covers I/O errors, invalid JSON, bytes that are not valid
        UTF-8, and JSON whose top level is not an object.
        """
        self.path = path
        self._data: dict[str, dict] = {}
        self._lock = threading.Lock()
        if os.path.exists(path):
            try:
                with open(path, "r", encoding="utf-8") as fh:
                    loaded = json.load(fh)
            except (ValueError, OSError):
                # ValueError covers both json.JSONDecodeError and the
                # UnicodeDecodeError raised while reading undecodable bytes.
                loaded = None
            # A top-level list/number/string would break get(); ignore it.
            if isinstance(loaded, dict):
                self._data = loaded

    def get(self, key: str) -> Optional[dict]:
        """Return the value stored under ``key``, or None when absent."""
        return self._data.get(key)

    def put(self, key: str, value: dict) -> None:
        """Store ``value`` under ``key`` and rewrite the file.

        Raises:
            TypeError: ``value`` is not JSON-serializable; the store is left
                unchanged in that case.
            OSError: the file could not be written.
        """
        with self._lock:
            # Serialize BEFORE touching the in-memory state: otherwise one
            # unserializable value stays in the dict and makes every later
            # put() fail as well.
            payload = json.dumps({**self._data, key: value}, indent=2)
            self._data[key] = value
            tmp = f"{self.path}.tmp"
            with open(tmp, "w", encoding="utf-8") as fh:
                fh.write(payload)
            os.replace(tmp, self.path)  # atomic-ish within a single process

    def close(self) -> None:
        """Do nothing; no handle is kept open between calls."""
        pass
84
+
85
+
86
+ # --------------------------------------------------------------------------
87
+ # SQLite (default) — file-based, no server, concurrency-safe via WAL
88
+ # --------------------------------------------------------------------------
89
class SqliteStore:
    """Backend that stores entries as JSON text in a SQLite ``kv`` table.

    One connection, opened with ``check_same_thread=False``, is kept for the
    lifetime of the store. Writes are serialized with a lock.
    """

    _SETUP_STATEMENTS = (
        "PRAGMA journal_mode=WAL",
        "PRAGMA busy_timeout=5000",
        "CREATE TABLE IF NOT EXISTS kv (key TEXT PRIMARY KEY, value TEXT NOT NULL)",
    )

    def __init__(self, path: str) -> None:
        """Open (or create) the database at ``path`` and ensure the table exists."""
        import sqlite3
        self.path = path
        self._lock = threading.Lock()
        self._conn = sqlite3.connect(path, check_same_thread=False)
        for statement in self._SETUP_STATEMENTS:
            self._conn.execute(statement)
        self._conn.commit()

    def get(self, key: str) -> Optional[dict]:
        """Return the decoded value for ``key``, or None when there is no row."""
        row = self._conn.execute(
            "SELECT value FROM kv WHERE key = ?", (key,)).fetchone()
        if not row:
            return None
        return json.loads(row[0])

    def put(self, key: str, value: dict) -> None:
        """Insert or overwrite the row for ``key`` with ``value`` as JSON text."""
        encoded = json.dumps(value)
        with self._lock:
            self._conn.execute(
                "INSERT INTO kv (key, value) VALUES (?, ?) "
                "ON CONFLICT(key) DO UPDATE SET value = excluded.value",
                (key, encoded))
            self._conn.commit()

    def close(self) -> None:
        """Close the underlying connection."""
        self._conn.close()
116
+
117
+
118
+ # --------------------------------------------------------------------------
119
+ # Redis / Valkey (optional deps, lazy import).
120
+ # Valkey is the open-source Redis fork; the two speak the same wire protocol,
121
+ # so a single client resolver + get/put serves both. Per the Aiven docs, the
122
+ # Valkey client is built with the module-level `valkey.from_url(uri)`.
123
+ # --------------------------------------------------------------------------
124
def _redis_proto_client(url: str, prefer: str = "redis"):
    """Build a client for any redis-protocol server (Redis or Valkey).

    The preferred driver is tried first, then the other one. The URL scheme
    is rewritten to the spelling the chosen library is given here
    (``redis://``/``rediss://`` for redis-py, ``valkey://``/``valkeys://``
    for valkey-py); the rest of the URL is passed through untouched.

    Args:
        url: connection URL using a redis or valkey scheme.
        prefer: ``"valkey"`` to try valkey-py first; anything else tries
            redis-py first.

    Returns:
        Whatever the chosen library's module-level ``from_url`` returns.

    Raises:
        ImportError: neither ``valkey`` nor ``redis`` can be imported.
    """
    # Scheme rewrites per driver. Only the leading scheme is rewritten, so a
    # "redis://" that happens to occur later in the URL (password, query
    # string) is left alone.
    scheme_rewrites = {
        "redis": (("valkeys://", "rediss://"), ("valkey://", "redis://")),
        "valkey": (("rediss://", "valkeys://"), ("redis://", "valkey://")),
    }
    order = ["valkey", "redis"] if prefer == "valkey" else ["redis", "valkey"]
    last_err = None
    for lib in order:
        try:
            mod = __import__(lib)  # valkey-py or redis-py
        except ImportError as exc:
            last_err = exc
            continue
        target = url
        for old_scheme, new_scheme in scheme_rewrites[lib]:
            if url.startswith(old_scheme):
                target = new_scheme + url[len(old_scheme):]
                break
        return mod.from_url(target)  # module-level from_url (both expose it)
    raise ImportError(
        "This cache backend needs the 'valkey' or 'redis' package. Install one "
        "with: pip install tabularmapper[valkey]  (or [redis]). Both "
        "are optional — the default SQLite backend needs nothing extra."
    ) from last_err
151
+
152
+
153
class _RedisProtocolStore:
    """Backend over a redis-protocol client; values are stored as JSON strings.

    Every key is namespaced with ``prefix`` before it reaches the server.
    """

    def __init__(self, client, prefix: str = "bankmap:") -> None:
        """Wrap ``client``, which must provide ``get(name)`` and ``set(name, value)``."""
        self._r = client
        self._prefix = prefix

    def get(self, key: str) -> Optional[dict]:
        """Return the decoded value for ``key``, or None when nothing is stored."""
        raw = self._r.get(self._prefix + key)
        return json.loads(raw) if raw else None  # json.loads accepts bytes

    def put(self, key: str, value: dict) -> None:
        """Store ``value`` under ``key`` as a JSON string."""
        self._r.set(self._prefix + key, json.dumps(value))

    def close(self) -> None:
        """Close the wrapped client when it exposes ``close()``.

        Without this the store's ``close()`` did nothing and the client's
        connections stayed open. Clients without a ``close`` method (custom
        objects, for instance) are left alone.
        """
        closer = getattr(self._r, "close", None)
        if callable(closer):
            closer()
167
+
168
+
169
class RedisStore(_RedisProtocolStore):
    """Redis-protocol store that tries the ``redis`` driver first."""

    def __init__(self, url: str, prefix: str = "bankmap:") -> None:
        client = _redis_proto_client(url, prefer="redis")
        super().__init__(client, prefix)
172
+
173
+
174
class ValkeyStore(_RedisProtocolStore):
    """Store for Valkey, the open-source Redis fork.

    Tries valkey-py (``valkey.from_url``) first and falls back to the
    wire-compatible redis-py when valkey-py is not installed.
    """

    def __init__(self, url: str, prefix: str = "bankmap:") -> None:
        client = _redis_proto_client(url, prefer="valkey")
        super().__init__(client, prefix)
179
+
180
+
181
+ # --------------------------------------------------------------------------
182
+ # Postgres (optional dep, lazy import)
183
+ # --------------------------------------------------------------------------
184
class PostgresStore:
    """Backend that stores entries in a Postgres table with a JSONB value column.

    The connection is opened with ``autocommit=True``, so every statement
    takes effect immediately. ``psycopg`` is imported lazily so it stays an
    optional dependency.
    """

    def __init__(self, url: str, table: str = "engine_kv") -> None:
        """Connect to ``url`` and create ``table`` when it does not exist.

        NOTE(review): ``table`` is interpolated into the SQL text, so it must
        come from trusted configuration, never from user input.

        Raises:
            ImportError: the ``psycopg`` package is not installed.
        """
        try:
            import psycopg
        except ImportError as exc:
            raise ImportError(
                "The postgres cache backend needs the 'psycopg' package. Install "
                "it with: pip install tabularmapper[postgres]. It is "
                "optional — the default SQLite backend needs nothing extra."
            ) from exc
        self._table = table
        self._conn = psycopg.connect(url, autocommit=True)
        self._conn.execute(
            f"CREATE TABLE IF NOT EXISTS {table} "
            "(key TEXT PRIMARY KEY, value JSONB NOT NULL)")

    def get(self, key: str) -> Optional[dict]:
        """Return the value column for ``key``, or None when there is no row."""
        cur = self._conn.execute(
            f"SELECT value FROM {self._table} WHERE key = %s", (key,))
        row = cur.fetchone()
        return row[0] if row else None

    def put(self, key: str, value: dict) -> None:
        """Insert or overwrite the row for ``key``; ``value`` is sent as JSON text."""
        self._conn.execute(
            f"INSERT INTO {self._table} (key, value) VALUES (%s, %s) "
            "ON CONFLICT (key) DO UPDATE SET value = EXCLUDED.value",
            (key, json.dumps(value)))

    def close(self) -> None:
        """Close the database connection."""
        self._conn.close()
214
+
215
+
216
+ # --------------------------------------------------------------------------
217
+ # The factory
218
+ # --------------------------------------------------------------------------
219
def open_store(url: Optional[str]) -> KeyValueStore:
    """Pick and build the backend for a URL or bare path.

    An empty value, ``memory://`` or ``memory:`` gives an in-memory store.
    Valkey, Redis and Postgres schemes select their stores. ``sqlite://``
    URLs select SQLite, using ``:memory:`` when no path follows the scheme.
    Any other value is treated as a path after dropping a leading
    ``file://``: ``.db`` / ``.sqlite`` / ``.sqlite3`` selects SQLite and
    everything else the JSON file store.
    """
    if not url or url in ("memory://", "memory:"):
        return MemoryStore()

    by_scheme = (
        (("valkey://", "valkeys://"), ValkeyStore),
        (("redis://", "rediss://"), RedisStore),
        (("postgresql://", "postgres://"), PostgresStore),
    )
    for schemes, backend in by_scheme:
        if url.startswith(schemes):
            return backend(url)

    if url.startswith("sqlite://"):
        # sqlite:///abs/or/rel.db -> drop the three-slash form when present,
        # otherwise the two-slash form.
        scheme = "sqlite:///" if url.startswith("sqlite:///") else "sqlite://"
        return SqliteStore(url[len(scheme):] or ":memory:")

    path = url[len("file://"):] if url.startswith("file://") else url
    # Bare path: the file extension decides the backend.
    if path.endswith((".db", ".sqlite", ".sqlite3")):
        return SqliteStore(path)
    return JsonFileStore(path)