titto 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
titto/__init__.py ADDED
@@ -0,0 +1,629 @@
1
+ import collections
2
+ import contextlib
3
+ import dataclasses
4
+ import json
5
+ import pathlib
6
+ import typing as t
7
+ import urllib.parse
8
+
9
+ import cyclopts
10
+ import pydantic
11
+ from myrtille.lib import cfg as myrtille_cfg
12
+ from myrtille.lib import db
13
+ from myrtille.mysql import export, parser
14
+ from myrtille.mysql import types as myrtille_types
15
+
16
+ from . import util
17
+
18
# Format version written into the tables cache file. It is part of the cache
# metadata compared on load, so a cache written with another version is ignored.
CACHE_VERSION = 1
# pydantic adapter used to validate and to dump the ``table name -> Table``
# mapping stored under the cache file's 'tables' key.
TABLES_ADAPTER = pydantic.TypeAdapter(dict[str, myrtille_types.Table])


# DDL strings as returned by ``export.export``; each one is parsed into a table.
type Ddls = t.Sequence[str]
# Parsed tables keyed by table name.
type TableNameMap = t.Mapping[str, myrtille_types.Table]
# NOTE(review): Row, RowKey, TableRows and RefId are not referenced in the
# visible part of this module; their intended use cannot be confirmed from here.
type Row = dict[str, t.Any]
type RowKey = tuple[t.Hashable, ...]
type TableRows = dict[str, dict[RowKey, Row]]
type RefId = tuple[str, str]
28
+
29
+
30
@dataclasses.dataclass(frozen=True, slots=True)
class Ref:
    """A foreign-key constraint seen as an edge from ``src`` to ``dst``."""

    # Table whose constraint list contains the foreign key.
    src: myrtille_types.Table
    # Table named by the constraint's ``references.ref_table``.
    dst: myrtille_types.Table
    # The foreign-key constraint itself.
    constraint: myrtille_types.ForeignConstraint

    @property
    def id(self):
        """Return ``'<source table name>,<constraint name>'``.

        An unnamed (falsy-named) constraint contributes an empty name.
        """
        constraint_name = self.constraint.name or ""
        return f'{self.src.name},{constraint_name}'
39
+
40
+
41
# Refs grouped under a table name.
type TableNameRefMap = t.Mapping[str, t.Sequence[Ref]]
# For each table name, the chain of refs that leads from that table to the
# tenant table (empty for the tenant table itself).
type TableRefPathMap = t.Mapping[str, t.Sequence[Ref]]
43
+
44
+
45
@dataclasses.dataclass(frozen=True, slots=True)
class TableNameRefMaps:
    """Foreign-key refs indexed in both directions."""

    # Refs grouped by the name of their source table (outgoing foreign keys).
    forward: TableNameRefMap
    # Refs grouped by the name of their destination table (incoming foreign keys).
    reverse: TableNameRefMap
49
+
50
+
51
def quote_identifier(identifier: str) -> str:
    """Wrap *identifier* in backticks, doubling every backtick it contains."""
    escaped = identifier.replace('`', '``')
    return '`' + escaped + '`'
53
+
54
+
55
def quote_table(schema_name: str, table_name: str) -> str:
    """Return the backtick-quoted ``schema.table`` reference."""
    quoted_schema = quote_identifier(schema_name)
    quoted_name = quote_identifier(table_name)
    return quoted_schema + '.' + quoted_name
57
+
58
+
59
def column_list(columns: t.Iterable[str]) -> str:
    """Return the quoted column names joined with ``', '``."""
    quoted_columns = [quote_identifier(name) for name in columns]
    return ', '.join(quoted_columns)
61
+
62
+
63
def qualified_column(alias: str, column: str) -> str:
    """Return the quoted ``alias.column`` reference."""
    quoted_alias = quote_identifier(alias)
    quoted_column = quote_identifier(column)
    return quoted_alias + '.' + quoted_column
65
+
66
+
67
def placeholders(count: int) -> str:
    """Return *count* ``%s`` markers separated by ``', '`` (empty when count <= 0)."""
    markers = ['%s'] * count
    return ', '.join(markers)
69
+
70
+
71
def is_ref_required(ref: Ref):
    """Return True when every column of the foreign key is non-nullable.

    Columns are looked up by name on the source table; a constraint column
    missing from the table raises ``KeyError`` when it is reached.
    """
    columns_by_name = {}
    for column in ref.src.columns:
        columns_by_name[column.name] = column

    for column_name in ref.constraint.columns:
        # One nullable column is enough to make the reference optional.
        if not columns_by_name[column_name].non_nullable:
            return False
    return True
74
+
75
+
76
def has_unique_key(table: myrtille_types.Table):
    """Return True when *table* declares at least one ``UniqueConstraint``."""
    for constraint in table.constraints:
        if isinstance(constraint, myrtille_types.UniqueConstraint):
            return True
    return False
80
+
81
+
82
+ def parse_count(result: t.Sequence[object]) -> int:
83
+ count = result[0]
84
+ if not isinstance(count, int):
85
+ raise TypeError(f'Expected count to be an int, got {type(count).__name__}')
86
+ return count
87
+
88
+
89
def get_primary_key(table: myrtille_types.Table):
    """Return the first ``PrimaryConstraint`` declared on *table*.

    Raises:
        ValueError: If the table has no primary-key constraint.
    """
    primary_constraints = (
        constraint
        for constraint in table.constraints
        if isinstance(constraint, myrtille_types.PrimaryConstraint)
    )
    primary_key = next(primary_constraints, None)
    if primary_key is None:
        raise ValueError(f'Table {table.name!r} has no primary key')
    return primary_key
94
+
95
+
96
def get_shortest_ref_path(
    ref_maps: TableNameRefMaps, *, src_table_name: str, dst_table_name: str
) -> list[Ref]:
    """Breadth-first search for the shortest forward foreign-key path.

    Args:
        ref_maps: Refs indexed by table name; only ``forward`` is consulted.
        src_table_name: Table the path starts from.
        dst_table_name: Table the path must end at.

    Returns:
        The refs to follow from source to destination, in order; an empty
        list when both names are equal.

    Raises:
        ValueError: If the destination cannot be reached through forward refs.
    """
    if src_table_name == dst_table_name:
        return []

    seen = {src_table_name}
    pending: collections.deque[tuple[str, list[Ref]]] = collections.deque()
    pending.append((src_table_name, []))
    while pending:
        current_name, path_so_far = pending.popleft()
        for ref in ref_maps.forward.get(current_name, []):
            target_name = ref.dst.name
            if target_name in seen:
                continue
            extended_path = path_so_far + [ref]
            if target_name == dst_table_name:
                # Breadth-first order guarantees the first hit is a shortest path.
                return extended_path
            seen.add(target_name)
            pending.append((target_name, extended_path))

    raise ValueError(
        f'Table {src_table_name!r} has no forward foreign-key path '
        f'to tenant table {dst_table_name!r}'
    )
120
+
121
+
122
@dataclasses.dataclass(frozen=True, slots=True)
class Context:
    """Database connection plus the tenant settings shared by the commands."""

    # Connection on which every statement is executed.
    cnx: db.Connection
    # Schema selected with ``USE`` and used to qualify table names.
    schema_name: str
    # Name of the table whose rows identify tenants.
    tenant_table_name: str
    # Primary-key value(s) of the tenant row, one per primary-key column.
    tenant_pk_value: t.Sequence[object]
    # True to commit at the end of a command, False to roll back.
    commit: bool
    # Optional path of the JSON file used to cache parsed tables.
    cache: pathlib.Path | None
130
+
131
@staticmethod
@contextlib.asynccontextmanager
async def create(
    db_config: myrtille_cfg.Database,
    *,
    schema_name: str,
    tenant_table_name: str,
    tenant_pk_value: t.Sequence[object],
    commit: bool | None,
    cache: pathlib.Path | None,
):
    """Open a connection, select the schema and yield a ready ``Context``.

    ``staticmethod`` is the outermost decorator so that the attribute stored
    on the class is a real static method: it behaves the same whether it is
    reached through the class or through an instance.

    Args:
        db_config: Connection settings handed to ``db.make_database``.
        schema_name: Schema selected with ``USE`` before the context is yielded.
        tenant_table_name: Name of the tenant table.
        tenant_pk_value: Primary-key value(s) of the tenant row.
        commit: Whether commands should commit; ``None`` is treated as False.
        cache: Optional path of the tables cache file.

    Yields:
        A ``Context`` bound to the acquired connection. The database and the
        connection are released when the ``async with`` block exits.
    """
    async with db.make_database(db_config) as database, database.acquire() as cnx:
        await cnx.execute(f'USE {quote_identifier(schema_name)}')
        yield Context(
            cnx,
            schema_name=schema_name,
            tenant_table_name=tenant_table_name,
            tenant_pk_value=tenant_pk_value,
            commit=commit or False,
            cache=cache,
        )
153
+
154
async def get_ddls(self) -> Ddls:
    """Export the DDL statements of the configured schema."""
    schema_name = self.schema_name
    database_config = self.cnx.database.config
    return await export.export(schema_name=schema_name, config=database_config)
156
+
157
async def load_tables(self) -> TableNameMap:
    """Return the parsed tables, using the cache file when one is configured.

    A usable cache is returned as is. Otherwise the schema is exported and
    parsed, and the result is written to the cache path when there is one.
    """
    cache_path = self.cache
    if cache_path is None:
        return self.get_tables(await self.get_ddls())

    if cache_path.exists():
        cached_tables = self.load_tables_cache(cache_path)
        if cached_tables is not None:
            return cached_tables

    # Missing, unreadable or mismatched cache: rebuild it from the database.
    tables = self.get_tables(await self.get_ddls())
    self.write_tables_cache(cache_path, tables)
    return tables
167
+
168
def get_tables_cache_metadata(self) -> dict[str, str | int]:
    """Return the key/value pairs that tie a cache file to this database.

    The pairs are the cache format version, the connection host and port,
    and the schema name.
    """
    database_config = self.cnx.database.config
    metadata: dict[str, str | int] = {}
    metadata['version'] = CACHE_VERSION
    metadata['host'] = database_config.host
    metadata['port'] = database_config.port
    metadata['schema'] = self.schema_name
    return metadata
176
+
177
def load_tables_cache(self, cache_path: pathlib.Path) -> TableNameMap | None:
    """Read the tables cache, returning None when it cannot be trusted.

    None is returned when the file cannot be read or decoded, is not a JSON
    object, carries metadata different from this context's, or holds a
    'tables' entry that fails validation.
    """
    try:
        raw_text = cache_path.read_text(encoding='utf-8')
        payload = json.loads(raw_text)
    except (OSError, UnicodeDecodeError, json.JSONDecodeError):
        return None

    if not util.is_dict(payload):
        return None

    expected_metadata = self.get_tables_cache_metadata()
    if any(payload.get(key) != value for key, value in expected_metadata.items()):
        return None

    try:
        tables = TABLES_ADAPTER.validate_python(payload.get('tables'))
    except pydantic.ValidationError:
        return None
    return tables
194
+
195
def write_tables_cache(self, cache_path: pathlib.Path, tables: TableNameMap) -> None:
    """Write the cache metadata and the dumped tables to *cache_path* as JSON."""
    payload: dict[str, object] = dict(self.get_tables_cache_metadata())
    payload['tables'] = TABLES_ADAPTER.dump_python(dict(tables), mode='json')
    serialized = json.dumps(payload, sort_keys=True)
    cache_path.write_text(serialized, encoding='utf-8')
201
+
202
def get_tables(self, ddls: t.Sequence[str]) -> TableNameMap:
    """Parse each DDL string into a table and index the tables by name.

    When two DDLs yield the same table name, the later one wins.
    """
    ddl_parser = parser.DDLParser.make()
    tables_by_name = {}
    for ddl in ddls:
        table = ddl_parser.parse(ddl, parser.Transformer)
        tables_by_name[table.name] = table
    return tables_by_name
209
+
210
def validate_key_values(self, tables: TableNameMap) -> None:
    """Check that the tenant table exists and the key has the right arity.

    Args:
        tables: Parsed tables keyed by name.

    Raises:
        ValueError: If the tenant table is not among ``tables``, if it has no
            primary key, or if the number of supplied key values differs from
            the number of primary-key columns.
    """
    if self.tenant_table_name not in tables:
        # Same error as the cleanup command, instead of a bare KeyError.
        raise ValueError(f'Unknown tenant table {self.tenant_table_name!r}')

    table = tables[self.tenant_table_name]
    primary_key = get_primary_key(table)
    if len(self.tenant_pk_value) != len(primary_key.key_list):
        joined_primary_key = ', '.join(part.identifier for part in primary_key.key_list)
        raise ValueError(
            f'{self.schema_name} needs {len(primary_key.key_list)} value(s) for '
            f'{table.name}({joined_primary_key}), got {len(self.tenant_pk_value)}'
        )
219
+
220
def get_ref_maps(self, tables: TableNameMap):
    """Collect every foreign key as a ``Ref`` and index the refs both ways.

    Raises:
        KeyError: If a foreign key references a table missing from ``tables``.
    """
    refs: list[Ref] = []
    for table in tables.values():
        for constraint in table.constraints:
            if not isinstance(constraint, myrtille_types.ForeignConstraint):
                continue
            referenced_table = tables[constraint.references.ref_table]
            refs.append(Ref(src=table, dst=referenced_table, constraint=constraint))

    by_source = util.groupby(refs, lambda r: r.src.name)
    by_destination = util.groupby(refs, lambda r: r.dst.name)
    return TableNameRefMaps(forward=by_source, reverse=by_destination)
231
+
232
def get_refs_to_tenant(
    self, ref_maps: TableNameRefMaps, *, required_only: bool = True
) -> TableRefPathMap:
    """Map every table that reaches the tenant table to its path of refs.

    The search walks incoming foreign keys breadth-first, starting at the
    tenant table (whose own path is empty). Self-references are ignored.

    Args:
        ref_maps: Refs indexed by table name; only ``reverse`` is consulted.
        required_only: When True, optional refs are skipped and a table
            reached a second time is reported as an error. When False,
            optional refs are followed and repeated tables are ignored.

    Raises:
        ValueError: With ``required_only``, if a table is reached twice.
    """
    paths_by_table: dict[str, list[Ref]] = {}
    seen = {self.tenant_table_name}
    pending: collections.deque[tuple[str, list[Ref]]] = collections.deque()
    pending.append((self.tenant_table_name, []))
    while pending:
        current_name, current_path = pending.popleft()
        paths_by_table[current_name] = current_path
        for ref in ref_maps.reverse.get(current_name, []):
            if required_only and not is_ref_required(ref):
                continue
            next_table_name = ref.src.name
            if next_table_name == current_name:
                # A table referencing itself adds nothing to the path.
                continue
            if next_table_name in paths_by_table or next_table_name in seen:
                if required_only:
                    raise ValueError(
                        f'Table {next_table_name!r} has multiple foreign-key paths '
                        f'to tenant table {self.tenant_table_name!r}'
                    )
                continue
            seen.add(next_table_name)
            # The new ref is the first hop from the child towards the tenant.
            pending.append((next_table_name, [ref, *current_path]))
    return paths_by_table
257
+
258
def get_cleanup_tables(
    self, tables: TableNameMap, ref_maps: TableNameRefMaps
) -> list[myrtille_types.Table]:
    """Return the tables whose unreferenced rows the cleanup may delete.

    Starting from every table that reaches the tenant table (optional refs
    included), outgoing foreign keys are followed. A reached table is kept
    when it does not itself reach the tenant table and declares no unique
    constraint.

    The result follows traversal order rather than set order, so the
    statements issued for these tables run in the same order on every run.

    Args:
        tables: Parsed tables keyed by name.
        ref_maps: Refs indexed by table name.
    """
    refs_to_tenant = self.get_refs_to_tenant(ref_maps, required_only=False)

    def referenced_table_names(table_name: str) -> list[str]:
        # Tables that the given table points at through its foreign keys.
        return [ref.dst.name for ref in ref_maps.forward.get(table_name, [])]

    # explore_all yields each table once, so no de-duplication is needed.
    return [
        tables[table_name]
        for table_name, _ in util.explore_all(refs_to_tenant, referenced_table_names)
        if table_name not in refs_to_tenant and not has_unique_key(tables[table_name])
    ]
270
+
271
def get_sorted_tables(self, tables: TableNameMap, ref_maps: TableNameRefMaps):
    """Return table names ordered so referencing tables precede referenced ones.

    Only required foreign keys count as dependencies. The topological order
    (dependencies first) is reversed before being returned.
    """

    def required_dependencies(table_name):
        return [
            ref.dst.name
            for ref in ref_maps.forward.get(table_name, [])
            if is_ref_required(ref)
        ]

    ordered_names = list(util.topological_sort(tables, required_dependencies))
    ordered_names.reverse()
    return ordered_names
286
+
287
def get_tenant_table_for_path(
    self, table: myrtille_types.Table, path_to_tenant: t.Sequence[Ref]
) -> myrtille_types.Table:
    """Follow *path_to_tenant* from *table* and return the table it ends at.

    Raises:
        ValueError: If a ref does not start at the table reached so far, or
            if the path does not end at the tenant table.
    """
    current = table
    for ref in path_to_tenant:
        if ref.src.name == current.name:
            current = ref.dst
            continue
        raise ValueError(
            f'Path from table {table.name!r} uses foreign key '
            f'{ref.constraint.name!r} from {ref.src.name!r} '
            f'after {current.name!r}'
        )

    if current.name == self.tenant_table_name:
        return current
    raise ValueError(
        f'Path from table {table.name!r} ends at {current.name!r}, '
        f'not tenant table {self.tenant_table_name!r}'
    )
305
+
306
def get_tenant_filter(
    self, path: t.Sequence[Ref], tenant_table: myrtille_types.Table
) -> tuple[list[str], str, t.Sequence[object]]:
    """Build the JOIN clauses and WHERE condition that restrict to the tenant.

    The root table is aliased ``t0`` by the caller; each joined table along
    the path gets the next alias (``t1``, ``t2``, ...).

    Args:
        path: Refs leading from the root table to the tenant table.
        tenant_table: The table the path ends at; must be the tenant table.

    Returns:
        ``(joins, where, where_values)``: the JOIN clauses, the WHERE
        condition with ``%s`` markers, and the values for those markers.

    Raises:
        ValueError: If ``tenant_table`` is not the configured tenant table or
            has no primary key.
    """
    if tenant_table.name != self.tenant_table_name:
        raise ValueError(
            f'Expected tenant table {self.tenant_table_name!r}, got {tenant_table.name!r}'
        )

    tenant_pk_columns = tuple(
        part.identifier for part in get_primary_key(tenant_table).key_list
    )

    # Default plan: join all the way to the tenant table and filter on its
    # primary key.
    query_path = path
    tenant_alias = f't{len(path)}'
    where_columns = tenant_pk_columns
    where_values: t.Sequence[object] = self.tenant_pk_value

    if path:
        last_ref = path[-1]
        last_ref_columns = tuple(last_ref.constraint.references.ref_columns)
        references_whole_key = len(last_ref_columns) == len(tenant_pk_columns) and set(
            last_ref_columns
        ) == set(tenant_pk_columns)
        if references_whole_key:
            # The last foreign key already carries the tenant key, so the
            # join to the tenant table is dropped and the filter is applied
            # to the foreign-key columns instead.
            query_path = path[:-1]
            tenant_key_by_column = dict(
                zip(tenant_pk_columns, self.tenant_pk_value, strict=True)
            )
            tenant_alias = f't{len(query_path)}'
            where_columns = last_ref.constraint.columns
            # Reorder the key values to follow the foreign key's column order.
            where_values = tuple(tenant_key_by_column[column] for column in last_ref_columns)

    joins: list[str] = []
    for depth, ref in enumerate(query_path):
        src_alias = f't{depth}'
        dst_alias = f't{depth + 1}'
        column_pairs = zip(
            ref.constraint.columns, ref.constraint.references.ref_columns, strict=True
        )
        join_condition = ' AND '.join(
            f'{qualified_column(src_alias, src_column)} = '
            f'{qualified_column(dst_alias, dst_column)}'
            for src_column, dst_column in column_pairs
        )
        joins.append(
            f'JOIN {quote_table(self.schema_name, ref.dst.name)} '
            f'AS {quote_identifier(dst_alias)} ON {join_condition}'
        )

    where = ' AND '.join(
        f'{qualified_column(tenant_alias, column)} = %s' for column in where_columns
    )
    return joins, where, where_values
359
+
360
async def delete_by_tenant(
    self, table: myrtille_types.Table, path_to_tenant: t.Sequence[Ref]
) -> None:
    """Delete the rows of *table* that belong to the configured tenant.

    With joins, a multi-table ``DELETE t0 FROM ... JOIN ...`` is issued.
    Without joins, a plain ``DELETE FROM`` is used and the ``t0`` alias
    prefix is stripped from the WHERE condition.
    """
    tenant_table = self.get_tenant_table_for_path(table, path_to_tenant)
    joins, where, where_values = self.get_tenant_filter(path_to_tenant, tenant_table)

    quoted_table = quote_table(self.schema_name, table.name)
    root_alias = quote_identifier("t0")
    if not joins:
        unaliased_where = where.replace(f'{root_alias}.', "")
        clauses = [f'DELETE FROM {quoted_table}', f'WHERE {unaliased_where}']
    else:
        clauses = [
            f'DELETE {root_alias}',
            f'FROM {quoted_table} AS {root_alias}',
            *joins,
            f'WHERE {where}',
        ]
    stmt = ' '.join(clauses)
    await self.cnx.execute(stmt, params=where_values)
383
+
384
async def count_by_tenant(
    self, table: myrtille_types.Table, path_to_tenant: t.Sequence[Ref]
) -> int:
    """Count the rows of *table* that belong to the configured tenant."""
    tenant_table = self.get_tenant_table_for_path(table, path_to_tenant)
    joins, where, where_values = self.get_tenant_filter(path_to_tenant, tenant_table)

    clauses = [
        'SELECT COUNT(*)',
        f'FROM {quote_table(self.schema_name, table.name)}',
        f'AS {quote_identifier("t0")}',
        *joins,
        f'WHERE {where}',
    ]
    row = await self.cnx.fetch_optional(' '.join(clauses), where_values)
    return parse_count(t.cast(t.Sequence[object], row))
405
+
406
def get_unreferenced_filter(self, refs: t.Sequence[Ref]) -> str:
    """Build a condition that holds for ``t0`` rows no ref points at.

    Each ref contributes one ``NOT EXISTS`` subquery over its source table
    (aliased ``r0``, ``r1``, ...); the subqueries are combined with ``AND``.
    With no refs the condition is the literal ``TRUE``.
    """
    if not refs:
        return 'TRUE'

    def not_exists_clause(index: int, ref: Ref) -> str:
        ref_alias = f'r{index}'
        column_pairs = zip(
            ref.constraint.columns, ref.constraint.references.ref_columns, strict=True
        )
        join_condition = ' AND '.join(
            f'{qualified_column(ref_alias, src_column)} = {qualified_column("t0", dst_column)}'
            for src_column, dst_column in column_pairs
        )
        pieces = [
            'NOT EXISTS (',
            'SELECT 1',
            f'FROM {quote_table(self.schema_name, ref.src.name)}',
            f'AS {quote_identifier(ref_alias)}',
            f'WHERE {join_condition}',
            ')',
        ]
        return ' '.join(pieces)

    conditions = [not_exists_clause(i, ref) for i, ref in enumerate(refs)]
    return ' AND '.join(conditions)
431
+
432
async def count_unreferenced_rows(
    self, table: myrtille_types.Table, ref_maps: TableNameRefMaps
) -> int:
    """Count the rows of *table* that no incoming foreign key points at."""
    incoming_refs = ref_maps.reverse.get(table.name, [])
    where = self.get_unreferenced_filter(incoming_refs)
    clauses = [
        'SELECT COUNT(*)',
        f'FROM {quote_table(self.schema_name, table.name)}',
        f'AS {quote_identifier("t0")}',
        f'WHERE {where}',
    ]
    row = await self.cnx.fetch_optional(' '.join(clauses))
    return parse_count(t.cast(t.Sequence[object], row))
450
+
451
async def cleanup_unreferenced_rows(
    self, table: myrtille_types.Table, ref_maps: TableNameRefMaps
) -> int:
    """Delete the unreferenced rows of *table*.

    Returns:
        The number of unreferenced rows counted just before the DELETE
        (0 when nothing had to be deleted and no DELETE was issued).
    """
    count = await self.count_unreferenced_rows(table, ref_maps)
    if count != 0:
        incoming_refs = ref_maps.reverse.get(table.name, [])
        where = self.get_unreferenced_filter(incoming_refs)
        root_alias = quote_identifier("t0")
        clauses = [
            f'DELETE {root_alias}',
            f'FROM {quote_table(self.schema_name, table.name)} AS {root_alias}',
            f'WHERE {where}',
        ]
        await self.cnx.execute(' '.join(clauses))
        return count
    return 0
469
+
470
async def set_optional_ref_to_null_by_tenant(
    self, ref: Ref, path_to_tenant: t.Sequence[Ref]
) -> None:
    """Null an optional foreign key on the rows that point into the tenant.

    Args:
        ref: The optional foreign key to clear.
        path_to_tenant: Path from ``ref.dst`` to the tenant table.

    Raises:
        ValueError: If ``ref`` is required (all its columns are non-nullable).
    """
    if is_ref_required(ref):
        raise ValueError(
            f'Foreign key {ref.constraint.name!r} on table {ref.src.name!r} is required'
        )

    tenant_table = self.get_tenant_table_for_path(ref.dst, path_to_tenant)
    # The filter is built from the referencing table, so the ref itself is
    # the first hop of the path.
    joins, where, where_values = self.get_tenant_filter([ref, *path_to_tenant], tenant_table)

    columns_by_name = {column.name: column for column in ref.src.columns}
    # Only the nullable columns of the key can be set to NULL.
    nullable_columns = [
        column
        for column in ref.constraint.columns
        if not columns_by_name[column].non_nullable
    ]
    assignments = ', '.join(
        f'{qualified_column("t0", column)} = NULL' for column in nullable_columns
    )

    clauses = [
        f'UPDATE {quote_table(self.schema_name, ref.src.name)}',
        f'AS {quote_identifier("t0")}',
        *joins,
        f'SET {assignments}',
        f'WHERE {where}',
    ]
    await self.cnx.execute(' '.join(clauses), params=where_values)
499
+
500
+
501
# Command-line application; the ``delete`` and ``cleanup`` commands register on
# it through ``@APP.command`` and ``main`` runs it.
APP = cyclopts.App()
502
+
503
+
504
+ def parse_db_config(url: str, *, echo: bool | None):
505
+ parsed = urllib.parse.urlparse(url)
506
+ if parsed.hostname is None:
507
+ raise ValueError('Missing host in database URL')
508
+ schema_name = parsed.path.lstrip('/') if parsed.path else None
509
+ if not schema_name:
510
+ raise ValueError('Missing schema name in database URL')
511
+ user = urllib.parse.unquote(parsed.username) if parsed.username else None
512
+ if not user:
513
+ raise ValueError('Missing user in database URL')
514
+ password = urllib.parse.unquote(parsed.password) if parsed.password else None
515
+ if not password:
516
+ raise ValueError('Missing password in database URL')
517
+
518
+ return (
519
+ myrtille_cfg.Database(
520
+ user=user,
521
+ password=password,
522
+ host=parsed.hostname,
523
+ port=parsed.port or 3306,
524
+ echo=echo,
525
+ pool_size=1,
526
+ ),
527
+ schema_name,
528
+ )
529
+
530
+
531
def parse_key_values(raw_key: str) -> t.Sequence[object]:
    """Turn the command-line tenant key into a tuple of key values.

    The text is first read as JSON: a JSON list yields one value per element
    and any other JSON value yields a single-element tuple. Text that is not
    valid JSON is kept verbatim as a single string value.
    """
    try:
        value = json.loads(raw_key)
    except json.JSONDecodeError:
        return (raw_key,)
    return tuple(value) if util.is_list(value) else (value,)
539
+
540
+
541
# Delete every row belonging to one tenant, then commit or roll back.
#
# Steps, all on a single connection:
#   1. load the tables and validate the tenant key,
#   2. check that every ``skip`` table holds no row for the tenant,
#   3. null the optional foreign keys that point into tenant-scoped tables,
#   4. delete tenant rows table by table in dependency order,
#   5. commit when ``commit`` is set, otherwise issue ROLLBACK.
#
# Documented with comments rather than a docstring so the help text generated
# for the command is left unchanged.
@APP.command
async def delete(
    url: str,
    tenant_table: str,
    tenant_pk_value: str,
    *,
    skip: set[str] | None = None,
    commit: bool | None = None,
    cache: pathlib.Path | None = None,
    echo: bool | None = None,
) -> None:
    db_config, schema_name = parse_db_config(url, echo=echo)
    key_values = parse_key_values(tenant_pk_value)
    async with Context.create(
        db_config,
        schema_name=schema_name,
        tenant_table_name=tenant_table,
        tenant_pk_value=key_values,
        commit=commit,
        cache=cache,
    ) as ctx:
        tables = await ctx.load_tables()
        ctx.validate_key_values(tables)
        ref_maps = ctx.get_ref_maps(tables)
        refs_to_tenant = ctx.get_refs_to_tenant(ref_maps)

        # Tables named in ``skip`` must already be empty for this tenant.
        for table_name in skip or ():
            skip_path = refs_to_tenant.get(table_name)
            if skip_path is None:
                continue
            count = await ctx.count_by_tenant(tables[table_name], skip_path)
            if count != 0:
                raise RuntimeError(f'Expected 0 {table_name} in tenant but got {count}')

        # Clear optional references first so they cannot block the deletes.
        for refs in ref_maps.forward.values():
            for ref in refs:
                if is_ref_required(ref):
                    continue
                target_path = refs_to_tenant.get(ref.dst.name)
                if target_path is None:
                    continue
                await ctx.set_optional_ref_to_null_by_tenant(ref, target_path)

        for sorted_name in ctx.get_sorted_tables(tables, ref_maps):
            delete_path = refs_to_tenant.get(sorted_name)
            if delete_path is None:
                continue
            await ctx.delete_by_tenant(tables[sorted_name], delete_path)

        if not ctx.commit:
            await ctx.cnx.execute('ROLLBACK')
        else:
            await ctx.cnx.commit()
588
+
589
+
590
# Delete unreferenced rows from the cleanup candidate tables, repeating full
# passes until a pass finds nothing left to delete, then commit or roll back.
#
# Documented with comments rather than a docstring so the help text generated
# for the command is left unchanged.
@APP.command
async def cleanup(
    url: str,
    tenant_table: str,
    *,
    commit: bool | None = None,
    cache: pathlib.Path | None = None,
    echo: bool | None = None,
) -> None:
    db_config, schema_name = parse_db_config(url, echo=echo)
    async with Context.create(
        db_config,
        schema_name=schema_name,
        tenant_table_name=tenant_table,
        tenant_pk_value=(),
        commit=commit,
        cache=cache,
    ) as ctx:
        tables = await ctx.load_tables()
        if tenant_table not in tables:
            raise ValueError(f'Unknown tenant table {tenant_table!r}')

        ref_maps = ctx.get_ref_maps(tables)
        cleanup_tables = ctx.get_cleanup_tables(tables, ref_maps)

        # Deleting rows in one table can leave rows of another unreferenced,
        # so passes are repeated until one deletes nothing.
        rows_were_deleted = True
        while rows_were_deleted:
            deleted_count = 0
            for table in cleanup_tables:
                deleted_count += await ctx.cleanup_unreferenced_rows(table, ref_maps)
            rows_were_deleted = deleted_count != 0

        if not ctx.commit:
            await ctx.cnx.execute('ROLLBACK')
        else:
            await ctx.cnx.commit()
626
+
627
+
628
def main():
    """Run the command-line application (the ``titto`` console-script target)."""
    APP(print_error=True, exit_on_error=False)
titto/util.py ADDED
@@ -0,0 +1,63 @@
1
+ import collections
2
+ import graphlib
3
+ import typing as t
4
+
5
+
6
+ def explore_all[T: t.Hashable](
7
+ root: t.Iterable[T], explorer: t.Callable[[T], t.Iterable[T]]
8
+ ) -> t.Iterator[tuple[T, list[T]]]:
9
+ explored: set[T] = set()
10
+ enqueued: set[T] = set()
11
+ queue: collections.deque[tuple[T, list[T]]] = collections.deque()
12
+
13
+ for node in root:
14
+ if node in enqueued:
15
+ continue
16
+ enqueued.add(node)
17
+ queue.append((node, []))
18
+
19
+ while queue:
20
+ node, path_to_node = queue.popleft()
21
+ if node in explored:
22
+ continue
23
+ explored.add(node)
24
+ path_to_neighbor = [*path_to_node, node]
25
+ yield node, path_to_node
26
+ for neighbor in explorer(node):
27
+ if neighbor in explored or neighbor in enqueued:
28
+ continue
29
+ enqueued.add(neighbor)
30
+ queue.append((neighbor, path_to_neighbor))
31
+
32
+
33
+ def topological_sort[T: t.Hashable](
34
+ root: t.Iterable[T], explorer: t.Callable[[T], t.Iterable[T]]
35
+ ) -> t.Iterable[T]:
36
+ sorter = graphlib.TopologicalSorter[T]()
37
+ visited = set[T]()
38
+ stack = list[T](root)
39
+ while stack:
40
+ node = stack.pop()
41
+ if node not in visited:
42
+ visited.add(node)
43
+ dependencies = list(explorer(node))
44
+ sorter.add(node, *dependencies)
45
+ stack.extend(dependencies)
46
+ return sorter.static_order()
47
+
48
+
49
+ def groupby[T, K: t.Hashable](
50
+ values: t.Iterable[T], key: t.Callable[[T], K]
51
+ ) -> t.Mapping[K, t.Sequence[T]]:
52
+ key_values_map: collections.defaultdict[K, list[T]] = collections.defaultdict(list)
53
+ for value in values:
54
+ key_values_map[key(value)].append(value)
55
+ return dict(key_values_map)
56
+
57
+
58
def is_list(o: object) -> t.TypeGuard[list[object]]:
    """Return True when *o* is a ``list``, narrowing its type for checkers."""
    return isinstance(o, list)
60
+
61
+
62
def is_dict(o: object) -> t.TypeGuard[dict[object, object]]:
    """Return True when *o* is a ``dict``, narrowing its type for checkers."""
    return isinstance(o, dict)
@@ -0,0 +1,98 @@
1
+ Metadata-Version: 2.3
2
+ Name: titto
3
+ Version: 0.1.0
4
+ Summary: Delete tenant-scoped MySQL data by walking foreign-key relationships
5
+ Author: Florian Daude
6
+ Author-email: Florian Daude <floriandaude@hotmail.fr>
7
+ Requires-Dist: cyclopts>=4.16.1
8
+ Requires-Dist: myrtille>=0.1.3
9
+ Requires-Dist: pydantic>=2.13.4
10
+ Requires-Dist: slcfg>=0.3.4
11
+ Requires-Python: >=3.14
12
+ Description-Content-Type: text/markdown
13
+
14
+ # Titto
15
+
16
+ Titto deletes tenant-scoped rows from a MySQL schema by reading table metadata,
17
+ following foreign-key paths back to a tenant table, nulling optional references,
18
+ and deleting rows in dependency order.
19
+
20
+ ## Warnings
21
+
22
+ This is a destructive database tool. Run it against production only after testing
23
+ the exact command against a restored copy or staging database.
24
+
25
+ `--commit` permanently commits the deletes. Without `--commit`, Titto still runs
26
+ the `UPDATE` and `DELETE` statements inside a transaction, then rolls them back.
27
+ That dry run can still take locks, fire triggers, and affect non-transactional
28
+ tables.
29
+
30
+ Take a backup before any committed run. Prefer a database snapshot or logical dump
31
+ that you have already tested restoring. Keep the backup until the tenant removal
32
+ has been verified.
33
+
34
+ ## Usage
35
+
36
+ Show commands:
37
+
38
+ ```sh
39
+ uv run titto --help
40
+ ```
41
+
42
+ Dry-run a single-column tenant key and cache table metadata as JSON:
43
+
44
+ ```sh
45
+ uv run titto delete "$DB_URL" tenant 42 --cache .tables.json
46
+ ```
47
+
48
+ Commit the delete:
49
+
50
+ ```sh
51
+ uv run titto delete "$DB_URL" tenant 42 --cache .tables.json --commit
52
+ ```
53
+
54
+ Use a composite tenant key by passing JSON:
55
+
56
+ ```sh
57
+ uv run titto delete "$DB_URL" tenant '[42, "eu"]' --cache .tables.json
58
+ ```
59
+
60
+ Require selected tables to be empty for the tenant before deleting anything:
61
+
62
+ ```sh
63
+ uv run titto delete "$DB_URL" tenant 42 --skip invoice --skip payment
64
+ ```
65
+
66
+ Clean up unreferenced rows in tables that are not directly tenant-scoped:
67
+
68
+ ```sh
69
+ uv run titto cleanup "$DB_URL" tenant --cache .tables.json
70
+ ```
71
+
72
+ `DB_URL` is a MySQL URL such as:
73
+
74
+ ```sh
75
+ mysql://user:password@localhost:3306/app_schema
76
+ ```
77
+
78
+ Avoid putting real passwords directly in shell history. Use an environment
79
+ variable, secret manager, or another shell-safe mechanism appropriate for your
80
+ environment.
81
+
82
+ ## Cache Notes
83
+
84
+ `--cache` stores parsed table metadata as JSON. The recommended local cache path
85
+ is `.tables.json`; it is ignored by Git.
86
+
87
+ The cache is tied to the host, port, schema name, and cache format version. Titto
88
+ rejects mismatched cache metadata and reparses the database schema. If the schema
89
+ changes on the same host and schema name, delete `.tables.json` so Titto can
90
+ refresh it.
91
+
92
+ ## Development
93
+
94
+ Run the test suite:
95
+
96
+ ```sh
97
+ uv run python -m unittest
98
+ ```
@@ -0,0 +1,6 @@
1
+ titto/__init__.py,sha256=cxL--HBd5qRxOIyJgCUp_WU4qMZGKVIpxdnIF_mCAQQ,22362
2
+ titto/util.py,sha256=hfT1rRh9WwXamTct4kNoPoQTV5VjeGzrIu7OJH8qWoQ,1870
3
+ titto-0.1.0.dist-info/WHEEL,sha256=o6xtdofIa8Zz80kUveEHMWeAWtEyZSzYS1bbyKDCgzA,80
4
+ titto-0.1.0.dist-info/entry_points.txt,sha256=HtCIaae-ZjDuOvzVeNvBXDuOuwtVlS2ds4otEz99fcg,38
5
+ titto-0.1.0.dist-info/METADATA,sha256=BDz_1ZserQ9fHA1vCF4hQCsILmwtq8OVQ3fnTsniZ8o,2652
6
+ titto-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: uv 0.10.4
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,3 @@
1
+ [console_scripts]
2
+ titto = titto:main
3
+