winipedia-django 0.2.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,23 @@
1
+ """__init__ module."""
2
+
3
+ import django
4
+ import django_stubs_ext
5
+ from django.conf import settings
6
+ from winipedia_utils.logging.logger import get_logger
7
+
8
# Module-level logger named after this package.
logger = get_logger(__name__)

# Apply the django-stubs-ext runtime patch before anything else touches Django.
# NOTE(review): per the django-stubs docs this lets generic Django classes be
# subscripted at runtime -- confirm against the installed version.
django_stubs_ext.monkeypatch()
logger.info("Monkeypatched django-stubs")

# Fall back to a minimal, in-memory SQLite configuration only when no settings
# have been configured yet, so an already-configured project is left untouched.
if not settings.configured:
    logger.info("Configuring minimal django settings")
    settings.configure(
        DATABASES={
            "default": {
                "ENGINE": "django.db.backends.sqlite3",
                "NAME": ":memory:",
            }
        },
    )
    # NOTE(review): indentation was lost in the diff rendering this was taken
    # from; django.setup() is assumed to belong to this branch -- confirm.
    django.setup()
@@ -0,0 +1,538 @@
1
+ """Bulk utilities for Django models.
2
+
3
+ This module provides utility functions for working with Django models,
4
+ including bulk operations and validation. These utilities help with
5
+ efficiently managing large amounts of data in Django applications.
6
+ """
7
+
8
+ from collections import defaultdict
9
+ from collections.abc import Callable, Generator, Iterable
10
+ from functools import partial
11
+ from itertools import islice
12
+ from typing import TYPE_CHECKING, Any, Literal, cast, get_args
13
+
14
+ from django.db import router, transaction
15
+ from django.db.models import (
16
+ Field,
17
+ Model,
18
+ QuerySet,
19
+ )
20
+ from django.db.models.deletion import Collector
21
+ from winipedia_utils.concurrent.multithreading import multithread_loop
22
+ from winipedia_utils.logging.logger import get_logger
23
+
24
+ from winipedia_django.database import (
25
+ hash_model_instance,
26
+ topological_sort_models,
27
+ )
28
+
29
+ if TYPE_CHECKING:
30
+ from django.contrib.contenttypes.fields import GenericForeignKey
31
+ from django.db.models.fields.related import ForeignObjectRel
32
+
33
# Module-level logger named after this module; used for the transaction warning
# emitted by bulk_method_in_steps.
logger = get_logger(__name__)
34
+
35
# Closed set of supported bulk operations. The Literal type is the single
# source of truth; the runtime tuple and the named constants derive from it.
MODE_TYPES = Literal["create", "update", "delete"]
MODES = get_args(MODE_TYPES)

# Unpacked in declaration order of the Literal above.
MODE_CREATE, MODE_UPDATE, MODE_DELETE = MODES

# Default number of instances handed to a single bulk call.
STANDARD_BULK_SIZE = 1000
43
+
44
+
45
def bulk_create_in_steps(
    model: type[Model],
    bulk: Iterable[Model],
    step: int = STANDARD_BULK_SIZE,
) -> list[Model]:
    """Insert the given instances into the database chunk by chunk.

    Thin wrapper around ``bulk_method_in_steps`` in "create" mode: the bulk is
    split into chunks of ``step`` instances and each chunk is passed to the
    model manager's ``bulk_create``.

    Args:
        model (type[Model]): The Django model class the instances belong to.
        bulk (Iterable[Model]): The unsaved instances to create.
        step (int, optional): Number of instances per chunk.
            Defaults to STANDARD_BULK_SIZE.

    Returns:
        list[Model]: The created instances of all chunks, flattened into one list.
    """
    created = bulk_method_in_steps(
        model=model,
        bulk=bulk,
        step=step,
        mode=MODE_CREATE,
    )
    # The generic helper returns a union; in create mode it is always a list.
    return cast("list[Model]", created)
69
+
70
+
71
def bulk_update_in_steps(
    model: type[Model],
    bulk: Iterable[Model],
    update_fields: list[str],
    step: int = STANDARD_BULK_SIZE,
) -> int:
    """Write the listed fields of the given instances back in chunks.

    Thin wrapper around ``bulk_method_in_steps`` in "update" mode: the bulk is
    split into chunks of ``step`` instances and each chunk is passed to the
    model manager's ``bulk_update`` with ``fields=update_fields``.

    Args:
        model (type[Model]): The Django model class the instances belong to.
        bulk (Iterable[Model]): The instances whose fields should be persisted.
        update_fields (list[str]): Names of the fields to write.
        step (int, optional): Number of instances per chunk.
            Defaults to STANDARD_BULK_SIZE.

    Returns:
        int: Sum of the per-chunk update counts.
    """
    updated = bulk_method_in_steps(
        model=model,
        bulk=bulk,
        step=step,
        mode=MODE_UPDATE,
        fields=update_fields,
    )
    # The generic helper returns a union; in update mode it is always an int.
    return cast("int", updated)
99
+
100
+
101
def bulk_delete_in_steps(
    model: type[Model], bulk: Iterable[Model], step: int = STANDARD_BULK_SIZE
) -> tuple[int, dict[str, int]]:
    """Delete the given instances from the database chunk by chunk.

    Thin wrapper around ``bulk_method_in_steps`` in "delete" mode: the bulk is
    split into chunks of ``step`` instances and each chunk is removed through
    ``bulk_delete``. The per-chunk deletion statistics are merged.

    Args:
        model (type[Model]): The Django model class to delete from.
        bulk (Iterable[Model]): The instances to delete.
        step (int, optional): Number of instances per chunk.
            Defaults to STANDARD_BULK_SIZE.

    Returns:
        tuple[int, dict[str, int]]: The total number of deleted objects and a
            dictionary mapping model names to their deletion counts.
    """
    deleted = bulk_method_in_steps(
        model=model,
        bulk=bulk,
        step=step,
        mode=MODE_DELETE,
    )
    # The generic helper returns a union; in delete mode it is always this tuple.
    return cast("tuple[int, dict[str, int]]", deleted)
131
+
132
+
133
def bulk_method_in_steps(
    model: type[Model],
    bulk: Iterable[Model],
    step: int,
    mode: MODE_TYPES,
    **kwargs: Any,
) -> int | tuple[int, dict[str, int]] | list[Model]:
    """Run a chunked bulk create, update or delete, warning about outer transactions.

    Entry point shared by the mode-specific wrappers. It logs a notice when the
    call happens inside an already open atomic block and then delegates the
    actual work to ``bulk_method_in_steps_atomic``.

    Args:
        model (type[Model]): The Django model class to operate on.
        bulk (Iterable[Model]): The instances to process.
        step (int): Number of instances per chunk.
        mode (MODE_TYPES): The operation mode - 'create', 'update', or 'delete'.
        **kwargs: Extra keyword arguments forwarded to the bulk operation.

    Returns:
        int | tuple[int, dict[str, int]] | list[Model]: Depends on mode:
            - create: list of created model instances
            - update: integer count of updated objects
            - delete: tuple of (total_count, count_by_model_dict)
    """
    # Only the connection returned by transaction.get_connection() without
    # arguments is inspected here.
    if transaction.get_connection().in_atomic_block:
        logger.info(
            "BE CAREFUL USING BULK OPERATIONS INSIDE A BROADER TRANSACTION BLOCK. "
            "BULKING WITH BULKS THAT DEPEND ON EACH OTHER CAN CAUSE "
            "INTEGRITY ERRORS OR POTENTIAL OTHER ISSUES."
        )

    return bulk_method_in_steps_atomic(
        model=model,
        bulk=bulk,
        step=step,
        mode=mode,
        **kwargs,
    )
172
+
173
+
174
@transaction.atomic
def bulk_method_in_steps_atomic(
    model: type[Model],
    bulk: Iterable[Model],
    step: int,
    mode: MODE_TYPES,
    **kwargs: Any,
) -> int | tuple[int, dict[str, int]] | list[Model]:
    """Bulk create, update or delete the given objects in chunks of ``step``.

    Warning: creating or updating one bulk and then a second bulk that depends
    on the first, all inside an outer ``@transaction.atomic`` block, can lead
    to an integrity error. Remove atomic decorators that sit above this
    function in the call chain to avoid that.

    Args:
        model (type[Model]): The Django model class to operate on.
        bulk (Iterable[Model]): The instances to process.
        step (int): Number of objects to process in one chunk.
        mode (MODE_TYPES): The operation mode - 'create', 'update', or 'delete'.
        **kwargs: Extra keyword arguments forwarded to the bulk operation.

    Returns:
        int | tuple[int, dict[str, int]] | list[Model]: Depends on mode:
            - create: list of created model instances
            - update: integer count of updated objects
            - delete: tuple of (total_count, count_by_model_dict)
    """
    # NOTE(review): multithread_loop presumably runs the chunk calls in worker
    # threads. Django database connections are per thread, so those calls may
    # not take part in the transaction opened by the decorator -- confirm.
    chunk_results = multithread_loop(
        process_function=get_bulk_method(model=model, mode=mode, **kwargs),
        process_args=get_step_chunks(bulk=bulk, step=step),
    )
    return flatten_bulk_in_steps_result(result=chunk_results, mode=mode)
217
+
218
+
219
def get_step_chunks(
    bulk: Iterable[Model], step: int
) -> Generator[tuple[list[Model]], None, None]:
    """Yield the bulk as consecutive chunks of at most ``step`` items.

    The bulk is consumed lazily, so generators and other one-shot iterables
    are supported. Every chunk is wrapped in a 1-tuple because the chunks are
    used as per-call argument tuples for the multithreaded loop.

    Args:
        bulk (Iterable[Model]): The bulk to chunk.
        step (int): The maximum size of each chunk. Must be at least 1.

    Raises:
        ValueError: If ``step`` is smaller than 1. The check runs when the
            generator is first advanced.

    Yields:
        tuple[list[Model]]: A 1-tuple holding the next chunk of the bulk.
    """
    # A step of 0 would make islice return nothing, so the loop would stop
    # immediately and every item would be dropped without any error.
    if step < 1:
        msg = f"step must be a positive integer, got {step}"
        raise ValueError(msg)

    iterator = iter(bulk)
    # An empty chunk means the iterator is exhausted.
    while chunk := list(islice(iterator, step)):
        yield (chunk,)
237
+
238
+
239
def get_bulk_method(
    model: type[Model], mode: MODE_TYPES, **kwargs: Any
) -> Callable[[list[Model]], list[Model] | int | tuple[int, dict[str, int]]]:
    """Build the function that applies the requested operation to one chunk.

    The returned callable takes a single chunk (a list of instances) and closes
    over ``model`` and ``kwargs``. In delete mode the kwargs are passed on to
    ``bulk_delete``, which accepts and ignores them.

    Args:
        model (type[Model]): The Django model class to operate on.
        mode (MODE_TYPES): The operation mode - 'create', 'update', or 'delete'.
        **kwargs: Extra keyword arguments forwarded to the bulk operation.

    Raises:
        ValueError: If the mode is not one of the valid MODE_TYPES.

    Returns:
        Callable[[list[Model]], Any]: A function performing the bulk operation
            on one chunk of model instances.
    """
    if mode == MODE_CREATE:

        def create_chunk(chunk: list[Model]) -> list[Model]:
            return model.objects.bulk_create(objs=chunk, **kwargs)

        return create_chunk

    if mode == MODE_UPDATE:

        def update_chunk(chunk: list[Model]) -> int:
            return model.objects.bulk_update(objs=chunk, **kwargs)

        return update_chunk

    if mode == MODE_DELETE:

        def delete_chunk(chunk: list[Model]) -> tuple[int, dict[str, int]]:
            return bulk_delete(model=model, objs=chunk, **kwargs)

        return delete_chunk

    msg = f"Invalid method. Must be one of {MODES}"
    raise ValueError(msg)
284
+
285
+
286
def flatten_bulk_in_steps_result(
    result: list[Any], mode: str
) -> int | tuple[int, dict[str, int]] | list[Model]:
    """Merge the per-chunk results of a bulk operation into one value.

    Args:
        result (list[Any]): One entry per processed chunk, shaped by the mode.
        mode (str): The operation mode - 'create', 'update', or 'delete'.

    Raises:
        ValueError: If the mode is not one of the valid operation modes.

    Returns:
        int | tuple[int, dict[str, int]] | list[Model]: Aggregated result:
            - create: flattened list of all created objects
            - update: sum of updated object counts
            - delete: tuple of (total_count, count_by_model_dict)
    """
    if mode == MODE_CREATE:
        # result looks like [[obj1, obj2, ...], [obj1, obj2, ...], ...]
        created: list[Model] = []
        for chunk_objects in result:
            created.extend(chunk_objects)
        return created

    if mode == MODE_UPDATE:
        # result looks like [1000, 1000, ...]: one update count per chunk
        return int(sum(result))

    if mode == MODE_DELETE:
        # result looks like [(count, {model_name: count, ...}), ...]
        deleted_total = 0
        deleted_by_model: dict[str, int] = {}
        for chunk_total, chunk_by_model in result:
            deleted_total += chunk_total
            for model_name, count in chunk_by_model.items():
                deleted_by_model[model_name] = (
                    deleted_by_model.get(model_name, 0) + count
                )
        return (deleted_total, deleted_by_model)

    msg = f"Invalid method. Must be one of {MODES}"
    raise ValueError(msg)
328
+
329
+
330
def bulk_delete(
    model: type[Model], objs: Iterable[Model], **_: Any
) -> tuple[int, dict[str, int]]:
    """Delete the given instances with a single QuerySet ``delete`` call.

    A QuerySet is deleted as is. Any other iterable is reduced to the primary
    keys of its instances, and the matching rows of ``model`` are deleted.

    Args:
        model (type[Model]): The Django model class to delete from.
        objs (Iterable[Model]): The instances, or a QuerySet, to delete.
        **_: Accepted for signature compatibility and ignored.

    Returns:
        tuple[int, dict[str, int]]: The total number of deleted objects and a
            dictionary mapping model names to their deletion counts.
    """
    if isinstance(objs, QuerySet):
        return objs.delete()

    primary_keys = [instance.pk for instance in objs]
    return model.objects.filter(pk__in=primary_keys).delete()
355
+
356
+
357
def bulk_create_bulks_in_steps(
    bulk_by_class: dict[type[Model], Iterable[Model]],
    step: int = STANDARD_BULK_SIZE,
) -> dict[type[Model], list[Model]]:
    """Create several bulks of different models, ordered by their dependencies.

    The model classes are ordered with ``topological_sort_models`` and each
    bulk is then created with ``bulk_create_in_steps`` in that order.

    Args:
        bulk_by_class (dict[type[Model], Iterable[Model]]): Mapping of model
            classes to the instances to create for them.
        step (int, optional): Number of instances per chunk.
            Defaults to STANDARD_BULK_SIZE.

    Returns:
        dict[type[Model], list[Model]]: Mapping of model classes to the
            instances created for them, in creation order.
    """
    creation_order = topological_sort_models(models=list(bulk_by_class.keys()))
    return {
        model_class: bulk_create_in_steps(
            model=model_class,
            bulk=bulk_by_class[model_class],
            step=step,
        )
        for model_class in creation_order
    }
389
+
390
+
391
def get_differences_between_bulks(
    bulk1: list[Model],
    bulk2: list[Model],
    fields: "list[Field[Any, Any] | ForeignObjectRel | GenericForeignKey]",
) -> tuple[list[Model], list[Model], list[Model], list[Model]]:
    """Split two bulks into their exclusive and shared instances.

    Each instance is reduced to ``hash_model_instance(instance, fields=fields)``
    and the two bulks are compared by those hashes. The returned lists hold the
    original objects and keep the order of the bulk they came from.

    Args:
        bulk1 (list[Model]): First list of model instances to compare.
        bulk2 (list[Model]): Second list of model instances to compare.
        fields (list[Field | ForeignObjectRel | GenericForeignKey]): The fields
            passed to ``hash_model_instance`` for the comparison.

    Raises:
        ValueError: If the first elements of the two bulks differ in type.

    Returns:
        tuple[list[Model], list[Model], list[Model], list[Model]]: A tuple of:
            - Objects in bulk1 but not in bulk2
            - Objects in bulk2 but not in bulk1
            - Objects in both bulk1 and bulk2 (from bulk1)
            - Objects in both bulk1 and bulk2 (from bulk2)
    """
    # With an empty side nothing can be shared; return the inputs untouched.
    if not bulk1 or not bulk2:
        return bulk1, bulk2, [], []

    if type(bulk1[0]) is not type(bulk2[0]):
        msg = "Both bulks must be of the same model type."
        raise ValueError(msg)

    # One hash per instance, kept parallel to the bulks.
    keys1 = [hash_model_instance(instance, fields=fields) for instance in bulk1]
    keys2 = [hash_model_instance(instance, fields=fields) for instance in bulk2]
    seen1, seen2 = set(keys1), set(keys2)

    only_in_1: list[Model] = []
    shared_from_1: list[Model] = []
    for instance, key in zip(bulk1, keys1, strict=False):
        if key in seen2:
            shared_from_1.append(instance)
        else:
            only_in_1.append(instance)

    only_in_2: list[Model] = []
    shared_from_2: list[Model] = []
    for instance, key in zip(bulk2, keys2, strict=False):
        if key in seen1:
            shared_from_2.append(instance)
        else:
            only_in_2.append(instance)

    return only_in_1, only_in_2, shared_from_1, shared_from_2
469
+
470
+
471
def simulate_bulk_deletion(
    model_class: type[Model], entries: list[Model]
) -> dict[type[Model], set[Model]]:
    """Preview which objects a deletion of ``entries`` would remove.

    Runs Django's deletion ``Collector`` over the entries and gathers what it
    collected, without calling its delete step.

    Args:
        model_class (type[Model]): The model class of the entries; used to pick
            the database alias via ``router.db_for_write``.
        entries (list[Model]): The instances to simulate the deletion for.

    Returns:
        dict[type[Model], set[Model]]: Mapping of model classes to the objects
            the collector gathered for them. Empty when ``entries`` is empty.
    """
    if not entries:
        return {}

    collector = Collector(router.db_for_write(model_class))
    collector.collect(entries)

    summary: defaultdict[type[Model], set[Model]] = defaultdict(set)

    # Instances the collector gathered individually.
    for collected_model, instances in collector.data.items():
        summary[collected_model].update(instances)

    # Fast deletes are kept as querysets; evaluate them to get the instances.
    for fast_queryset in collector.fast_deletes:
        summary[fast_queryset.model].update(list(fast_queryset))

    return summary
510
+
511
+
512
def multi_simulate_bulk_deletion(
    entries: dict[type[Model], list[Model]],
) -> dict[type[Model], set[Model]]:
    """Simulate the deletion for several model types and merge the previews.

    Calls ``simulate_bulk_deletion`` once per model class and unions the
    resulting object sets per model.

    Args:
        entries (dict[type[Model], list[Model]]): Mapping of model classes to
            the instances to simulate the deletion for.

    Returns:
        dict[type[Model], set[Model]]: Mapping of model classes to all objects
            that would be deleted across the simulations.
    """
    merged: defaultdict[type[Model], set[Model]] = defaultdict(set)
    for model_class, model_entries in entries.items():
        preview = simulate_bulk_deletion(model_class, model_entries)
        for affected_model, instances in preview.items():
            merged[affected_model].update(instances)

    return dict(merged)