winipedia-utils 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- winipedia_utils/__init__.py +1 -0
- winipedia_utils/concurrent/__init__.py +1 -0
- winipedia_utils/concurrent/concurrent.py +242 -0
- winipedia_utils/concurrent/multiprocessing.py +115 -0
- winipedia_utils/concurrent/multithreading.py +93 -0
- winipedia_utils/consts.py +22 -0
- winipedia_utils/data/__init__.py +1 -0
- winipedia_utils/data/dataframe.py +7 -0
- winipedia_utils/django/__init__.py +27 -0
- winipedia_utils/django/bulk.py +536 -0
- winipedia_utils/django/command.py +334 -0
- winipedia_utils/django/database.py +304 -0
- winipedia_utils/git/__init__.py +1 -0
- winipedia_utils/git/gitignore.py +80 -0
- winipedia_utils/git/pre_commit/__init__.py +1 -0
- winipedia_utils/git/pre_commit/config.py +60 -0
- winipedia_utils/git/pre_commit/hooks.py +109 -0
- winipedia_utils/git/pre_commit/run_hooks.py +49 -0
- winipedia_utils/iterating/__init__.py +1 -0
- winipedia_utils/iterating/iterate.py +29 -0
- winipedia_utils/logging/__init__.py +1 -0
- winipedia_utils/logging/ansi.py +6 -0
- winipedia_utils/logging/config.py +64 -0
- winipedia_utils/logging/logger.py +26 -0
- winipedia_utils/modules/__init__.py +1 -0
- winipedia_utils/modules/class_.py +76 -0
- winipedia_utils/modules/function.py +86 -0
- winipedia_utils/modules/module.py +361 -0
- winipedia_utils/modules/package.py +350 -0
- winipedia_utils/oop/__init__.py +1 -0
- winipedia_utils/oop/mixins/__init__.py +1 -0
- winipedia_utils/oop/mixins/meta.py +315 -0
- winipedia_utils/oop/mixins/mixin.py +28 -0
- winipedia_utils/os/__init__.py +1 -0
- winipedia_utils/os/os.py +61 -0
- winipedia_utils/projects/__init__.py +1 -0
- winipedia_utils/projects/poetry/__init__.py +1 -0
- winipedia_utils/projects/poetry/config.py +91 -0
- winipedia_utils/projects/poetry/poetry.py +30 -0
- winipedia_utils/setup.py +36 -0
- winipedia_utils/testing/__init__.py +1 -0
- winipedia_utils/testing/assertions.py +23 -0
- winipedia_utils/testing/convention.py +177 -0
- winipedia_utils/testing/create_tests.py +286 -0
- winipedia_utils/testing/fixtures.py +28 -0
- winipedia_utils/testing/tests/__init__.py +1 -0
- winipedia_utils/testing/tests/base/__init__.py +1 -0
- winipedia_utils/testing/tests/base/fixtures/__init__.py +1 -0
- winipedia_utils/testing/tests/base/fixtures/fixture.py +6 -0
- winipedia_utils/testing/tests/base/fixtures/scopes/__init__.py +1 -0
- winipedia_utils/testing/tests/base/fixtures/scopes/class_.py +33 -0
- winipedia_utils/testing/tests/base/fixtures/scopes/function.py +7 -0
- winipedia_utils/testing/tests/base/fixtures/scopes/module.py +31 -0
- winipedia_utils/testing/tests/base/fixtures/scopes/package.py +7 -0
- winipedia_utils/testing/tests/base/fixtures/scopes/session.py +224 -0
- winipedia_utils/testing/tests/base/utils/__init__.py +1 -0
- winipedia_utils/testing/tests/base/utils/utils.py +82 -0
- winipedia_utils/testing/tests/conftest.py +26 -0
- winipedia_utils/text/__init__.py +1 -0
- winipedia_utils/text/string.py +126 -0
- winipedia_utils-0.1.0.dist-info/LICENSE +21 -0
- winipedia_utils-0.1.0.dist-info/METADATA +350 -0
- winipedia_utils-0.1.0.dist-info/RECORD +64 -0
- winipedia_utils-0.1.0.dist-info/WHEEL +4 -0
@@ -0,0 +1,536 @@
|
|
1
|
+
"""Bulk utilities for Django models.
|
2
|
+
|
3
|
+
This module provides utility functions for working with Django models,
|
4
|
+
including bulk operations and validation. These utilities help with
|
5
|
+
efficiently managing large amounts of data in Django applications.
|
6
|
+
"""
|
7
|
+
|
8
|
+
from collections import defaultdict
|
9
|
+
from collections.abc import Callable, Generator, Iterable
|
10
|
+
from functools import partial
|
11
|
+
from itertools import islice
|
12
|
+
from typing import Any, Literal, cast, get_args
|
13
|
+
|
14
|
+
from django.contrib.contenttypes.fields import GenericForeignKey
|
15
|
+
from django.db import router, transaction
|
16
|
+
from django.db.models import (
|
17
|
+
Field,
|
18
|
+
Model,
|
19
|
+
QuerySet,
|
20
|
+
)
|
21
|
+
from django.db.models.deletion import Collector
|
22
|
+
from django.db.models.fields.related import ForeignObjectRel
|
23
|
+
|
24
|
+
from winipedia_utils.concurrent.multithreading import multithread_loop
|
25
|
+
from winipedia_utils.django.database import (
|
26
|
+
hash_model_instance,
|
27
|
+
topological_sort_models,
|
28
|
+
)
|
29
|
+
from winipedia_utils.logging.logger import get_logger
|
30
|
+
|
31
|
+
# Module-level logger, obtained from the project's get_logger helper for this module.
logger = get_logger(__name__)
|
32
|
+
|
33
|
+
# The closed set of bulk operation names accepted by the functions below.
MODE_TYPES = Literal["create", "update", "delete"]
# Runtime tuple with the same names, in declaration order.
MODES = get_args(MODE_TYPES)

# Individual mode names, unpacked in the order they appear in MODE_TYPES.
MODE_CREATE, MODE_UPDATE, MODE_DELETE = MODES

# Default number of objects handled per chunk.
STANDARD_BULK_SIZE = 1000
|
41
|
+
|
42
|
+
|
43
|
+
def bulk_create_in_steps(
    model: type[Model],
    bulk: Iterable[Model],
    step: int = STANDARD_BULK_SIZE,
) -> list[Model]:
    """Insert the given instances into the database chunk by chunk.

    Thin wrapper that forwards to ``bulk_method_in_steps`` in create mode,
    which splits the bulk into chunks of ``step`` instances and hands each
    chunk to the model manager's ``bulk_create``.

    Args:
        model (type[Model]): The Django model class the instances belong to.
        bulk (Iterable[Model]): The model instances to create.
        step (int, optional): Maximum number of instances per chunk.
            Defaults to STANDARD_BULK_SIZE.

    Returns:
        list[Model]: The created instances of all chunks as one flat list.
    """
    created = bulk_method_in_steps(
        model=model,
        bulk=bulk,
        step=step,
        mode=MODE_CREATE,
    )
    # The generic helper returns a union type; in create mode it is a list.
    return cast("list[Model]", created)
|
67
|
+
|
68
|
+
|
69
|
+
def bulk_update_in_steps(
    model: type[Model],
    bulk: Iterable[Model],
    update_fields: list[str],
    step: int = STANDARD_BULK_SIZE,
) -> int:
    """Write changed fields of the given instances to the database in chunks.

    Thin wrapper that forwards to ``bulk_method_in_steps`` in update mode.
    The field names are passed on as the ``fields`` keyword argument, which
    ends up in the model manager's ``bulk_update`` call for every chunk.

    Args:
        model (type[Model]): The Django model class the instances belong to.
        bulk (Iterable[Model]): The model instances to update.
        update_fields (list[str]): Names of the fields to write.
        step (int, optional): Maximum number of instances per chunk.
            Defaults to STANDARD_BULK_SIZE.

    Returns:
        int: The sum of the per-chunk results of ``bulk_update``.
    """
    updated = bulk_method_in_steps(
        model=model,
        bulk=bulk,
        step=step,
        mode=MODE_UPDATE,
        fields=update_fields,
    )
    # The generic helper returns a union type; in update mode it is an int.
    return cast("int", updated)
|
97
|
+
|
98
|
+
|
99
|
+
def bulk_delete_in_steps(
    model: type[Model],
    bulk: Iterable[Model],
    step: int = STANDARD_BULK_SIZE,
) -> tuple[int, dict[str, int]]:
    """Remove the given instances from the database chunk by chunk.

    Thin wrapper that forwards to ``bulk_method_in_steps`` in delete mode,
    which deletes every chunk through ``bulk_delete`` and adds up the
    per-chunk deletion statistics.

    Args:
        model (type[Model]): The Django model class to delete from.
        bulk (Iterable[Model]): The model instances to delete.
        step (int, optional): Maximum number of instances per chunk.
            Defaults to STANDARD_BULK_SIZE.

    Returns:
        tuple[int, dict[str, int]]: The total number of deleted objects and a
            dictionary with the number of deleted objects per model name.
    """
    deleted = bulk_method_in_steps(model=model, bulk=bulk, step=step, mode=MODE_DELETE)
    # The generic helper returns a union type; in delete mode it is this tuple.
    return cast("tuple[int, dict[str, int]]", deleted)
|
129
|
+
|
130
|
+
|
131
|
+
def bulk_method_in_steps(
    model: type[Model],
    bulk: Iterable[Model],
    step: int,
    mode: MODE_TYPES,
    **kwargs: Any,
) -> int | tuple[int, dict[str, int]] | list[Model]:
    """Run a chunked bulk create, update or delete for the given instances.

    Entry point shared by the mode-specific wrappers. It logs a notice when
    the call happens inside an already open ``transaction.atomic`` block and
    then delegates the actual work to ``bulk_method_in_steps_atomic``.

    Args:
        model (type[Model]): The Django model class to operate on.
        bulk (Iterable[Model]): The model instances to process.
        step (int): Maximum number of instances per chunk.
        mode (MODE_TYPES): The operation mode - 'create', 'update', or 'delete'.
        **kwargs: Extra keyword arguments forwarded to the bulk operation.

    Returns:
        int | tuple[int, dict[str, int]] | list[Model]:
            The aggregated result of all chunks, depending on mode:
            - create: list of created model instances
            - update: integer count of updated objects
            - delete: tuple of (total_count, count_by_model_dict)
    """
    # Only a notice: the operation still runs inside the surrounding block.
    if transaction.get_connection().in_atomic_block:
        logger.info(
            "BE CAREFUL USING BULK OPERATIONS INSIDE A BROADER TRANSACTION BLOCK. "
            "BULKING WITH BULKS THAT DEPEND ON EACH OTHER CAN CAUSE "
            "INTEGRITY ERRORS OR POTENTIAL OTHER ISSUES."
        )

    return bulk_method_in_steps_atomic(
        model=model,
        bulk=bulk,
        step=step,
        mode=mode,
        **kwargs,
    )
|
170
|
+
|
171
|
+
|
172
|
+
@transaction.atomic
def bulk_method_in_steps_atomic(
    model: type[Model],
    bulk: Iterable[Model],
    step: int,
    mode: MODE_TYPES,
    **kwargs: Any,
) -> int | tuple[int, dict[str, int]] | list[Model]:
    """Bulk create, update or delete the given instances in chunks.

    Warning:
        When a bulk is created or updated and a second bulk that depends on
        the first one is processed afterwards, wrapping the whole sequence in
        an outer ``@transaction.atomic`` leads to an integrity error. Remove
        the atomic decorators above this function's own one to avoid that.

    NOTE(review): if ``multithread_loop`` runs the chunks on worker threads,
    those threads presumably use their own database connections - confirm
    that this function's atomic block actually covers their writes.

    Args:
        model (type[Model]): The Django model class to operate on.
        bulk (Iterable[Model]): The model instances to process.
        step (int): Number of objects to process in one chunk.
        mode (MODE_TYPES): The operation mode - 'create', 'update', or 'delete'.
        **kwargs: Extra keyword arguments forwarded to the bulk operation.

    Returns:
        int | tuple[int, dict[str, int]] | list[Model]:
            The aggregated result of all chunks, depending on mode:
            - create: list of created model instances
            - update: integer count of updated objects
            - delete: tuple of (total_count, count_by_model_dict)
    """
    # The per-chunk callable is built first, then the lazy chunk generator;
    # every chunk is handed to the callable by multithread_loop.
    chunk_results = multithread_loop(
        process_function=get_bulk_method(model=model, mode=mode, **kwargs),
        process_args=get_step_chunks(bulk=bulk, step=step),
    )

    # Collapse the list of per-chunk results into a single mode-specific value.
    return flatten_bulk_in_steps_result(result=chunk_results, mode=mode)
|
215
|
+
|
216
|
+
|
217
|
+
def get_step_chunks(
    bulk: Iterable[Model], step: int
) -> Generator[tuple[list[Model]], None, None]:
    """Yield consecutive chunks of at most ``step`` items from the bulk.

    The bulk is consumed lazily, so it may be any iterable, including a
    generator. Every chunk is wrapped in a one-element tuple because the
    consumer of this generator expects a tuple of call arguments per item.

    Args:
        bulk (Iterable[Model]): The bulk to chunk.
        step (int): The maximum size of each chunk. Must be at least 1.

    Raises:
        ValueError: If step is smaller than 1. Because this is a generator,
            the error surfaces when iteration starts, not at call time.

    Yields:
        tuple[list[Model]]: One-element tuples, each holding the next chunk.
    """
    # A step of 0 would make islice return an empty chunk right away, which
    # ends the loop and silently drops every item; reject it explicitly.
    if step < 1:
        msg = f"step must be a positive integer, got {step}"
        raise ValueError(msg)

    iterator = iter(bulk)
    # An empty chunk means the iterator is exhausted.
    while chunk := list(islice(iterator, step)):
        yield (chunk,)  # bc concurrent_loop expects a tuple of args
|
235
|
+
|
236
|
+
|
237
|
+
def get_bulk_method(
    model: type[Model], mode: MODE_TYPES, **kwargs: Any
) -> Callable[[list[Model]], list[Model] | int | tuple[int, dict[str, int]]]:
    """Build the function that applies one bulk operation to a single chunk.

    The returned function takes a chunk (a list of model instances) and runs
    the operation selected by ``mode`` on it, always passing along the
    ``kwargs`` given here.

    Args:
        model (type[Model]): The Django model class to operate on.
        mode (MODE_TYPES): The operation mode - 'create', 'update', or 'delete'.
        **kwargs: Extra keyword arguments passed to the bulk operation.

    Raises:
        ValueError: If the mode is not one of the valid MODE_TYPES.

    Returns:
        Callable[[list[Model]], Any]: A function that performs the bulk
            operation on a chunk of model instances.
    """
    if mode == MODE_CREATE:

        def bulk_create_chunk(chunk: list[Model]) -> list[Model]:
            return model.objects.bulk_create(objs=chunk, **kwargs)

        return bulk_create_chunk

    if mode == MODE_UPDATE:

        def bulk_update_chunk(chunk: list[Model]) -> int:
            return model.objects.bulk_update(objs=chunk, **kwargs)

        return bulk_update_chunk

    if mode == MODE_DELETE:

        def bulk_delete_chunk(chunk: list[Model]) -> tuple[int, dict[str, int]]:
            return bulk_delete(model=model, objs=chunk, **kwargs)

        return bulk_delete_chunk

    # None of the known modes matched.
    msg = f"Invalid method. Must be one of {MODES}"
    raise ValueError(msg)
|
282
|
+
|
283
|
+
|
284
|
+
def flatten_bulk_in_steps_result(
    result: list[Any], mode: str
) -> int | tuple[int, dict[str, int]] | list[Model]:
    """Combine the per-chunk results of a bulk operation into one value.

    Each entry of ``result`` is what the bulk method returned for one chunk.
    How the entries are combined depends on the operation mode.

    Args:
        result (list[Any]): List of results from each chunk operation.
        mode (str): The operation mode - 'create', 'update', or 'delete'.

    Raises:
        ValueError: If the mode is not one of the valid operation modes.

    Returns:
        int | tuple[int, dict[str, int]] | list[Model]: Aggregated result:
            - create: flattened list of all created objects
            - update: sum of updated object counts
            - delete: tuple of (total_count, count_by_model_dict)
    """
    if mode == MODE_CREATE:
        # result looks like [[obj1, obj2, ...], [obj1, obj2, ...], ...]
        created: list[Model] = []
        for chunk_objects in result:
            created.extend(chunk_objects)
        return created

    if mode == MODE_UPDATE:
        # result looks like [1000, 1000, ...]: one count per chunk
        return int(sum(result))

    if mode == MODE_DELETE:
        # result looks like
        # [(count, {model_name: count, model_cascade_name: count}), ...]
        grand_total = 0
        per_model: dict[str, int] = {}
        for chunk_total, chunk_counts in result:
            grand_total += chunk_total
            for label, deleted in chunk_counts.items():
                per_model[label] = per_model.get(label, 0) + deleted
        return grand_total, per_model

    msg = f"Invalid method. Must be one of {MODES}"
    raise ValueError(msg)
|
326
|
+
|
327
|
+
|
328
|
+
def bulk_delete(
    model: type[Model], objs: Iterable[Model], **_: Any
) -> tuple[int, dict[str, int]]:
    """Delete the given objects through a QuerySet ``delete`` call.

    A QuerySet is deleted as it is. Any other iterable of instances is turned
    into a queryset on ``model`` that filters by the instances' primary keys,
    and that queryset is deleted instead.

    Args:
        model (type[Model]): The Django model class to delete from.
        objs (Iterable[Model]): The model instances, or a QuerySet, to delete.
        **_: Ignored; accepted so callers can pass extra keyword arguments.

    Returns:
        tuple[int, dict[str, int]]: The value returned by the queryset's
            ``delete``: the total count of deleted objects and a dictionary
            mapping model names to their deletion counts.
    """
    if isinstance(objs, QuerySet):
        return objs.delete()

    # Plain iterable of instances: select the same rows by primary key.
    primary_keys = [instance.pk for instance in objs]
    return model.objects.filter(pk__in=primary_keys).delete()
|
353
|
+
|
354
|
+
|
355
|
+
def bulk_create_bulks_in_steps(
    bulk_by_class: dict[type[Model], Iterable[Model]],
    step: int = STANDARD_BULK_SIZE,
) -> dict[type[Model], list[Model]]:
    """Create several bulks of different model classes, one class at a time.

    The model classes are ordered with ``topological_sort_models`` and each
    class's bulk is then created via ``bulk_create_in_steps`` in that order,
    so that bulks other bulks depend on are created first.

    Args:
        bulk_by_class (dict[type[Model], Iterable[Model]]): Dictionary mapping
            model classes to the instances to create for that class.
        step (int, optional): Maximum number of instances per chunk.
            Defaults to STANDARD_BULK_SIZE.

    Returns:
        dict[type[Model], list[Model]]: Dictionary mapping model classes to
            lists of created instances, in creation order.
    """
    creation_order = topological_sort_models(models=list(bulk_by_class))
    return {
        model_class: bulk_create_in_steps(
            model=model_class, bulk=bulk_by_class[model_class], step=step
        )
        for model_class in creation_order
    }
|
387
|
+
|
388
|
+
|
389
|
+
def get_differences_between_bulks(
    bulk1: list[Model],
    bulk2: list[Model],
    fields: list[Field[Any, Any] | ForeignObjectRel | GenericForeignKey],
) -> tuple[list[Model], list[Model], list[Model], list[Model]]:
    """Split two bulks into their differences and their intersection.

    Every instance is reduced to a hash via ``hash_model_instance`` using the
    given fields. Instances are then sorted into the result lists depending
    on whether their hash also occurs in the other bulk. The original objects
    are returned, in their original order.

    Args:
        bulk1 (list[Model]): First list of model instances to compare.
        bulk2 (list[Model]): Second list of model instances to compare.
        fields (list[Field | ForeignObjectRel | GenericForeignKey]): The
            fields passed to ``hash_model_instance`` for the comparison.

    Raises:
        ValueError: If the first elements of the two bulks have different types.

    Returns:
        tuple[list[Model], list[Model], list[Model], list[Model]]: A tuple containing:
            - Objects in bulk1 but not in bulk2
            - Objects in bulk2 but not in bulk1
            - Objects in both bulk1 and bulk2 (from bulk1)
            - Objects in both bulk1 and bulk2 (from bulk2)
        If either bulk is empty, both bulks are returned unchanged together
        with two empty intersection lists.
    """
    # With an empty side there is nothing to intersect.
    if not (bulk1 and bulk2):
        return bulk1, bulk2, [], []

    # Only the first element of each bulk is inspected for the type check.
    if type(bulk1[0]) is not type(bulk2[0]):
        msg = "Both bulks must be of the same model type."
        raise ValueError(msg)

    hasher = partial(hash_model_instance, fields=fields)
    hashes1 = [hasher(instance) for instance in bulk1]
    hashes2 = [hasher(instance) for instance in bulk2]
    known_in_1, known_in_2 = set(hashes1), set(hashes2)

    # An instance of bulk1 is shared exactly when its hash occurs in bulk2.
    only_in_1: list[Model] = []
    shared_from_1: list[Model] = []
    for instance, digest in zip(bulk1, hashes1, strict=False):
        if digest in known_in_2:
            shared_from_1.append(instance)
        else:
            only_in_1.append(instance)

    # Same partitioning for bulk2, checked against the hashes of bulk1.
    only_in_2: list[Model] = []
    shared_from_2: list[Model] = []
    for instance, digest in zip(bulk2, hashes2, strict=False):
        if digest in known_in_1:
            shared_from_2.append(instance)
        else:
            only_in_2.append(instance)

    return only_in_1, only_in_2, shared_from_1, shared_from_2
|
467
|
+
|
468
|
+
|
469
|
+
def simulate_bulk_deletion(
    model_class: type[Model], entries: list[Model]
) -> dict[type[Model], set[Model]]:
    """Preview which objects a deletion of the given entries would remove.

    Runs Django's ``Collector`` over the entries and reads back what it
    collected, without calling its delete step. Both the collected instances
    and the objects of the collected fast-delete querysets are reported.

    Args:
        model_class (type[Model]): The Django model class of the entries; used
            to pick the database alias for the collector.
        entries (list[Model]): The model instances to simulate deletion for.

    Returns:
        dict[type[Model], set[Model]]: Dictionary mapping model classes to the
            sets of objects that were collected for deletion. Empty when
            there are no entries.
    """
    if not entries:
        return {}

    # Collect everything that deleting the entries would pull in.
    collector = Collector(router.db_for_write(model_class))
    collector.collect(entries)

    summary: defaultdict[type[Model], set[Model]] = defaultdict(set)

    # Instances the collector gathered individually.
    for collected_model, instances in collector.data.items():
        summary[collected_model].update(instances)

    # Fast deletes are kept as querysets; evaluate them to get the objects.
    for fast_queryset in collector.fast_deletes:
        summary[fast_queryset.model].update(list(fast_queryset))

    return summary
|
508
|
+
|
509
|
+
|
510
|
+
def multi_simulate_bulk_deletion(
    entries: dict[type[Model], list[Model]],
) -> dict[type[Model], set[Model]]:
    """Preview the deletion of entries of several model classes at once.

    Calls ``simulate_bulk_deletion`` for every model class in ``entries`` and
    merges the individual summaries, so each model class maps to the union of
    all objects reported for it.

    Args:
        entries (dict[type[Model], list[Model]]): Dictionary mapping model
            classes to the instances to simulate deletion for.

    Returns:
        dict[type[Model], set[Model]]: Dictionary mapping model classes to
            the sets of all objects reported across all simulations.
    """
    merged: defaultdict[type[Model], set[Model]] = defaultdict(set)
    for model_class, instances in entries.items():
        summary = simulate_bulk_deletion(model_class, instances)
        # Union this simulation's objects into the overall summary.
        for affected_model, affected_objects in summary.items():
            merged[affected_model].update(affected_objects)

    return dict(merged)
|