winipedia-django 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of winipedia-django might be problematic. Click here for more details.
- winipedia_django-0.2.0/LICENSE +21 -0
- winipedia_django-0.2.0/PKG-INFO +20 -0
- winipedia_django-0.2.0/README.md +2 -0
- winipedia_django-0.2.0/pyproject.toml +56 -0
- winipedia_django-0.2.0/winipedia_django/__init__.py +23 -0
- winipedia_django-0.2.0/winipedia_django/bulk.py +538 -0
- winipedia_django-0.2.0/winipedia_django/command.py +333 -0
- winipedia_django-0.2.0/winipedia_django/database.py +288 -0
- winipedia_django-0.2.0/winipedia_django/py.typed +0 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Winipedia
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: winipedia-django
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: A utils package for django
|
|
5
|
+
License-Expression: MIT
|
|
6
|
+
License-File: LICENSE
|
|
7
|
+
Author: Winipedia
|
|
8
|
+
Author-email: win.steveker@gmx.de
|
|
9
|
+
Requires-Python: >=3.12
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
14
|
+
Requires-Dist: django (>=5.2.7,<6.0.0)
|
|
15
|
+
Requires-Dist: winipedia-utils (>=0.2.10,<0.3.0)
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
|
|
18
|
+
# winipedia_django
|
|
19
|
+
A utils package for django
|
|
20
|
+
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "winipedia-django"
|
|
3
|
+
version = "0.2.0"
|
|
4
|
+
description = "A utils package for django"
|
|
5
|
+
authors = [
|
|
6
|
+
{name = "Winipedia",email = "win.steveker@gmx.de"}
|
|
7
|
+
]
|
|
8
|
+
license = "MIT"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.12"
|
|
11
|
+
dependencies = [
|
|
12
|
+
"winipedia-utils (>=0.2.10,<0.3.0)",
|
|
13
|
+
"django (>=5.2.7,<6.0.0)"
|
|
14
|
+
]
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
[build-system]
|
|
18
|
+
requires = ["poetry-core>=2.0.0,<3.0.0"]
|
|
19
|
+
build-backend = "poetry.core.masonry.api"
|
|
20
|
+
|
|
21
|
+
[dependency-groups]
|
|
22
|
+
dev = [
|
|
23
|
+
"ruff (>=0.14.1,<0.15.0)",
|
|
24
|
+
"pre-commit (>=4.3.0,<5.0.0)",
|
|
25
|
+
"mypy (>=1.18.2,<2.0.0)",
|
|
26
|
+
"pytest (>=8.4.2,<9.0.0)",
|
|
27
|
+
"bandit (>=1.8.6,<2.0.0)",
|
|
28
|
+
"types-setuptools (>=80.9.0.20250822,<81.0.0.0)",
|
|
29
|
+
"types-tqdm (>=4.67.0.20250809,<5.0.0.0)",
|
|
30
|
+
"types-defusedxml (>=0.7.0.20250822,<0.8.0.0)",
|
|
31
|
+
"types-pyyaml (>=6.0.12.20250915,<7.0.0.0)",
|
|
32
|
+
"pytest-mock (>=3.15.1,<4.0.0)",
|
|
33
|
+
"django-stubs (>=5.2.7,<6.0.0)"
|
|
34
|
+
]
|
|
35
|
+
|
|
36
|
+
[tool.ruff]
|
|
37
|
+
exclude = [".*", "**/migrations/*.py"]
|
|
38
|
+
|
|
39
|
+
[tool.ruff.lint]
|
|
40
|
+
select = ["ALL"]
|
|
41
|
+
ignore = ["D203", "D213", "COM812", "ANN401"]
|
|
42
|
+
fixable = ["ALL"]
|
|
43
|
+
|
|
44
|
+
[tool.ruff.lint.pydocstyle]
|
|
45
|
+
convention = "google"
|
|
46
|
+
|
|
47
|
+
[tool.mypy]
|
|
48
|
+
strict = true
|
|
49
|
+
warn_unreachable = true
|
|
50
|
+
show_error_codes = true
|
|
51
|
+
files = "."
|
|
52
|
+
|
|
53
|
+
[tool.pytest.ini_options]
|
|
54
|
+
testpaths = ["tests"]
|
|
55
|
+
|
|
56
|
+
[tool.bandit]
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""__init__ module."""
|
|
2
|
+
|
|
3
|
+
import django
from django.conf import settings
from winipedia_utils.logging.logger import get_logger

try:
    # django-stubs is only declared in the dev dependency group of this
    # package, so django_stubs_ext may be absent in a regular installation.
    # Importing it unconditionally would make the whole package unimportable.
    import django_stubs_ext
except ImportError:
    django_stubs_ext = None  # type: ignore[assignment]

logger = get_logger(__name__)

if django_stubs_ext is None:
    logger.info("django_stubs_ext is not installed, skipping monkeypatch")
else:
    django_stubs_ext.monkeypatch()
    logger.info("Monkeypatched django-stubs")

# Only fall back to a minimal in-memory configuration when the host project
# has not configured Django itself.
# NOTE(review): django.setup() is assumed to belong to this branch; confirm
# against the original indentation.
if not settings.configured:
    logger.info("Configuring minimal django settings")
    settings.configure(
        DATABASES={
            "default": {
                "ENGINE": "django.db.backends.sqlite3",
                "NAME": ":memory:",
            }
        },
    )
    django.setup()
|
|
@@ -0,0 +1,538 @@
|
|
|
1
|
+
"""Bulk utilities for Django models.
|
|
2
|
+
|
|
3
|
+
This module provides utility functions for working with Django models,
|
|
4
|
+
including bulk operations and validation. These utilities help with
|
|
5
|
+
efficiently managing large amounts of data in Django applications.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from collections import defaultdict
|
|
9
|
+
from collections.abc import Callable, Generator, Iterable
|
|
10
|
+
from functools import partial
|
|
11
|
+
from itertools import islice
|
|
12
|
+
from typing import TYPE_CHECKING, Any, Literal, cast, get_args
|
|
13
|
+
|
|
14
|
+
from django.db import router, transaction
|
|
15
|
+
from django.db.models import (
|
|
16
|
+
Field,
|
|
17
|
+
Model,
|
|
18
|
+
QuerySet,
|
|
19
|
+
)
|
|
20
|
+
from django.db.models.deletion import Collector
|
|
21
|
+
from winipedia_utils.concurrent.multithreading import multithread_loop
|
|
22
|
+
from winipedia_utils.logging.logger import get_logger
|
|
23
|
+
|
|
24
|
+
from winipedia_django.database import (
|
|
25
|
+
hash_model_instance,
|
|
26
|
+
topological_sort_models,
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
if TYPE_CHECKING:
|
|
30
|
+
from django.contrib.contenttypes.fields import GenericForeignKey
|
|
31
|
+
from django.db.models.fields.related import ForeignObjectRel
|
|
32
|
+
|
|
33
|
+
logger = get_logger(__name__)
|
|
34
|
+
|
|
35
|
+
# Closed set of supported bulk operations. MODES is derived from the Literal
# so the type alias and the runtime tuple cannot drift apart.
MODE_TYPES = Literal["create", "update", "delete"]
MODES = get_args(MODE_TYPES)

# Named handles for the individual modes, in the order the Literal lists them.
MODE_CREATE, MODE_UPDATE, MODE_DELETE = MODES

# Default chunk size used by the *_in_steps helpers.
STANDARD_BULK_SIZE = 1000
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def bulk_create_in_steps(
    model: type[Model],
    bulk: Iterable[Model],
    step: int = STANDARD_BULK_SIZE,
) -> list[Model]:
    """Insert model instances into the database chunk by chunk.

    Delegates to ``bulk_method_in_steps`` with the mode fixed to
    ``MODE_CREATE``; the bulk is split into chunks of ``step`` instances
    which are then processed through the multithreaded helper.

    Args:
        model (type[Model]): The Django model class the instances belong to.
        bulk (Iterable[Model]): The unsaved instances to create.
        step (int, optional): Number of instances per chunk.
            Defaults to STANDARD_BULK_SIZE.

    Returns:
        list[Model]: The created instances of all chunks in one flat list.
    """
    created = bulk_method_in_steps(
        model=model,
        bulk=bulk,
        step=step,
        mode=MODE_CREATE,
    )
    # The generic helper returns a union; in create mode it is always a list.
    return cast("list[Model]", created)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def bulk_update_in_steps(
    model: type[Model],
    bulk: Iterable[Model],
    update_fields: list[str],
    step: int = STANDARD_BULK_SIZE,
) -> int:
    """Write changed fields of model instances to the database chunk by chunk.

    Delegates to ``bulk_method_in_steps`` with the mode fixed to
    ``MODE_UPDATE``. ``update_fields`` is forwarded as the ``fields``
    keyword argument of the underlying bulk update call.

    Args:
        model (type[Model]): The Django model class the instances belong to.
        bulk (Iterable[Model]): The instances whose fields should be updated.
        update_fields (list[str]): Names of the fields to write.
        step (int, optional): Number of instances per chunk.
            Defaults to STANDARD_BULK_SIZE.

    Returns:
        int: The number of updated objects summed over all chunks.
    """
    updated = bulk_method_in_steps(
        model=model,
        bulk=bulk,
        step=step,
        mode=MODE_UPDATE,
        fields=update_fields,
    )
    # The generic helper returns a union; in update mode it is always an int.
    return cast("int", updated)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def bulk_delete_in_steps(
    model: type[Model], bulk: Iterable[Model], step: int = STANDARD_BULK_SIZE
) -> tuple[int, dict[str, int]]:
    """Remove model instances from the database chunk by chunk.

    Delegates to ``bulk_method_in_steps`` with the mode fixed to
    ``MODE_DELETE``. The per-chunk deletion statistics are merged into a
    single result.

    Args:
        model (type[Model]): The Django model class to delete from.
        bulk (Iterable[Model]): The instances to delete.
        step (int, optional): Number of instances per chunk.
            Defaults to STANDARD_BULK_SIZE.

    Returns:
        tuple[int, dict[str, int]]: The total number of deleted objects and
            a dictionary mapping model names to their deletion counts.
    """
    deleted = bulk_method_in_steps(
        model=model,
        bulk=bulk,
        step=step,
        mode=MODE_DELETE,
    )
    # The generic helper returns a union; in delete mode it is always a tuple.
    return cast("tuple[int, dict[str, int]]", deleted)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def bulk_method_in_steps(
    model: type[Model],
    bulk: Iterable[Model],
    step: int,
    mode: MODE_TYPES,
    **kwargs: Any,
) -> int | tuple[int, dict[str, int]] | list[Model]:
    """Run a chunked bulk create, update or delete and warn about nesting.

    Entry point shared by the create/update/delete wrappers. It logs a
    notice when the call happens inside an already open atomic block and
    then hands all arguments to ``bulk_method_in_steps_atomic``.

    Args:
        model (type[Model]): The Django model class to operate on.
        bulk (Iterable[Model]): The model instances to process.
        step (int): Number of instances per chunk.
        mode (MODE_TYPES): The operation mode - 'create', 'update', or 'delete'.
        **kwargs: Extra keyword arguments forwarded to the bulk operation.

    Returns:
        int | tuple[int, dict[str, int]] | list[Model]: Depends on mode:
            - create: list of created model instances
            - update: integer count of updated objects
            - delete: tuple of (total_count, count_by_model_dict)
    """
    # Only the notice depends on the surrounding transaction state; the
    # operation itself is executed either way.
    if transaction.get_connection().in_atomic_block:
        logger.info(
            "BE CAREFUL USING BULK OPERATIONS INSIDE A BROADER TRANSACTION BLOCK. "
            "BULKING WITH BULKS THAT DEPEND ON EACH OTHER CAN CAUSE "
            "INTEGRITY ERRORS OR POTENTIAL OTHER ISSUES."
        )

    return bulk_method_in_steps_atomic(
        model=model,
        bulk=bulk,
        step=step,
        mode=mode,
        **kwargs,
    )
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
@transaction.atomic
def bulk_method_in_steps_atomic(
    model: type[Model],
    bulk: Iterable[Model],
    step: int,
    mode: MODE_TYPES,
    **kwargs: Any,
) -> int | tuple[int, dict[str, int]] | list[Model]:
    """Bulk create, update or delete the given objects in chunks.

    Warning:
        When a bulk is created or updated and a second bulk that depends on
        the first one is processed afterwards, wrapping the entire sequence
        in an outer ``@transaction.atomic`` leads to an integrity error.
        Remove the atomic decorators that sit higher up than the one of
        this function to avoid that error.

    Args:
        model (type[Model]): The Django model class to operate on.
        bulk (Iterable[Model]): The model instances to process.
        step (int): Number of objects to process in one chunk.
        mode (MODE_TYPES): The operation mode - 'create', 'update', or 'delete'.
        **kwargs: Extra keyword arguments forwarded to the bulk operation.

    Returns:
        int | tuple[int, dict[str, int]] | list[Model]: Depends on mode:
            - create: list of created model instances
            - update: integer count of updated objects
            - delete: tuple of (total_count, count_by_model_dict)
    """
    # multithreading significantly increases speed
    chunk_results = multithread_loop(
        process_function=get_bulk_method(model=model, mode=mode, **kwargs),
        process_args=get_step_chunks(bulk=bulk, step=step),
    )

    # Merge the per-chunk results into the single value the mode promises.
    return flatten_bulk_in_steps_result(result=chunk_results, mode=mode)
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def get_step_chunks(
    bulk: Iterable[Model], step: int
) -> Generator[tuple[list[Model]], None, None]:
    """Yield consecutive chunks of at most ``step`` items from the bulk.

    The bulk is consumed lazily, so any iterable (including generators) can
    be chunked without materializing it as a whole. Every chunk is wrapped
    in a 1-tuple so it can be used directly as the positional arguments of
    one worker call.

    Args:
        bulk (Iterable[Model]): The bulk to chunk.
        step (int): The maximum size of each chunk. Must be at least 1.

    Raises:
        ValueError: If step is smaller than 1. A step of 0 would otherwise
            yield no chunks at all and silently drop the whole bulk. The
            error is raised when the generator is first advanced.

    Yields:
        tuple[list[Model]]: A 1-tuple holding the next chunk of the bulk.
    """
    if step < 1:
        msg = f"step must be a positive integer, got {step}"
        raise ValueError(msg)

    iterator = iter(bulk)
    # An empty slice means the iterator is exhausted.
    while chunk := list(islice(iterator, step)):
        yield (chunk,)  # bc concurrent_loop expects a tuple of args
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def get_bulk_method(
    model: type[Model], mode: MODE_TYPES, **kwargs: Any
) -> Callable[[list[Model]], list[Model] | int | tuple[int, dict[str, int]]]:
    """Build the function that applies one bulk operation to a single chunk.

    The returned callable takes a list of model instances and runs the
    operation selected by ``mode`` on it, with ``kwargs`` already bound.

    Args:
        model (type[Model]): The Django model class to operate on.
        mode (MODE_TYPES): The operation mode - 'create', 'update', or 'delete'.
        **kwargs: Extra keyword arguments passed on to the bulk operation.

    Raises:
        ValueError: If the mode is not one of the valid MODE_TYPES.

    Returns:
        Callable[[list[Model]], Any]: A function that performs the bulk
            operation on a chunk of model instances.
    """
    if mode == MODE_CREATE:

        def bulk_create_chunk(chunk: list[Model]) -> list[Model]:
            return model.objects.bulk_create(objs=chunk, **kwargs)

        return bulk_create_chunk

    if mode == MODE_UPDATE:

        def bulk_update_chunk(chunk: list[Model]) -> int:
            return model.objects.bulk_update(objs=chunk, **kwargs)

        return bulk_update_chunk

    if mode == MODE_DELETE:

        def bulk_delete_chunk(chunk: list[Model]) -> tuple[int, dict[str, int]]:
            return bulk_delete(model=model, objs=chunk, **kwargs)

        return bulk_delete_chunk

    # None of the known modes matched.
    msg = f"Invalid method. Must be one of {MODES}"
    raise ValueError(msg)
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
def flatten_bulk_in_steps_result(
    result: list[Any], mode: str
) -> int | tuple[int, dict[str, int]] | list[Model]:
    """Combine the per-chunk results of a bulk operation into one value.

    Each chunk of a chunked bulk operation produces its own result. This
    function merges them into the shape a single unchunked call would
    have returned for the given mode.

    Args:
        result (list[Any]): The results of the individual chunk operations.
        mode (str): The operation mode - 'create', 'update', or 'delete'.

    Raises:
        ValueError: If the mode is not one of the valid operation modes.

    Returns:
        int | tuple[int, dict[str, int]] | list[Model]: Aggregated result:
            - create: flattened list of all created objects
            - update: sum of updated object counts
            - delete: tuple of (total_count, count_by_model_dict)
    """
    if mode == MODE_CREATE:
        # formatted as [[obj1, obj2, ...], [obj1, obj2, ...], ...]
        created: list[Model] = []
        for sublist in result:
            created.extend(sublist)
        return created

    if mode == MODE_UPDATE:
        # formatted as [1000, 1000, ...]
        # since django 4.2 bulk_update returns the count of updated objects
        return int(sum(result))

    if mode == MODE_DELETE:
        # formatted as [(count, {model_name: count, model_cascade_name: count}), ...]
        deleted_total = 0
        deleted_by_model: dict[str, int] = {}
        for chunk_total, chunk_by_model in result:
            deleted_total += chunk_total
            for model_name, count in chunk_by_model.items():
                previous = deleted_by_model.get(model_name, 0)
                deleted_by_model[model_name] = previous + count
        return (deleted_total, deleted_by_model)

    msg = f"Invalid method. Must be one of {MODES}"
    raise ValueError(msg)
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
def bulk_delete(
    model: type[Model], objs: Iterable[Model], **_: Any
) -> tuple[int, dict[str, int]]:
    """Delete the given objects through a single QuerySet delete call.

    A QuerySet is deleted as it is. Any other iterable of instances is
    first turned into a QuerySet that selects the instances by primary key.
    Additional keyword arguments are accepted and ignored.

    Args:
        model (type[Model]): The Django model class to delete from.
        objs (Iterable[Model]): The instances, or a QuerySet, to delete.

    Returns:
        tuple[int, dict[str, int]]: The total count of deleted objects and a
            dictionary mapping model names to their deletion counts.
    """
    if isinstance(objs, QuerySet):
        return objs.delete()

    # Plain instances: select them by primary key so one query deletes all.
    pks = [obj.pk for obj in objs]
    return model.objects.filter(pk__in=pks).delete()
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
def bulk_create_bulks_in_steps(
    bulk_by_class: dict[type[Model], Iterable[Model]],
    step: int = STANDARD_BULK_SIZE,
) -> dict[type[Model], list[Model]]:
    """Create bulks of several model types in dependency order.

    The model classes are ordered with ``topological_sort_models`` and each
    bulk is then created with ``bulk_create_in_steps`` in that order, so
    models are created in an order that respects how they depend on each
    other.

    Args:
        bulk_by_class (dict[type[Model], Iterable[Model]]): Dictionary mapping
            model classes to the instances to create.
        step (int, optional): Number of instances per chunk.
            Defaults to STANDARD_BULK_SIZE.

    Returns:
        dict[type[Model], list[Model]]: Dictionary mapping model classes to
            the lists of created instances.
    """
    # order the bulks in order of creation depending how they depend on each other
    ordered_models = topological_sort_models(models=list(bulk_by_class))

    return {
        model_: bulk_create_in_steps(
            model=model_,
            bulk=bulk_by_class[model_],
            step=step,
        )
        for model_ in ordered_models
    }
|
|
389
|
+
|
|
390
|
+
|
|
391
|
+
def get_differences_between_bulks(
    bulk1: list[Model],
    bulk2: list[Model],
    fields: "list[Field[Any, Any] | ForeignObjectRel | GenericForeignKey]",
) -> tuple[list[Model], list[Model], list[Model], list[Model]]:
    """Split two bulks into their differences and their intersection.

    Every instance is reduced to a hash computed by ``hash_model_instance``
    over the given fields. Instances are then compared by these hashes.
    The returned lists contain the original objects in their original order.

    Args:
        bulk1 (list[Model]): First list of model instances to compare.
        bulk2 (list[Model]): Second list of model instances to compare.
        fields (list[Field | ForeignObjectRel | GenericForeignKey]): The
            fields that are passed to ``hash_model_instance``.

    Raises:
        ValueError: If the first elements of the two bulks are of different
            model types.

    Returns:
        tuple[list[Model], list[Model], list[Model], list[Model]]: A tuple of:
            - Objects in bulk1 but not in bulk2
            - Objects in bulk2 but not in bulk1
            - Objects in both bulk1 and bulk2 (from bulk1)
            - Objects in both bulk1 and bulk2 (from bulk2)
    """
    # With an empty side there is nothing to compare.
    if not (bulk1 and bulk2):
        return bulk1, bulk2, [], []

    if type(bulk1[0]) is not type(bulk2[0]):
        msg = "Both bulks must be of the same model type."
        raise ValueError(msg)

    compute_hash = partial(hash_model_instance, fields=fields)
    # Hash every instance exactly once; the lists stay aligned with the bulks.
    hashes1 = [compute_hash(instance) for instance in bulk1]
    hashes2 = [compute_hash(instance) for instance in bulk2]
    unique1, unique2 = set(hashes1), set(hashes2)

    def pick(bulk: list[Model], hashes: list[Any], wanted: set[Any]) -> list[Model]:
        # Return the original objects (same identity), in their original order.
        return [
            instance
            for instance, hash_ in zip(bulk, hashes, strict=False)
            if hash_ in wanted
        ]

    shared = unique1 & unique2
    return (
        pick(bulk1, hashes1, unique1 - unique2),
        pick(bulk2, hashes2, unique2 - unique1),
        pick(bulk1, hashes1, shared),
        pick(bulk2, hashes2, shared),
    )
|
|
469
|
+
|
|
470
|
+
|
|
471
|
+
def simulate_bulk_deletion(
    model_class: type[Model], entries: list[Model]
) -> dict[type[Model], set[Model]]:
    """Preview which objects a deletion of the entries would remove.

    Runs Django's ``Collector`` over the entries without deleting anything
    and reports everything it gathered, both the regular deletes and the
    querysets scheduled for fast deletion.

    Args:
        model_class (type[Model]): The Django model class of the entries.
        entries (list[Model]): The instances to simulate the deletion for.

    Returns:
        dict[type[Model], set[Model]]: Dictionary mapping model classes to
            the sets of objects that would be deleted.
    """
    if not entries:
        return {}

    # Collect against the database that writes for this model are routed to.
    collector = Collector(router.db_for_write(model_class))
    collector.collect(entries)

    deletion_summary: defaultdict[type[Model], set[Model]] = defaultdict(set)

    # Regular deletes are already grouped by model.
    for model, objects in collector.data.items():
        deletion_summary[model] |= set(objects)

    # Fast deletes are querysets and have to be evaluated to get the objects.
    for queryset in collector.fast_deletes:
        deletion_summary[queryset.model] |= set(queryset)

    return deletion_summary
|
|
510
|
+
|
|
511
|
+
|
|
512
|
+
def multi_simulate_bulk_deletion(
    entries: dict[type[Model], list[Model]],
) -> dict[type[Model], set[Model]]:
    """Preview a deletion across several model types in one summary.

    Calls ``simulate_bulk_deletion`` once per model class and unions the
    individual summaries per model.

    Args:
        entries (dict[type[Model], list[Model]]): Dictionary mapping model
            classes to the instances to simulate the deletion for.

    Returns:
        dict[type[Model], set[Model]]: Dictionary mapping model classes to the
            sets of all objects that would be deleted across all simulations.
    """
    # Run every simulation first, then merge the results.
    summaries = [
        simulate_bulk_deletion(model_class, instances)
        for model_class, instances in entries.items()
    ]

    merged: dict[type[Model], set[Model]] = {}
    for summary in summaries:
        for model, objects in summary.items():
            merged.setdefault(model, set()).update(objects)

    return merged
|
|
@@ -0,0 +1,333 @@
|
|
|
1
|
+
"""Command utilities for Django.
|
|
2
|
+
|
|
3
|
+
This module provides utility functions for working with Django commands,
|
|
4
|
+
including command execution and output handling. These utilities help with
|
|
5
|
+
managing and automating Django command-line tasks.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import logging
|
|
9
|
+
from abc import abstractmethod
|
|
10
|
+
from argparse import ArgumentParser
|
|
11
|
+
from typing import Any, final
|
|
12
|
+
|
|
13
|
+
from django.core.management import BaseCommand
|
|
14
|
+
from winipedia_utils.oop.mixins.mixin import ABCLoggingMixin
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class ABCBaseCommand(ABCLoggingMixin, BaseCommand):
|
|
20
|
+
"""Abstract base class for Django management commands with logging and validation.
|
|
21
|
+
|
|
22
|
+
This class serves as a foundation for creating Django management commands that
|
|
23
|
+
require abstract method implementation enforcement and automatic logging.
|
|
24
|
+
It combines Django's BaseCommand with ABCImplementationLoggingMixin to provide
|
|
25
|
+
both command functionality and development-time validation.
|
|
26
|
+
|
|
27
|
+
The class implements a template method pattern where common argument handling
|
|
28
|
+
and execution flow are managed by final methods, while specific implementations
|
|
29
|
+
are defined through abstract methods that subclasses must implement.
|
|
30
|
+
|
|
31
|
+
Key Features:
|
|
32
|
+
- Automatic logging of method calls with performance tracking
|
|
33
|
+
- Compile-time validation that all abstract methods are implemented
|
|
34
|
+
- Structured argument handling with base and custom arguments
|
|
35
|
+
- Template method pattern for consistent command execution flow
|
|
36
|
+
|
|
37
|
+
Inheritance Order:
|
|
38
|
+
The order of inheritance is critical: ABCImplementationLoggingMixin must
|
|
39
|
+
come before BaseCommand because Django's BaseCommand doesn't call
|
|
40
|
+
super().__init__(), so the mixin's metaclass initialization must happen
|
|
41
|
+
first to ensure proper class construction.
|
|
42
|
+
|
|
43
|
+
Example:
|
|
44
|
+
>>> class MyCommand(ABCBaseCommand):
|
|
45
|
+
... def add_command_arguments(self, parser):
|
|
46
|
+
... parser.add_argument('--my-option', help='Custom option')
|
|
47
|
+
...
|
|
48
|
+
... def handle_command(self, *args, **options):
|
|
49
|
+
... self.stdout.write('Executing my command')
|
|
50
|
+
|
|
51
|
+
Note:
|
|
52
|
+
- All methods are automatically logged with performance tracking
|
|
53
|
+
- Subclasses must implement add_command_arguments and handle_command
|
|
54
|
+
- The @final decorator prevents overriding of template methods
|
|
55
|
+
"""
|
|
56
|
+
|
|
57
|
+
@final
def add_arguments(self, parser: ArgumentParser) -> None:
    """Register every command-line argument of the command on ``parser``.

    Template method: the shared base options are registered first through
    ``_add_arguments()``; afterwards ``add_command_arguments()`` contributes
    the options that belong to the concrete command only.

    The method is marked ``@final`` so that subclasses extend the argument
    set through ``add_command_arguments()`` instead of overriding this
    method.

    Args:
        parser (ArgumentParser): Parser that receives the argument
            definitions.
    """
    # Step 1: options shared by all commands.
    self._add_arguments(parser)

    # Step 2: options contributed by the concrete command.
    self.add_command_arguments(parser)
|
|
84
|
+
|
|
85
|
+
@final
|
|
86
|
+
def _add_arguments(self, parser: ArgumentParser) -> None:
|
|
87
|
+
"""Add common command-line arguments used across multiple commands.
|
|
88
|
+
|
|
89
|
+
This method defines base arguments that are commonly used across different
|
|
90
|
+
Django management commands. These arguments provide standard functionality
|
|
91
|
+
like dry-run mode, verbosity control, and batch processing options.
|
|
92
|
+
|
|
93
|
+
The method is final to ensure consistent base argument handling, while
|
|
94
|
+
command-specific arguments are handled through the abstract
|
|
95
|
+
add_command_arguments method.
|
|
96
|
+
|
|
97
|
+
Args:
|
|
98
|
+
parser (ArgumentParser): Django's argument parser instance to which
|
|
99
|
+
common arguments should be added.
|
|
100
|
+
|
|
101
|
+
Note:
|
|
102
|
+
- Provides standard arguments for dry-run, verbosity, and batch processing
|
|
103
|
+
- The @final decorator prevents subclasses from overriding this method
|
|
104
|
+
- Command-specific arguments should be added via add_command_arguments()
|
|
105
|
+
"""
|
|
106
|
+
parser.add_argument(
|
|
107
|
+
"--dry-run",
|
|
108
|
+
action="store_true",
|
|
109
|
+
help="Show what would be done without actually executing the changes",
|
|
110
|
+
)
|
|
111
|
+
|
|
112
|
+
parser.add_argument(
|
|
113
|
+
"--size",
|
|
114
|
+
type=int,
|
|
115
|
+
default=None,
|
|
116
|
+
help="Size of smth in a command",
|
|
117
|
+
)
|
|
118
|
+
|
|
119
|
+
parser.add_argument(
|
|
120
|
+
"--force",
|
|
121
|
+
action="store_true",
|
|
122
|
+
help="Force an action in a command",
|
|
123
|
+
)
|
|
124
|
+
|
|
125
|
+
parser.add_argument(
|
|
126
|
+
"--delete",
|
|
127
|
+
action="store_true",
|
|
128
|
+
help="Deleting smth in a command",
|
|
129
|
+
)
|
|
130
|
+
|
|
131
|
+
parser.add_argument(
|
|
132
|
+
"--quiet",
|
|
133
|
+
action="store_true",
|
|
134
|
+
help="Suppress non-error output for cleaner automation",
|
|
135
|
+
)
|
|
136
|
+
|
|
137
|
+
parser.add_argument(
|
|
138
|
+
"--debug",
|
|
139
|
+
action="store_true",
|
|
140
|
+
help="Print debug output for detailed tracing",
|
|
141
|
+
)
|
|
142
|
+
|
|
143
|
+
parser.add_argument(
|
|
144
|
+
"--yes",
|
|
145
|
+
action="store_true",
|
|
146
|
+
help="Answer yes to all prompts",
|
|
147
|
+
default=False,
|
|
148
|
+
)
|
|
149
|
+
|
|
150
|
+
parser.add_argument(
|
|
151
|
+
"--config",
|
|
152
|
+
type=str,
|
|
153
|
+
help="A configuration setup like filepath or json string for a command",
|
|
154
|
+
default=None,
|
|
155
|
+
)
|
|
156
|
+
|
|
157
|
+
parser.add_argument(
|
|
158
|
+
"--timeout",
|
|
159
|
+
type=int,
|
|
160
|
+
help="Timeout for a command",
|
|
161
|
+
default=None,
|
|
162
|
+
)
|
|
163
|
+
|
|
164
|
+
parser.add_argument(
|
|
165
|
+
"--batch-size",
|
|
166
|
+
type=int,
|
|
167
|
+
default=None,
|
|
168
|
+
help="Number of items to process in each batch",
|
|
169
|
+
)
|
|
170
|
+
|
|
171
|
+
parser.add_argument(
|
|
172
|
+
"--no-input",
|
|
173
|
+
action="store_true",
|
|
174
|
+
help="Do not prompt for user input",
|
|
175
|
+
)
|
|
176
|
+
|
|
177
|
+
parser.add_argument(
|
|
178
|
+
"--threads",
|
|
179
|
+
type=int,
|
|
180
|
+
default=None,
|
|
181
|
+
help="Number of threads to use for processing",
|
|
182
|
+
)
|
|
183
|
+
|
|
184
|
+
parser.add_argument(
|
|
185
|
+
"--processes",
|
|
186
|
+
type=int,
|
|
187
|
+
default=None,
|
|
188
|
+
help="Number of processes to use for processing",
|
|
189
|
+
)
|
|
190
|
+
|
|
191
|
+
@abstractmethod
def add_command_arguments(self, parser: ArgumentParser) -> None:
    """Hook for registering the arguments of the concrete command.

    ``add_arguments()`` invokes this hook after ``_add_arguments()`` has
    registered the shared base options. Implementations add only what is
    specific to their command, e.g. file paths or output formats.

    Args:
        parser (ArgumentParser): Parser to which the command-specific
            arguments are added.

    Example:
        >>> def add_command_arguments(self, parser):
        ...     parser.add_argument(
        ...         '--input-file',
        ...         type=str,
        ...         required=True,
        ...         help='Path to input file'
        ...     )
        ...     parser.add_argument(
        ...         '--output-format',
        ...         choices=['json', 'csv', 'xml'],
        ...         default='json',
        ...         help='Output format for results'
        ...     )
    """
|
|
228
|
+
|
|
229
|
+
@final
def handle(self, *args: Any, **options: Any) -> None:
    """Run the command: shared handling first, command logic second.

    Template method: ``_handle()`` is called with the received arguments,
    then ``handle_command()`` is called with the same arguments. Neither
    return value is used; the method always returns ``None``.

    The method is marked ``@final`` so that subclasses customise execution
    through ``handle_command()`` instead of overriding this method.

    Args:
        *args: Positional arguments forwarded unchanged to both steps.
        **options: Parsed command-line options forwarded unchanged to both
            steps.
    """
    # Step 1: handling shared by all commands.
    self._handle(*args, **options)

    # Step 2: logic of the concrete command.
    self.handle_command(*args, **options)
|
|
254
|
+
|
|
255
|
+
@final
|
|
256
|
+
def _handle(self, *_args: Any, **options: Any) -> None:
|
|
257
|
+
"""Execute common handling logic shared across all commands.
|
|
258
|
+
|
|
259
|
+
This method is intended to contain common processing logic that should
|
|
260
|
+
be executed before command-specific handling. Currently, it serves as
|
|
261
|
+
a placeholder for future common functionality such as logging setup,
|
|
262
|
+
validation, or shared initialization.
|
|
263
|
+
|
|
264
|
+
The method is final to ensure consistent common handling across all
|
|
265
|
+
commands, while command-specific logic is handled through the abstract
|
|
266
|
+
handle_command method.
|
|
267
|
+
|
|
268
|
+
Args:
|
|
269
|
+
*args: Positional arguments passed from Django's command execution.
|
|
270
|
+
Currently unused but reserved for future common processing.
|
|
271
|
+
**options: Keyword arguments containing parsed command-line options.
|
|
272
|
+
Currently unused but reserved for future common processing.
|
|
273
|
+
|
|
274
|
+
Note:
|
|
275
|
+
- Examples might include logging setup, database connection validation, etc.
|
|
276
|
+
- The @final decorator prevents subclasses from overriding this method
|
|
277
|
+
- Called before handle_command() in the template method pattern
|
|
278
|
+
"""
|
|
279
|
+
# log each option for the command
|
|
280
|
+
for key, value in options.items():
|
|
281
|
+
logger.info(
|
|
282
|
+
"Command '%s' - runs with option: '%s' with value: '%s'",
|
|
283
|
+
self.__class__.__name__,
|
|
284
|
+
key,
|
|
285
|
+
value,
|
|
286
|
+
)
|
|
287
|
+
|
|
288
|
+
@abstractmethod
def handle_command(self, *args: Any, **options: Any) -> None:
    """Hook that carries out the actual work of the concrete command.

    ``handle()`` invokes this hook after ``_handle()`` has run, passing on
    the same positional arguments and options it received. ``options``
    therefore contains the base options as well as those registered in
    ``add_command_arguments()``.

    Args:
        *args: Positional arguments forwarded by ``handle()``.
        **options: Parsed command-line options forwarded by ``handle()``.

    Example:
        >>> def handle_command(self, *args, **options):
        ...     input_file = options['input_file']
        ...     dry_run = options['dry_run']  # Base argument
        ...     batch_size = options['batch_size']  # Base argument
        ...     quiet = options['quiet']  # Base argument
        ...
        ...     if dry_run:
        ...         self.stdout.write('Dry run mode - no changes will be made')
        ...
        ...     if not quiet:
        ...         msg = f'Processing {input_file} in batches of {batch_size}'
        ...         self.stdout.write(msg)
        ...
        ...     # Perform command-specific operations
        ...     self.process_file(input_file, batch_size, dry_run)
        ...
        ...     if not quiet:
        ...         self.stdout.write('Command completed successfully')

    Note:
        - Use self.stdout.write() for output instead of print()
    """
|
|
@@ -0,0 +1,288 @@
|
|
|
1
|
+
"""Database utilities for Django.
|
|
2
|
+
|
|
3
|
+
This module provides utility functions for working with Django models,
|
|
4
|
+
including hashing, topological sorting, and database operations.
|
|
5
|
+
These utilities help with efficient and safe database interactions.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from datetime import datetime
|
|
9
|
+
from graphlib import TopologicalSorter
|
|
10
|
+
from typing import TYPE_CHECKING, Any, Self
|
|
11
|
+
|
|
12
|
+
from django.db import connection
|
|
13
|
+
from django.db.models import DateTimeField, Field, Model
|
|
14
|
+
from django.db.models.fields.related import ForeignKey, ForeignObjectRel
|
|
15
|
+
from django.forms.models import model_to_dict
|
|
16
|
+
from winipedia_utils.logging.logger import get_logger
|
|
17
|
+
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from django.contrib.contenttypes.fields import GenericForeignKey
|
|
20
|
+
from django.db.models.options import Options
|
|
21
|
+
|
|
22
|
+
logger = get_logger(__name__)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def get_model_meta(model: type[Model]) -> "Options[Model]":
    """Return the ``_meta`` options object of a Django model class.

    Thin wrapper that keeps the access to the private ``_meta`` attribute
    (and the matching lint suppression) in a single place.

    Args:
        model (type[Model]): The Django model class to inspect.

    Returns:
        Options[Model]: The ``_meta`` object of ``model``.

    Example:
        >>> from django.contrib.auth.models import User
        >>> get_model_meta(User).db_table
        'auth_user'
    """
    options = model._meta  # noqa: SLF001
    return options
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def get_fields(
    model: type[Model],
) -> "list[Field[Any, Any] | ForeignObjectRel | GenericForeignKey]":
    """Return the result of ``get_fields()`` on the model's options object.

    Looks up the options object through ``get_model_meta()`` and calls its
    ``get_fields()`` without arguments. According to the return annotation
    the result may hold regular fields, reverse relations
    (``ForeignObjectRel``) and generic foreign keys.

    Args:
        model (type[Model]): The Django model class to inspect.

    Returns:
        list[Field | ForeignObjectRel | GenericForeignKey]: The field objects
        reported for ``model``.

    Example:
        >>> from django.contrib.auth.models import User
        >>> names = [f.name for f in get_fields(User)]
        >>> 'username' in names
        True
    """
    model_options = get_model_meta(model)
    return model_options.get_fields()
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def get_field_names(
    fields: "list[Field[Any, Any] | ForeignObjectRel | GenericForeignKey]",
) -> list[str]:
    """Return the ``name`` attribute of each given field object.

    The names are returned in the order of ``fields``; an empty input
    yields an empty list.

    Args:
        fields (list[Field | ForeignObjectRel | GenericForeignKey]): Field
            objects, e.g. the result of ``get_fields()``.

    Returns:
        list[str]: One name per entry of ``fields``.

    Example:
        >>> from django.contrib.auth.models import User
        >>> 'username' in get_field_names(get_fields(User))
        True
    """
    return [model_field.name for model_field in fields]
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def topological_sort_models(models: list[type[Model]]) -> list[type[Model]]:
    """Sort Django models so that referenced models precede their referrers.

    For every model in ``models`` the ``ForeignKey`` fields reported by
    ``get_fields()`` are inspected. A referenced model becomes a predecessor
    of the model when it is a class, is part of ``models`` and is not the
    model itself. The resulting graph is ordered with
    ``graphlib.TopologicalSorter``.

    Predecessors are registered in the order in which they appear in
    ``models`` rather than in set-iteration order, so the relative order of
    models that do not depend on each other is reproducible between runs.

    Args:
        models (list[type[Model]]): The model classes to sort.

    Returns:
        list[type[Model]]: The input models in dependency order; models that
        are referenced by a foreign key come before the models referencing
        them.

    Raises:
        graphlib.CycleError: If the foreign keys between the given models
            form a cycle.

    Example:
        >>> # Assuming Author model has no dependencies
        >>> # and Book model has ForeignKey to Author
        >>> models = [Book, Author]
        >>> sorted_models = topological_sort_models(models)
        >>> sorted_models
        [<class 'Author'>, <class 'Book'>]

    Note:
        - Only fields that are ``ForeignKey`` instances are considered
        - Self-referential foreign keys are ignored to avoid self-loops
        - Only relationships between models in the input list are considered
    """
    # Order-preserving, de-duplicated view of the input; it fixes the order in
    # which predecessors are handed to the sorter.
    candidates = tuple(dict.fromkeys(models))

    sorter: TopologicalSorter[type[Model]] = TopologicalSorter()

    for model in models:
        # Set gives O(1) membership tests instead of scanning ``models`` for
        # every field.
        referenced = {
            field.related_model
            for field in get_fields(model)
            if isinstance(field, ForeignKey)
            and isinstance(field.related_model, type)
        }
        deps = [
            candidate
            for candidate in candidates
            if candidate is not model and candidate in referenced
        ]
        sorter.add(model, *deps)

    return list(sorter.static_order())
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def execute_sql(
    sql: str, params: dict[str, Any] | None = None
) -> tuple[list[str], list[Any]]:
    """Execute a raw SQL statement and return column names with all rows.

    The statement runs on a cursor obtained from ``connection``; the cursor
    is used as a context manager and is therefore closed when the function
    returns or raises. ``params`` is handed to ``cursor.execute()`` instead
    of being interpolated into ``sql`` by this function.

    Statements that produce no result set (the cursor's ``description`` is
    ``None``, e.g. after an ``UPDATE``) yield two empty lists instead of
    failing while the column names are read.

    Args:
        sql (str): The SQL statement to execute. May contain named
            placeholders such as ``%(active)s``.
        params (dict[str, Any] | None, optional): Values for the
            placeholders in ``sql``. Defaults to None.

    Returns:
        tuple[list[str], list[Any]]: A tuple containing:
            - list[str]: Column names of the result set (empty if the
              statement returned no result set)
            - list[Any]: All result rows as returned by ``fetchall()``
              (empty if the statement returned no result set)

    Raises:
        django.db.Error: Database errors raised while executing the
            statement propagate unchanged.

    Example:
        >>> sql = "SELECT id, username FROM auth_user WHERE is_active = %(active)s"
        >>> params = {"active": True}
        >>> columns, rows = execute_sql(sql, params)
        >>> columns
        ['id', 'username']
        >>> rows[0]
        (1, 'admin')

    Note:
        - All rows are fetched into memory - use with caution for large
          result sets
    """
    with connection.cursor() as cursor:
        cursor.execute(sql=sql, params=params)

        # DB-API: ``description`` is None when the statement produced no
        # result set; there is nothing to fetch in that case.
        description = cursor.description
        if description is None:
            return [], []

        column_names = [column[0] for column in description]
        rows = cursor.fetchall()

    return column_names, rows
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def hash_model_instance(
    instance: Model,
    fields: "list[Field[Any, Any] | ForeignObjectRel | GenericForeignKey]",
) -> int:
    """Hash a model instance by primary key or, failing that, by field values.

    If ``instance.pk`` is truthy, the hash of the primary key alone is
    returned. Otherwise the instance is converted with ``model_to_dict()``
    restricted to the names of ``fields``, and the hash is computed over
    the instance's class together with the key-sorted ``(name, value)``
    pairs.

    This is not very reliable, use with caution. It is intended for
    comparing unsaved instances, e.g. in bulk operations.

    Args:
        instance (Model): The Django model instance to hash.
        fields (list[Field | ForeignObjectRel | GenericForeignKey]): The
            field objects whose values are hashed when the instance has no
            truthy primary key.

    Returns:
        int: The hash value representing the instance.
    """
    primary_key = instance.pk
    # NOTE(review): truthiness check - a falsy primary key such as 0 or ""
    # is treated like a missing one and takes the field-value path below.
    if primary_key:
        return hash(primary_key)

    names = get_field_names(fields)
    field_values = model_to_dict(instance, fields=names)
    # Sorting by field name makes the hash independent of dict ordering.
    ordered_items = tuple(sorted(field_values.items()))
    return hash((type(instance), ordered_items))
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
class BaseModel(Model):
    """Abstract base model for all models in the project.

    Adds creation/update timestamps, a generic string representation and a
    public accessor for the model's ``_meta`` options.
    """

    # Set when the row is first created (``auto_now_add``).
    created_at: DateTimeField[datetime, datetime] = DateTimeField(auto_now_add=True)
    # Refreshed on every save (``auto_now``).
    updated_at: DateTimeField[datetime, datetime] = DateTimeField(auto_now=True)

    class Meta:
        """Mark the model as abstract."""

        # abstract is not inherited by child models
        abstract = True

    def __str__(self) -> str:
        """Base string representation of a model.

        Only concrete fields (fields backed by a column of this model) are
        rendered, using the value stored on the instance. Reverse relations,
        many-to-many fields and generic foreign keys are skipped: reading
        them via ``getattr(self, field.name)`` can raise (the reverse
        accessor name may differ from ``field.name``; many-to-many managers
        need a saved instance) and would run database queries just to print
        the object. Foreign keys are shown with their stored key value
        instead of the related object for the same reason.

        Returns:
            str: ``ClassName(field=value, ...)`` for all concrete fields.
        """
        fields_values = ", ".join(
            f"{field.name}={field.value_from_object(self)}"
            for field in get_fields(self.__class__)
            if isinstance(field, Field) and field.concrete
        )
        return f"{self.__class__.__name__}({fields_values})"

    def __repr__(self) -> str:
        """Base representation of a model; identical to ``str(self)``."""
        return str(self)

    @property
    def meta(self) -> "Options[Self]":
        """Get the meta options for the model."""
        return self._meta
|
|
File without changes
|