strapi-kit 0.0.6__py3-none-any.whl → 0.1.0__py3-none-any.whl
- strapi_kit/_version.py +2 -2
- strapi_kit/cache/schema_cache.py +91 -3
- strapi_kit/client/async_client.py +83 -47
- strapi_kit/client/base.py +9 -2
- strapi_kit/client/sync_client.py +23 -12
- strapi_kit/export/__init__.py +3 -1
- strapi_kit/export/exporter.py +160 -4
- strapi_kit/export/importer.py +519 -66
- strapi_kit/export/jsonl_reader.py +195 -0
- strapi_kit/export/jsonl_writer.py +134 -0
- strapi_kit/export/relation_resolver.py +230 -1
- strapi_kit/models/__init__.py +8 -1
- strapi_kit/models/export_format.py +13 -0
- strapi_kit/models/import_options.py +10 -0
- strapi_kit/models/schema.py +6 -1
- strapi_kit/utils/__init__.py +3 -0
- strapi_kit/utils/schema.py +35 -0
- {strapi_kit-0.0.6.dist-info → strapi_kit-0.1.0.dist-info}/METADATA +2 -3
- {strapi_kit-0.0.6.dist-info → strapi_kit-0.1.0.dist-info}/RECORD +21 -18
- {strapi_kit-0.0.6.dist-info → strapi_kit-0.1.0.dist-info}/WHEEL +0 -0
- {strapi_kit-0.0.6.dist-info → strapi_kit-0.1.0.dist-info}/licenses/LICENSE +0 -0
strapi_kit/export/jsonl_reader.py
ADDED
@@ -0,0 +1,195 @@
+"""JSONL streaming import reader.
+
+Provides O(1) memory import by reading entities one at a time.
+"""
+
+import json
+import logging
+from collections.abc import Generator
+from pathlib import Path
+from typing import IO, Any
+
+from strapi_kit.exceptions import FormatError, ImportExportError
+from strapi_kit.models.export_format import (
+    ExportedEntity,
+    ExportedMediaFile,
+    ExportMetadata,
+)
+
+logger = logging.getLogger(__name__)
+
+
+class JSONLImportReader:
+    """Streaming JSONL import reader.
+
+    Reads entities one at a time from a JSONL file for memory-efficient
+    import of large datasets.
+
+    Example:
+        >>> with JSONLImportReader("export.jsonl") as reader:
+        ...     metadata = reader.read_metadata()
+        ...     for entity in reader.iter_entities():
+        ...         process_entity(entity)
+        ...     media_manifest = reader.read_media_manifest()
+    """
+
+    def __init__(self, file_path: str | Path) -> None:
+        """Initialize JSONL reader.
+
+        Args:
+            file_path: Path to input JSONL file
+
+        Raises:
+            FormatError: If file doesn't exist
+        """
+        self.file_path = Path(file_path)
+        if not self.file_path.exists():
+            raise FormatError(f"JSONL file not found: {file_path}")
+
+        self._file: IO[str] | None = None
+        self._metadata: ExportMetadata | None = None
+        self._media_manifest: list[ExportedMediaFile] | None = None
+        self._current_line = 0
+
+    def __enter__(self) -> "JSONLImportReader":
+        """Open file for reading."""
+        self._file = open(self.file_path, encoding="utf-8")
+        return self
+
+    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
+        """Close file."""
+        if self._file:
+            self._file.close()
+            self._file = None
+
+    def read_metadata(self) -> ExportMetadata:
+        """Read metadata from first line.
+
+        Returns:
+            Export metadata
+
+        Raises:
+            FormatError: If first line is not metadata
+        """
+        if not self._file:
+            raise ImportExportError("Reader not opened - use context manager")
+
+        if self._metadata is not None:
+            return self._metadata
+
+        line = self._file.readline()
+        self._current_line = 1
+
+        if not line:
+            raise FormatError("Empty JSONL file")
+
+        try:
+            record = json.loads(line)
+        except json.JSONDecodeError as e:
+            raise FormatError(f"Invalid JSON on line 1: {e}") from e
+
+        if record.get("_type") != "metadata":
+            raise FormatError(f"Expected metadata on line 1, got: {record.get('_type')}")
+
+        # Remove _type field before parsing
+        record.pop("_type", None)
+        self._metadata = ExportMetadata(**record)
+        return self._metadata
+
+    def iter_entities(self) -> Generator[ExportedEntity, None, None]:
+        """Iterate over entities in the file.
+
+        Yields entities one at a time for memory-efficient processing.
+
+        Yields:
+            ExportedEntity objects
+
+        Raises:
+            FormatError: If entity parsing fails
+        """
+        if not self._file:
+            raise ImportExportError("Reader not opened - use context manager")
+
+        # Ensure metadata is read first
+        if self._metadata is None:
+            self.read_metadata()
+
+        for line in self._file:
+            self._current_line += 1
+            line = line.strip()
+            if not line:
+                continue
+
+            try:
+                record = json.loads(line)
+            except json.JSONDecodeError as e:
+                raise FormatError(f"Invalid JSON on line {self._current_line}: {e}") from e
+
+            record_type = record.get("_type")
+
+            if record_type == "entity":
+                record.pop("_type", None)
+                yield ExportedEntity(**record)
+
+            elif record_type == "media_manifest":
+                # Parse and cache media manifest
+                files_data = record.get("files", [])
+                self._media_manifest = [ExportedMediaFile(**f) for f in files_data]
+                # Don't yield - this is handled separately
+                break
+
+            elif record_type == "metadata":
+                # Skip duplicate metadata
+                continue
+
+            else:
+                logger.warning(f"Unknown record type on line {self._current_line}: {record_type}")
+
+    def read_media_manifest(self) -> list[ExportedMediaFile]:
+        """Read media manifest from file.
+
+        Must be called after iter_entities() has completed, or will consume
+        remaining entities to find the manifest.
+
+        Returns:
+            List of media file references, or empty list if no manifest found
+        """
+        if self._media_manifest is not None:
+            return self._media_manifest
+
+        # If we haven't read through entities yet, do so now
+        if not self._file:
+            raise ImportExportError("Reader not opened - use context manager")
+
+        # Consume remaining lines to find media manifest
+        for _ in self.iter_entities():
+            pass  # Discard entities, we just want the manifest
+
+        if self._media_manifest is None:
+            # No media manifest found - return empty list
+            return []
+
+        return self._media_manifest
+
+    def get_entity_count(self) -> int:
+        """Count total entities without loading them all.
+
+        Note: This reads through the entire file.
+
+        Returns:
+            Total entity count
+        """
+        count = 0
+        # Create a new file handle to not disturb current position
+        with open(self.file_path, encoding="utf-8") as f:
+            for line in f:
+                line = line.strip()
+                if not line:
+                    continue
+                try:
+                    record = json.loads(line)
+                    if record.get("_type") == "entity":
+                        count += 1
+                except json.JSONDecodeError:
+                    continue
+        return count
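
As a quick orientation to the reader above: iter_entities() is essentially a dispatch loop over framed JSON lines. Below is a minimal standalone sketch of that loop using plain dicts instead of the pydantic models (the file name is illustrative):

    import json

    with open("export.jsonl", encoding="utf-8") as f:
        for line_no, line in enumerate(f, start=1):
            line = line.strip()
            if not line:
                continue  # blank lines are skipped, as in the reader
            record = json.loads(line)
            kind = record.pop("_type", None)
            if kind == "entity":
                print(f"line {line_no}: {record.get('content_type')}")
            elif kind == "media_manifest":
                break  # the manifest terminates the entity stream

Note that read_media_manifest() deliberately drains iter_entities() when called early; callers who want both entities and the manifest should iterate first, exactly as the class docstring's example does.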
strapi_kit/export/jsonl_writer.py
ADDED
@@ -0,0 +1,134 @@
+"""JSONL streaming export writer.
+
+Provides O(1) memory export by writing entities as they're fetched,
+one JSON object per line.
+"""
+
+import json
+import logging
+from pathlib import Path
+from typing import IO, Any
+
+from strapi_kit.exceptions import ImportExportError
+from strapi_kit.models.export_format import (
+    ExportedEntity,
+    ExportedMediaFile,
+    ExportMetadata,
+)
+
+logger = logging.getLogger(__name__)
+
+
+class JSONLExportWriter:
+    """Streaming JSONL export writer.
+
+    Writes entities one at a time to a JSONL file for memory-efficient
+    export of large datasets.
+
+    JSONL Format:
+        Line 1: {"_type": "metadata", ...}
+        Lines 2-N: {"_type": "entity", "content_type": "...", "data": {...}}
+        Last line: {"_type": "media_manifest", "files": [...]}
+
+    Example:
+        >>> with JSONLExportWriter("export.jsonl") as writer:
+        ...     writer.write_metadata(metadata)
+        ...     for entity in entities:
+        ...         writer.write_entity(entity)
+        ...     writer.write_media_manifest(media_files)
+    """
+
+    def __init__(self, file_path: str | Path) -> None:
+        """Initialize JSONL writer.
+
+        Args:
+            file_path: Path to output JSONL file
+        """
+        self.file_path = Path(file_path)
+        self._file: IO[str] | None = None
+        self._entity_count = 0
+        self._content_type_counts: dict[str, int] = {}
+
+    def __enter__(self) -> "JSONLExportWriter":
+        """Open file for writing."""
+        self.file_path.parent.mkdir(parents=True, exist_ok=True)
+        self._file = open(self.file_path, "w", encoding="utf-8")
+        return self
+
+    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
+        """Close file."""
+        if self._file:
+            self._file.close()
+            self._file = None
+
+    def write_metadata(self, metadata: ExportMetadata) -> None:
+        """Write metadata as first line.
+
+        Args:
+            metadata: Export metadata
+        """
+        if not self._file:
+            raise ImportExportError("Writer not opened - use context manager")
+
+        record = {
+            "_type": "metadata",
+            **metadata.model_dump(mode="json"),
+        }
+        self._write_line(record)
+        logger.debug("Wrote metadata to JSONL")
+
+    def write_entity(self, entity: ExportedEntity) -> None:
+        """Write a single entity.
+
+        Args:
+            entity: Entity to write
+        """
+        if not self._file:
+            raise ImportExportError("Writer not opened - use context manager")
+
+        record = {
+            "_type": "entity",
+            **entity.model_dump(mode="json"),
+        }
+        self._write_line(record)
+
+        self._entity_count += 1
+        ct = entity.content_type
+        self._content_type_counts[ct] = self._content_type_counts.get(ct, 0) + 1
+
+    def write_media_manifest(self, media_files: list[ExportedMediaFile]) -> None:
+        """Write media manifest as final line.
+
+        Args:
+            media_files: List of media file references
+        """
+        if not self._file:
+            raise ImportExportError("Writer not opened - use context manager")
+
+        record = {
+            "_type": "media_manifest",
+            "files": [m.model_dump(mode="json") for m in media_files],
+        }
+        self._write_line(record)
+        logger.debug(f"Wrote media manifest with {len(media_files)} files")
+
+    def _write_line(self, record: dict[str, Any]) -> None:
+        """Write a single JSON line.
+
+        Args:
+            record: Dictionary to serialize as JSON line
+        """
+        if self._file is None:
+            raise ImportExportError("Writer not opened - use context manager")
+        line = json.dumps(record, ensure_ascii=False, default=str)
+        self._file.write(line + "\n")
+
+    @property
+    def entity_count(self) -> int:
+        """Get total entities written."""
+        return self._entity_count
+
+    @property
+    def content_type_counts(self) -> dict[str, int]:
+        """Get entity counts per content type."""
+        return self._content_type_counts.copy()
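
To make the docstring's "JSONL Format" section concrete, the snippet below hand-writes a minimal file in the same three-part layout the writer produces and JSONLImportReader consumes. The metadata and entity payloads here are invented placeholders, not the real model fields:

    import json

    records = [
        {"_type": "metadata", "note": "placeholder for ExportMetadata fields"},
        {"_type": "entity", "content_type": "api::article.article",
         "data": {"id": 1, "title": "Hello"}},
        {"_type": "media_manifest", "files": []},
    ]
    with open("export.jsonl", "w", encoding="utf-8") as f:
        for record in records:
            f.write(json.dumps(record, ensure_ascii=False) + "\n")

Because each record is serialized and written out as it arrives, peak memory stays constant regardless of dataset size - the O(1) property both new modules advertise.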
strapi_kit/export/relation_resolver.py
CHANGED
@@ -4,8 +4,17 @@ This module handles extracting relations from entities during export
 and resolving them during import using ID mappings.
 """
 
+from __future__ import annotations
+
 import logging
-from typing import Any
+from typing import TYPE_CHECKING, Any
+
+from ..exceptions import StrapiError
+from ..models.schema import FieldType
+
+if TYPE_CHECKING:
+    from ..cache.schema_cache import InMemorySchemaCache
+    from ..models.schema import ContentTypeSchema
 
 logger = logging.getLogger(__name__)
 
@@ -170,3 +179,223 @@ class RelationResolver:
             payload[field_name] = ids
 
         return payload
+
+    # Schema-aware extraction methods
+
+    @staticmethod
+    def extract_relations_with_schema(
+        data: dict[str, Any],
+        schema: ContentTypeSchema,
+        schema_cache: InMemorySchemaCache | None = None,
+    ) -> dict[str, list[int | str]]:
+        """Extract relations using schema - only actual relation fields.
+
+        This method uses the content type schema to identify relation fields,
+        avoiding false positives from fields that happen to contain {"data": ...}.
+        It also recursively extracts relations from components and dynamic zones.
+
+        Args:
+            data: Entity attributes dictionary
+            schema: Content type schema with field definitions
+            schema_cache: Optional schema cache for component lookups
+
+        Returns:
+            Dictionary mapping relation field paths to lists of IDs
+
+        Example:
+            >>> # Only extracts from actual relation fields defined in schema
+            >>> data = {
+            ...     "title": "Article",
+            ...     "author": {"data": {"id": 5}},
+            ...     "metadata": {"data": "not a relation"}  # Won't be extracted
+            ... }
+            >>> relations = RelationResolver.extract_relations_with_schema(data, schema)
+            {'author': [5]}  # metadata excluded because not a relation in schema
+        """
+        relations: dict[str, list[int | str]] = {}
+
+        for field_name, field_value in data.items():
+            field_schema = schema.fields.get(field_name)
+            if not field_schema:
+                continue
+
+            if field_schema.type == FieldType.RELATION:
+                # Extract IDs from relation field
+                ids = RelationResolver._extract_ids_from_field(field_value)
+                if ids is not None:
+                    relations[field_name] = ids
+
+            elif field_schema.type == FieldType.COMPONENT and schema_cache:
+                # Recursively extract from component
+                component_uid = field_schema.component
+                if component_uid and field_value:
+                    if field_schema.repeatable and isinstance(field_value, list):
+                        # Repeatable component - list of components
+                        for idx, item in enumerate(field_value):
+                            if isinstance(item, dict):
+                                nested = RelationResolver._extract_from_component(
+                                    item, component_uid, schema_cache, f"{field_name}[{idx}]."
+                                )
+                                relations.update(nested)
+                    elif isinstance(field_value, dict):
+                        # Single component
+                        nested = RelationResolver._extract_from_component(
+                            field_value, component_uid, schema_cache, f"{field_name}."
+                        )
+                        relations.update(nested)
+
+            elif field_schema.type == FieldType.DYNAMIC_ZONE and schema_cache:
+                # Recursively extract from dynamic zone components
+                if isinstance(field_value, list):
+                    for idx, item in enumerate(field_value):
+                        if isinstance(item, dict) and "__component" in item:
+                            component_uid = item["__component"]
+                            nested = RelationResolver._extract_from_component(
+                                item, component_uid, schema_cache, f"{field_name}[{idx}]."
+                            )
+                            relations.update(nested)
+
+        return relations
+
+    @staticmethod
+    def _extract_from_component(
+        component_data: dict[str, Any],
+        component_uid: str,
+        schema_cache: InMemorySchemaCache,
+        prefix: str = "",
+    ) -> dict[str, list[int | str]]:
+        """Recursively extract relations from a component.
+
+        Args:
+            component_data: Component data dictionary
+            component_uid: Component UID for schema lookup
+            schema_cache: Schema cache for component lookups
+            prefix: Field path prefix for nested fields
+
+        Returns:
+            Dictionary mapping prefixed field paths to lists of IDs
+        """
+        try:
+            component_schema = schema_cache.get_component_schema(component_uid)
+        except StrapiError:
+            logger.warning(f"Could not fetch component schema: {component_uid}", exc_info=True)
+            return {}
+
+        relations: dict[str, list[int | str]] = {}
+
+        for field_name, field_value in component_data.items():
+            if field_name == "__component":
+                continue  # Skip component type marker
+
+            field_schema = component_schema.fields.get(field_name)
+            if not field_schema:
+                continue
+
+            full_key = f"{prefix}{field_name}"
+
+            if field_schema.type == FieldType.RELATION:
+                ids = RelationResolver._extract_ids_from_field(field_value)
+                if ids is not None:
+                    relations[full_key] = ids
+
+            elif field_schema.type == FieldType.COMPONENT:
+                nested_uid = field_schema.component
+                if nested_uid and field_value:
+                    if field_schema.repeatable and isinstance(field_value, list):
+                        for idx, item in enumerate(field_value):
+                            if isinstance(item, dict):
+                                nested = RelationResolver._extract_from_component(
+                                    item, nested_uid, schema_cache, f"{full_key}[{idx}]."
+                                )
+                                relations.update(nested)
+                    elif isinstance(field_value, dict):
+                        nested = RelationResolver._extract_from_component(
+                            field_value, nested_uid, schema_cache, f"{full_key}."
+                        )
+                        relations.update(nested)
+
+            elif field_schema.type == FieldType.DYNAMIC_ZONE:
+                if isinstance(field_value, list):
+                    for idx, item in enumerate(field_value):
+                        if isinstance(item, dict) and "__component" in item:
+                            dz_uid = item["__component"]
+                            nested = RelationResolver._extract_from_component(
+                                item, dz_uid, schema_cache, f"{full_key}[{idx}]."
+                            )
+                            relations.update(nested)
+
+        return relations
+
+    @staticmethod
+    def _extract_ids_from_field(field_value: Any) -> list[int | str] | None:
+        """Extract IDs from a relation field value.
+
+        Handles both v4 nested format and v5 flat format.
+
+        Args:
+            field_value: Field value from entity data
+
+        Returns:
+            List of IDs if this looks like a relation, None otherwise
+        """
+        if field_value is None:
+            return []
+
+        # v4 format: {"data": ...}
+        if isinstance(field_value, dict) and "data" in field_value:
+            relation_data = field_value["data"]
+            if relation_data is None:
+                return []
+            elif isinstance(relation_data, dict) and "id" in relation_data:
+                return [relation_data["id"]]
+            elif isinstance(relation_data, list):
+                return [
+                    item["id"] for item in relation_data if isinstance(item, dict) and "id" in item
+                ]
+
+        # v5 format: direct ID or list of IDs (can be int or str)
+        if isinstance(field_value, (int, str)):
+            return [field_value]
+        elif isinstance(field_value, list):
+            ids: list[int | str] = [item for item in field_value if isinstance(item, (int, str))]
+            if ids:
+                return ids
+
+        return None
+
+    @staticmethod
+    def strip_relations_with_schema(
+        data: dict[str, Any],
+        schema: ContentTypeSchema,
+    ) -> dict[str, Any]:
+        """Remove only actual relation fields from entity data.
+
+        Uses schema to identify relation fields, preserving non-relation
+        fields that happen to contain {"data": ...}.
+
+        Args:
+            data: Entity attributes dictionary
+            schema: Content type schema with field definitions
+
+        Returns:
+            Copy of data with relation fields removed
+
+        Example:
+            >>> data = {
+            ...     "title": "Article",
+            ...     "author": {"data": {"id": 5}},  # Relation - removed
+            ...     "metadata": {"data": "custom"}  # Not relation - kept
+            ... }
+            >>> stripped = RelationResolver.strip_relations_with_schema(data, schema)
+            {'title': 'Article', 'metadata': {'data': 'custom'}}
+        """
+        cleaned_data = {}
+
+        for field_name, field_value in data.items():
+            field_schema = schema.fields.get(field_name)
+
+            # Keep field if it's not in schema or not a relation
+            if not field_schema or field_schema.type != FieldType.RELATION:
+                cleaned_data[field_name] = field_value
+
+        return cleaned_data
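
Tracing the branches of _extract_ids_from_field gives a compact picture of the v4 and v5 shapes it accepts. The expected results are shown as comments, derived by hand from the code above rather than from test output:

    RelationResolver._extract_ids_from_field({"data": {"id": 5}})               # [5] - v4 single
    RelationResolver._extract_ids_from_field({"data": [{"id": 1}, {"id": 2}]})  # [1, 2] - v4 to-many
    RelationResolver._extract_ids_from_field({"data": None})                    # [] - v4 empty relation
    RelationResolver._extract_ids_from_field("doc_abc")                         # ["doc_abc"] - v5 documentId
    RelationResolver._extract_ids_from_field([3, "doc_xyz"])                    # [3, "doc_xyz"] - v5 list
    RelationResolver._extract_ids_from_field({"title": "x"})                    # None - not a relation shape

The None/empty-list distinction matters: None means "this value does not look like a relation at all", so extract_relations_with_schema records nothing for the field, while [] records a relation that is present but empty.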
strapi_kit/models/__init__.py
CHANGED
@@ -9,7 +9,13 @@ from .content_type import ComponentListItem, ContentTypeListItem
 from .content_type import ContentTypeInfo as CTBContentTypeInfo
 from .content_type import ContentTypeSchema as CTBContentTypeSchema
 from .enums import FilterOperator, PublicationState, SortDirection
-from .export_format import ExportData, ExportedEntity, ExportedMediaFile, ExportMetadata
+from .export_format import (
+    ExportData,
+    ExportedEntity,
+    ExportedMediaFile,
+    ExportFormat,
+    ExportMetadata,
+)
 from .import_options import ConflictResolution, ImportOptions, ImportResult
 from .request.fields import FieldSelection
 from .request.filters import FilterBuilder, FilterCondition, FilterGroup
@@ -47,6 +53,7 @@ __all__ = [
     "ExportMetadata",
     "ExportedEntity",
     "ExportedMediaFile",
+    "ExportFormat",
     "ImportOptions",
     "ImportResult",
     "ConflictResolution",
strapi_kit/models/export_format.py
CHANGED
@@ -5,6 +5,7 @@ and version compatibility.
 """
 
 from datetime import UTC, datetime
+from enum import StrEnum
 from pathlib import PureWindowsPath
 from typing import Any
 
@@ -15,6 +16,18 @@ from strapi_kit.exceptions import FormatError
 from .schema import ContentTypeSchema
 
 
+class ExportFormat(StrEnum):
+    """Export file format options.
+
+    Attributes:
+        JSON: Standard JSON format (default). Loads entire file into memory.
+        JSONL: JSON Lines format. Streams entities one per line for O(1) memory.
+    """
+
+    JSON = "json"
+    JSONL = "jsonl"
+
+
 class ExportMetadata(BaseModel):
     """Metadata about the export.
 
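
Since ExportFormat subclasses StrEnum (Python 3.11+), its members compare equal to, and format as, their string values, so callers can pass either the enum or a raw string. A small sketch:

    assert ExportFormat.JSONL == "jsonl"
    assert ExportFormat("jsonl") is ExportFormat.JSONL       # parse user input
    assert f"export.{ExportFormat.JSONL}" == "export.jsonl"  # usable in file suffixes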
strapi_kit/models/import_options.py
CHANGED
@@ -90,6 +90,7 @@ class ImportResult(BaseModel):
         entities_skipped: Number of entities skipped
         entities_updated: Number of entities updated
         entities_failed: Number of entities that failed
+        relations_imported: Number of relation updates performed
         media_imported: Number of media files imported
         media_skipped: Number of media files skipped
         errors: List of error messages
@@ -103,6 +104,7 @@ class ImportResult(BaseModel):
     entities_skipped: int = Field(default=0, description="Entities skipped")
     entities_updated: int = Field(default=0, description="Entities updated")
     entities_failed: int = Field(default=0, description="Entities failed")
+    relations_imported: int = Field(default=0, description="Relation updates performed")
     media_imported: int = Field(default=0, description="Media files imported")
     media_skipped: int = Field(default=0, description="Media files skipped")
     errors: list[str] = Field(default_factory=list, description="Error messages")
@@ -111,6 +113,14 @@ class ImportResult(BaseModel):
         default_factory=dict,
         description="Mapping of old IDs to new IDs per content type",
     )
+    doc_id_mapping: dict[str, dict[int, str]] = Field(
+        default_factory=dict,
+        description="Mapping of old IDs to document_ids per content type (for v5 endpoints)",
+    )
+    doc_id_to_new_id: dict[str, dict[str, int]] = Field(
+        default_factory=dict,
+        description="Mapping of old document_ids to new IDs (for v5 string relation resolution)",
+    )
 
     def add_error(self, error: str) -> None:
         """Add an error message.
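
A hypothetical illustration of how the three mapping tables relate after an import against v5 endpoints; the content-type UID, ids, and documentIds are invented, and the nesting simply follows the declared field types:

    result.id_mapping        # {"api::article.article": {17: 42}}       old id -> new id
    result.doc_id_mapping    # {"api::article.article": {17: "d4kq"}}   old id -> document_id
    result.doc_id_to_new_id  # {"api::article.article": {"a9fz": 42}}   old document_id -> new id

The last table is what lets the importer resolve v5-style relations, which reference string documentIds rather than integer ids.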
strapi_kit/models/schema.py
CHANGED
@@ -12,7 +12,7 @@ class FieldType(StrEnum):
     TEXT = "text"
     RICH_TEXT = "richtext"
     EMAIL = "email"
-    PASSWORD = "password"  #
+    PASSWORD = "password"  # noqa: S105 - Field type enum, not a hardcoded password
     INTEGER = "integer"
     BIG_INTEGER = "biginteger"
     FLOAT = "float"
@@ -53,6 +53,11 @@ class FieldSchema(BaseModel):
     mapped_by: str | None = None
     inversed_by: str | None = None
 
+    # Component-specific
+    component: str | None = None  # Component UID for COMPONENT type
+    components: list[str] | None = None  # Allowed UIDs for DYNAMIC_ZONE
+    repeatable: bool = False  # True for repeatable components
+
 
 class ContentTypeSchema(BaseModel):
     """Complete schema for a content type."""
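
A sketch of how the new attributes describe nesting; the constructor calls assume FieldSchema accepts these fields as keyword arguments (as pydantic models do), and the component UIDs are made up:

    seo = FieldSchema(type=FieldType.COMPONENT, component="shared.seo")                # single component
    slides = FieldSchema(type=FieldType.COMPONENT, component="shared.slide", repeatable=True)  # repeatable
    body = FieldSchema(type=FieldType.DYNAMIC_ZONE, components=["shared.quote", "shared.media"])

These are exactly the attributes the new RelationResolver code reads when recursing: component for the schema lookup and repeatable to expect a list, while dynamic-zone items carry their own "__component" marker in the data.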
strapi_kit/utils/__init__.py
CHANGED
@@ -7,6 +7,7 @@ This package contains helper utilities including:
 """
 
 from strapi_kit.utils.rate_limiter import AsyncTokenBucketRateLimiter, TokenBucketRateLimiter
+from strapi_kit.utils.schema import extract_info_from_schema
 from strapi_kit.utils.seo import SEOConfiguration, detect_seo_configuration
 from strapi_kit.utils.uid import (
     api_id_to_singular,
@@ -31,4 +32,6 @@ __all__ = [
     # SEO utilities
     "detect_seo_configuration",
     "SEOConfiguration",
+    # Schema utilities
+    "extract_info_from_schema",
 ]