strapi-kit 0.0.5__py3-none-any.whl → 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- strapi_kit/_version.py +2 -2
- strapi_kit/cache/schema_cache.py +91 -3
- strapi_kit/client/async_client.py +83 -47
- strapi_kit/client/base.py +70 -3
- strapi_kit/client/sync_client.py +23 -12
- strapi_kit/export/__init__.py +3 -1
- strapi_kit/export/exporter.py +160 -4
- strapi_kit/export/importer.py +519 -66
- strapi_kit/export/jsonl_reader.py +195 -0
- strapi_kit/export/jsonl_writer.py +134 -0
- strapi_kit/export/relation_resolver.py +230 -1
- strapi_kit/models/__init__.py +8 -1
- strapi_kit/models/config.py +5 -1
- strapi_kit/models/enums.py +4 -4
- strapi_kit/models/export_format.py +13 -0
- strapi_kit/models/import_options.py +12 -2
- strapi_kit/models/schema.py +9 -4
- strapi_kit/utils/__init__.py +3 -0
- strapi_kit/utils/schema.py +35 -0
- {strapi_kit-0.0.5.dist-info → strapi_kit-0.1.0.dist-info}/METADATA +2 -3
- {strapi_kit-0.0.5.dist-info → strapi_kit-0.1.0.dist-info}/RECORD +23 -20
- {strapi_kit-0.0.5.dist-info → strapi_kit-0.1.0.dist-info}/WHEEL +0 -0
- {strapi_kit-0.0.5.dist-info → strapi_kit-0.1.0.dist-info}/licenses/LICENSE +0 -0
strapi_kit/export/importer.py
CHANGED
|
@@ -111,6 +111,12 @@ class StrapiImporter:
|
|
|
111
111
|
# Step 1.5: Load schemas from export metadata
|
|
112
112
|
self._load_schemas_from_export(export_data)
|
|
113
113
|
|
|
114
|
+
# Step 1.6: Validate relations if requested
|
|
115
|
+
if options.validate_relations:
|
|
116
|
+
if options.progress_callback:
|
|
117
|
+
options.progress_callback(10, 100, "Validating relations")
|
|
118
|
+
self._validate_relations(export_data, result)
|
|
119
|
+
|
|
114
120
|
# Step 2: Filter content types if specified
|
|
115
121
|
content_types_to_import = self._get_content_types_to_import(export_data, options)
|
|
116
122
|
|
|
@@ -189,6 +195,54 @@ class StrapiImporter:
|
|
|
189
195
|
if export_data.get_entity_count() == 0:
|
|
190
196
|
result.add_warning("No entities to import")
|
|
191
197
|
|
|
198
|
+
def _validate_relations(self, export_data: ExportData, result: ImportResult) -> None:
    """Validate that all relation targets exist in export data.

    This pre-import validation is advisory only: every problem is recorded
    through ``result.add_warning`` and nothing is raised or modified.

    Two kinds of problems are reported per entity relation field:

    * the target content type is not part of the export at all;
    * the target content type is exported, but some referenced IDs are not.

    Content types without a schema in ``export_data.metadata.schemas`` and
    fields for which the schema reports no target are skipped, because the
    target content type cannot be determined for them.

    Args:
        export_data: Export data to validate
        result: Result object to add warnings to
    """
    # Build set of available IDs per content type. Numeric ids and string
    # document_ids share one set so a relation may reference either form.
    available_ids: dict[str, set[int | str]] = {}
    for ct, entities in export_data.entities.items():
        ids: set[int | str] = set()
        for e in entities:
            ids.add(e.id)
            # Include document_id for v5 string-based relations
            if e.document_id:
                ids.add(e.document_id)
        available_ids[ct] = ids

    # Check all relations
    for ct, entities in export_data.entities.items():
        # Without a schema the relation targets are unknown - nothing to check
        schema = export_data.metadata.schemas.get(ct)
        if not schema:
            continue

        for entity in entities:
            for field_name, target_ids in entity.relations.items():
                target_ct = schema.get_field_target(field_name)
                if not target_ct:
                    continue

                if target_ct not in available_ids:
                    result.add_warning(
                        f"{ct}#{entity.id}.{field_name} -> target type '{target_ct}' "
                        "not in export"
                    )
                    continue

                missing = set(target_ids or ()) - available_ids[target_ct]
                if missing:
                    # int and str cannot be ordered against each other, so a
                    # plain sorted() raises TypeError on mixed ids. Group by
                    # type first: numeric ids in numeric order, then strings.
                    ordered = sorted(missing, key=lambda v: (isinstance(v, str), v))
                    result.add_warning(
                        f"{ct}#{entity.id}.{field_name} -> missing IDs in {target_ct}: "
                        f"{ordered}"
                    )
|
|
245
|
+
|
|
192
246
|
def _get_content_types_to_import(
|
|
193
247
|
self, export_data: ExportData, options: ImportOptions
|
|
194
248
|
) -> list[str]:
|
|
@@ -231,83 +285,144 @@ class StrapiImporter:
|
|
|
231
285
|
options: Import options
|
|
232
286
|
result: Result object to update
|
|
233
287
|
"""
|
|
288
|
+
total_entities = sum(len(export_data.entities.get(ct, [])) for ct in content_types)
|
|
289
|
+
processed = 0
|
|
290
|
+
|
|
234
291
|
for content_type in content_types:
|
|
235
292
|
entities = export_data.entities.get(content_type, [])
|
|
236
293
|
|
|
237
294
|
# Get endpoint from schema (prefers plural_name) or fallback to UID
|
|
238
295
|
endpoint = self._get_endpoint(content_type)
|
|
239
296
|
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
continue
|
|
253
|
-
|
|
254
|
-
# Check for existing entity if document_id is available (for conflict handling)
|
|
255
|
-
existing_id: int | None = None
|
|
256
|
-
if entity.document_id:
|
|
257
|
-
existing_id = self._check_entity_exists(endpoint, entity.document_id)
|
|
297
|
+
# Process entities in batches for progress reporting
|
|
298
|
+
for batch_start in range(0, len(entities), options.batch_size):
|
|
299
|
+
batch = entities[batch_start : batch_start + options.batch_size]
|
|
300
|
+
|
|
301
|
+
for entity in batch:
|
|
302
|
+
try:
|
|
303
|
+
# Update media references if we have mappings
|
|
304
|
+
entity_data = entity.data
|
|
305
|
+
if media_id_mapping:
|
|
306
|
+
entity_data = MediaHandler.update_media_references(
|
|
307
|
+
entity.data, media_id_mapping
|
|
308
|
+
)
|
|
258
309
|
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
result.entities_skipped += 1
|
|
263
|
-
# Still track the ID mapping for relations
|
|
264
|
-
if content_type not in result.id_mapping:
|
|
265
|
-
result.id_mapping[content_type] = {}
|
|
266
|
-
result.id_mapping[content_type][entity.id] = existing_id
|
|
310
|
+
if options.dry_run:
|
|
311
|
+
# Just validate, don't actually create
|
|
312
|
+
result.entities_imported += 1
|
|
267
313
|
continue
|
|
268
314
|
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
)
|
|
315
|
+
# Check for existing entity if document_id is available (for conflict handling)
|
|
316
|
+
existing_id: int | None = None
|
|
317
|
+
if entity.document_id:
|
|
318
|
+
existing_id = self._check_entity_exists(endpoint, entity.document_id)
|
|
274
319
|
|
|
275
|
-
|
|
276
|
-
#
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
)
|
|
281
|
-
if response.data:
|
|
320
|
+
if existing_id is not None:
|
|
321
|
+
# Entity already exists - handle according to conflict resolution
|
|
322
|
+
if options.conflict_resolution == ConflictResolution.SKIP:
|
|
323
|
+
result.entities_skipped += 1
|
|
324
|
+
# Still track the ID mapping for relations
|
|
282
325
|
if content_type not in result.id_mapping:
|
|
283
326
|
result.id_mapping[content_type] = {}
|
|
284
|
-
result.id_mapping[content_type][entity.id] =
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
327
|
+
result.id_mapping[content_type][entity.id] = existing_id
|
|
328
|
+
# Track document_id mappings for v5
|
|
329
|
+
if entity.document_id:
|
|
330
|
+
if content_type not in result.doc_id_mapping:
|
|
331
|
+
result.doc_id_mapping[content_type] = {}
|
|
332
|
+
result.doc_id_mapping[content_type][entity.id] = (
|
|
333
|
+
entity.document_id
|
|
334
|
+
)
|
|
335
|
+
# Track reverse mapping for v5 string relation resolution
|
|
336
|
+
if content_type not in result.doc_id_to_new_id:
|
|
337
|
+
result.doc_id_to_new_id[content_type] = {}
|
|
338
|
+
result.doc_id_to_new_id[content_type][entity.document_id] = (
|
|
339
|
+
existing_id
|
|
340
|
+
)
|
|
341
|
+
continue
|
|
342
|
+
|
|
343
|
+
elif options.conflict_resolution == ConflictResolution.FAIL:
|
|
344
|
+
raise ImportExportError(
|
|
345
|
+
f"Entity already exists: {content_type} with documentId "
|
|
346
|
+
f"{entity.document_id}. Use conflict_resolution=SKIP or UPDATE."
|
|
347
|
+
)
|
|
348
|
+
|
|
349
|
+
elif options.conflict_resolution == ConflictResolution.UPDATE:
|
|
350
|
+
# Update existing entity
|
|
351
|
+
response = self.client.update(
|
|
352
|
+
f"{endpoint}/{entity.document_id}",
|
|
353
|
+
entity_data,
|
|
354
|
+
)
|
|
355
|
+
if response.data:
|
|
356
|
+
if content_type not in result.id_mapping:
|
|
357
|
+
result.id_mapping[content_type] = {}
|
|
358
|
+
result.id_mapping[content_type][entity.id] = response.data.id
|
|
359
|
+
# Track document_id mappings for v5
|
|
360
|
+
if entity.document_id:
|
|
361
|
+
if content_type not in result.doc_id_mapping:
|
|
362
|
+
result.doc_id_mapping[content_type] = {}
|
|
363
|
+
result.doc_id_mapping[content_type][entity.id] = (
|
|
364
|
+
entity.document_id
|
|
365
|
+
)
|
|
366
|
+
# Track reverse mapping for v5 string relation resolution
|
|
367
|
+
if content_type not in result.doc_id_to_new_id:
|
|
368
|
+
result.doc_id_to_new_id[content_type] = {}
|
|
369
|
+
result.doc_id_to_new_id[content_type][
|
|
370
|
+
entity.document_id
|
|
371
|
+
] = response.data.id
|
|
372
|
+
result.entities_updated += 1
|
|
373
|
+
continue
|
|
374
|
+
|
|
375
|
+
# Create new entity
|
|
376
|
+
response = self.client.create(endpoint, entity_data)
|
|
377
|
+
|
|
378
|
+
if response.data:
|
|
379
|
+
# Track ID mapping for relation resolution
|
|
380
|
+
if content_type not in result.id_mapping:
|
|
381
|
+
result.id_mapping[content_type] = {}
|
|
382
|
+
result.id_mapping[content_type][entity.id] = response.data.id
|
|
383
|
+
|
|
384
|
+
# Track document_id mappings for v5
|
|
385
|
+
if response.data.document_id:
|
|
386
|
+
if content_type not in result.doc_id_mapping:
|
|
387
|
+
result.doc_id_mapping[content_type] = {}
|
|
388
|
+
result.doc_id_mapping[content_type][entity.id] = (
|
|
389
|
+
response.data.document_id
|
|
390
|
+
)
|
|
391
|
+
|
|
392
|
+
# Track reverse mapping for v5 string relation resolution
|
|
393
|
+
if entity.document_id:
|
|
394
|
+
if content_type not in result.doc_id_to_new_id:
|
|
395
|
+
result.doc_id_to_new_id[content_type] = {}
|
|
396
|
+
result.doc_id_to_new_id[content_type][entity.document_id] = (
|
|
397
|
+
response.data.id
|
|
398
|
+
)
|
|
399
|
+
|
|
400
|
+
result.entities_imported += 1
|
|
401
|
+
|
|
402
|
+
except ValidationError as e:
|
|
403
|
+
result.add_error(
|
|
404
|
+
f"Validation error importing {content_type} #{entity.id}: {e}"
|
|
405
|
+
)
|
|
406
|
+
result.entities_failed += 1
|
|
290
407
|
|
|
291
|
-
|
|
292
|
-
#
|
|
293
|
-
|
|
294
|
-
result.id_mapping[content_type] = {}
|
|
408
|
+
except ImportExportError:
|
|
409
|
+
# Re-raise ImportExportError (e.g., from FAIL conflict resolution)
|
|
410
|
+
raise
|
|
295
411
|
|
|
296
|
-
|
|
297
|
-
|
|
412
|
+
except StrapiError as e:
|
|
413
|
+
# Catch Strapi-specific errors (API errors, network issues, etc.)
|
|
414
|
+
result.add_error(f"Failed to import {content_type} #{entity.id}: {e}")
|
|
415
|
+
result.entities_failed += 1
|
|
298
416
|
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
result.entities_failed += 1
|
|
417
|
+
finally:
|
|
418
|
+
processed += 1
|
|
302
419
|
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
result.add_error(f"Failed to import {content_type} #{entity.id}: {e}")
|
|
310
|
-
result.entities_failed += 1
|
|
420
|
+
# Batch progress callback
|
|
421
|
+
if options.progress_callback and total_entities > 0:
|
|
422
|
+
progress = 40 + int((processed / total_entities) * 20)
|
|
423
|
+
options.progress_callback(
|
|
424
|
+
progress, 100, f"Importing entities ({processed}/{total_entities})"
|
|
425
|
+
)
|
|
311
426
|
|
|
312
427
|
def _check_entity_exists(self, endpoint: str, document_id: str) -> int | None:
|
|
313
428
|
"""Check if an entity exists by document ID.
|
|
@@ -370,6 +485,14 @@ class StrapiImporter:
|
|
|
370
485
|
|
|
371
486
|
new_id = result.id_mapping[content_type][old_id]
|
|
372
487
|
|
|
488
|
+
# Get document_id for v5 endpoint (falls back to numeric ID for v4)
|
|
489
|
+
entity_endpoint_id: str | int = new_id
|
|
490
|
+
if (
|
|
491
|
+
content_type in result.doc_id_mapping
|
|
492
|
+
and old_id in result.doc_id_mapping[content_type]
|
|
493
|
+
):
|
|
494
|
+
entity_endpoint_id = result.doc_id_mapping[content_type][old_id]
|
|
495
|
+
|
|
373
496
|
# Get schema for this content type
|
|
374
497
|
try:
|
|
375
498
|
schema = self._schema_cache.get_schema(content_type)
|
|
@@ -383,7 +506,10 @@ class StrapiImporter:
|
|
|
383
506
|
|
|
384
507
|
# FIXED: Resolve relations using schema
|
|
385
508
|
resolved_relations = self._resolve_relations_with_schema(
|
|
386
|
-
entity.relations,
|
|
509
|
+
entity.relations,
|
|
510
|
+
schema,
|
|
511
|
+
result.id_mapping,
|
|
512
|
+
result.doc_id_to_new_id,
|
|
387
513
|
)
|
|
388
514
|
|
|
389
515
|
if not resolved_relations:
|
|
@@ -393,10 +519,10 @@ class StrapiImporter:
|
|
|
393
519
|
relation_payload = RelationResolver.build_relation_payload(resolved_relations)
|
|
394
520
|
|
|
395
521
|
if relation_payload:
|
|
396
|
-
# Update entity with relations
|
|
522
|
+
# Update entity with relations (use document_id for v5)
|
|
397
523
|
# Note: update() already wraps data in {"data": ...}
|
|
398
524
|
self.client.update(
|
|
399
|
-
f"{endpoint}/{
|
|
525
|
+
f"{endpoint}/{entity_endpoint_id}",
|
|
400
526
|
relation_payload,
|
|
401
527
|
)
|
|
402
528
|
|
|
@@ -446,6 +572,16 @@ class StrapiImporter:
|
|
|
446
572
|
result.media_imported += 1
|
|
447
573
|
continue
|
|
448
574
|
|
|
575
|
+
# Check for existing media with same hash (if not overwriting)
|
|
576
|
+
if not options.overwrite_media:
|
|
577
|
+
existing_id = self._find_media_by_hash(exported_media.hash)
|
|
578
|
+
if existing_id is not None:
|
|
579
|
+
# Use existing media - no need to upload
|
|
580
|
+
media_id_mapping[exported_media.id] = existing_id
|
|
581
|
+
result.media_skipped += 1
|
|
582
|
+
logger.debug(f"Media {exported_media.name} already exists (hash match)")
|
|
583
|
+
continue
|
|
584
|
+
|
|
449
585
|
# Find local file with path traversal protection
|
|
450
586
|
file_path = (media_path / exported_media.local_path).resolve()
|
|
451
587
|
|
|
@@ -491,21 +627,46 @@ class StrapiImporter:
|
|
|
491
627
|
|
|
492
628
|
logger.info(f"Loaded {self._schema_cache.cache_size} schemas from export")
|
|
493
629
|
|
|
630
|
+
def _find_media_by_hash(self, file_hash: str) -> int | None:
|
|
631
|
+
"""Find existing media file by hash.
|
|
632
|
+
|
|
633
|
+
Args:
|
|
634
|
+
file_hash: File hash to search for
|
|
635
|
+
|
|
636
|
+
Returns:
|
|
637
|
+
Media ID if found, None otherwise
|
|
638
|
+
"""
|
|
639
|
+
try:
|
|
640
|
+
from strapi_kit.models import FilterBuilder, StrapiQuery
|
|
641
|
+
|
|
642
|
+
query = StrapiQuery().filter(FilterBuilder().eq("hash", file_hash))
|
|
643
|
+
response = self.client.list_media(query)
|
|
644
|
+
|
|
645
|
+
if response.data:
|
|
646
|
+
return response.data[0].id
|
|
647
|
+
except Exception: # noqa: BLE001, S110 - Intentionally ignore lookup failures
|
|
648
|
+
pass
|
|
649
|
+
return None
|
|
650
|
+
|
|
494
651
|
def _resolve_relations_with_schema(
|
|
495
652
|
self,
|
|
496
653
|
relations: dict[str, list[int | str]],
|
|
497
654
|
schema: ContentTypeSchema,
|
|
498
655
|
id_mapping: dict[str, dict[int, int]],
|
|
656
|
+
doc_id_to_new_id: dict[str, dict[str, int]] | None = None,
|
|
499
657
|
) -> dict[str, list[int]]:
|
|
500
658
|
"""Resolve relation IDs using schema information.
|
|
501
659
|
|
|
502
660
|
Uses content type schemas to determine relation targets, enabling
|
|
503
|
-
proper ID mapping during import.
|
|
661
|
+
proper ID mapping during import. Handles both numeric IDs and
|
|
662
|
+
string documentIds (v5 format).
|
|
504
663
|
|
|
505
664
|
Args:
|
|
506
665
|
relations: Raw relations from export (field -> [old_ids])
|
|
507
666
|
schema: Schema for the content type
|
|
508
667
|
id_mapping: Full ID mapping (content_type -> {old_id: new_id})
|
|
668
|
+
doc_id_to_new_id: Optional document_id mapping for v5 string IDs
|
|
669
|
+
(content_type -> {old_document_id: new_id})
|
|
509
670
|
|
|
510
671
|
Returns:
|
|
511
672
|
Resolved relations with new IDs
|
|
@@ -529,12 +690,18 @@ class StrapiImporter:
|
|
|
529
690
|
continue
|
|
530
691
|
|
|
531
692
|
target_mapping = id_mapping[target_content_type]
|
|
693
|
+
target_doc_mapping = (
|
|
694
|
+
doc_id_to_new_id.get(target_content_type, {}) if doc_id_to_new_id else {}
|
|
695
|
+
)
|
|
532
696
|
|
|
533
|
-
# Resolve old IDs to new IDs
|
|
697
|
+
# Resolve old IDs to new IDs (supports both int and str IDs)
|
|
534
698
|
new_ids = []
|
|
535
699
|
for old_id in old_ids:
|
|
536
700
|
if isinstance(old_id, int) and old_id in target_mapping:
|
|
537
701
|
new_ids.append(target_mapping[old_id])
|
|
702
|
+
elif isinstance(old_id, str) and old_id in target_doc_mapping:
|
|
703
|
+
# V5 string documentId - look up in doc_id mapping
|
|
704
|
+
new_ids.append(target_doc_mapping[old_id])
|
|
538
705
|
else:
|
|
539
706
|
logger.warning(
|
|
540
707
|
f"Could not resolve {target_content_type} ID {old_id} "
|
|
@@ -617,3 +784,289 @@ class StrapiImporter:
|
|
|
617
784
|
API endpoint (e.g., "articles")
|
|
618
785
|
"""
|
|
619
786
|
return StrapiImporter._uid_to_endpoint_fallback(uid)
|
|
787
|
+
|
|
788
|
+
def import_from_jsonl(
    self,
    jsonl_path: str | Path,
    options: ImportOptions | None = None,
    media_dir: Path | str | None = None,
) -> ImportResult:
    """Import data from JSONL file with two-pass streaming.

    Entities are streamed from the file one at a time instead of being
    loaded as a whole:

    - Pass 1: Create entities, store only ID mappings (old_id -> new_id)
    - Pass 2: Re-read file to resolve relations using ID mappings

    Memory profile: entity payloads are never held all at once, but the ID
    mappings grow with the number of imported entities (a few ints/strings
    per entity).

    NOTE: with ``ConflictResolution.FAIL`` an already existing entity is
    recorded as a failed entity and the import continues with the next one;
    it does not abort the run.

    Args:
        jsonl_path: Path to input JSONL file
        options: Import options (uses defaults if None)
        media_dir: Directory containing media files from export

    Returns:
        ImportResult with statistics and any errors. ``success`` is True
        when no entity failed.

    Raises:
        ImportExportError: If import fails critically (any unexpected
            exception is recorded on the result and re-raised wrapped)

    Example:
        >>> result = importer.import_from_jsonl(
        ...     "export.jsonl",
        ...     media_dir="media/"
        ... )
        >>> if result.success:
        ...     print(f"Imported {result.entities_imported} entities")
    """
    from strapi_kit.export.jsonl_reader import JSONLImportReader

    if options is None:
        options = ImportOptions()

    result = ImportResult(success=False, dry_run=options.dry_run)
    jsonl_path = Path(jsonl_path)

    try:
        # ============================================================
        # Pass 1: Read metadata, import media, create entities
        # Store only ID mappings (grows with entity count)
        # ============================================================
        with JSONLImportReader(jsonl_path) as reader:
            # Step 1: Read metadata
            if options.progress_callback:
                options.progress_callback(0, 100, "Reading metadata")

            metadata = reader.read_metadata()

            # Load schemas from metadata
            for ct, schema in metadata.schemas.items():
                self._schema_cache.cache_schema(ct, schema)

            # Step 2: Import media first (if requested)
            # Use separate reader to avoid consuming entity stream (Issue #30)
            media_id_mapping: dict[int, int] = {}
            if options.import_media and media_dir:
                if options.progress_callback:
                    options.progress_callback(10, 100, "Importing media files")

                # Read media manifest with separate reader to preserve entity stream
                with JSONLImportReader(jsonl_path) as media_reader:
                    media_reader.read_metadata()  # Skip metadata
                    media_files = media_reader.read_media_manifest()

                if media_files:
                    media_dir_path = Path(media_dir)
                    # Resolved once: it is the fixed root every media path
                    # is checked against below.
                    media_root = media_dir_path.resolve()
                    for media in media_files:
                        try:
                            if options.dry_run:
                                result.media_imported += 1
                                continue

                            # Check for existing media (overwrite_media option).
                            # A missing attribute behaves like overwrite=True,
                            # i.e. no hash lookup is attempted.
                            if not getattr(options, "overwrite_media", True):
                                # Try to find by hash
                                existing = self._find_media_by_hash(media.hash)
                                if existing is not None:
                                    media_id_mapping[media.id] = existing
                                    result.media_skipped += 1
                                    continue

                            # Upload media file with path traversal protection
                            local_path = (media_dir_path / media.local_path).resolve()

                            # Security: Ensure resolved path stays within media_dir_path
                            if not local_path.is_relative_to(media_root):
                                result.add_error(
                                    f"Security: Invalid media path {media.local_path} - "
                                    "path traversal detected"
                                )
                                result.media_skipped += 1
                                continue

                            if local_path.exists():
                                uploaded = MediaHandler.upload_media_file(
                                    self.client, local_path, media
                                )
                                media_id_mapping[media.id] = uploaded.id
                                result.media_imported += 1
                            else:
                                result.add_warning(f"Media file not found: {local_path}")
                                result.media_skipped += 1
                        except StrapiError as e:
                            result.add_error(f"Failed to import media {media.id}: {e}")
                            result.media_skipped += 1

            # Step 3: Create entities - streaming with ID mapping only
            if options.progress_callback:
                options.progress_callback(30, 100, "Creating entities (pass 1)")

            # Store only ID mappings: old_id -> new_id
            id_mappings: dict[str, dict[int, int]] = {}
            # Store document_id mappings for v5 endpoint updates
            doc_id_mappings: dict[str, dict[int, str]] = {}
            # Store reverse document_id mapping for v5 string relation resolution
            doc_id_to_new_id_mappings: dict[str, dict[str, int]] = {}

            for entity in reader.iter_entities():
                # Filter by content types if specified
                if options.content_types and entity.content_type not in options.content_types:
                    continue

                content_type = entity.content_type
                if content_type not in id_mappings:
                    id_mappings[content_type] = {}
                    doc_id_mappings[content_type] = {}
                    doc_id_to_new_id_mappings[content_type] = {}

                try:
                    # Update media references
                    entity_data = entity.data
                    if media_id_mapping:
                        entity_data = MediaHandler.update_media_references(
                            entity.data, media_id_mapping
                        )

                    if options.dry_run:
                        result.entities_imported += 1
                        continue

                    # Get endpoint
                    endpoint = self._get_endpoint(content_type)

                    # Check for existing entity
                    existing_id = None
                    if entity.document_id:
                        existing_id = self._check_entity_exists(endpoint, entity.document_id)

                    if existing_id is not None:
                        if options.conflict_resolution == ConflictResolution.SKIP:
                            id_mappings[content_type][entity.id] = existing_id
                            # Track document_id mappings for v5
                            if entity.document_id:
                                doc_id_mappings[content_type][entity.id] = entity.document_id
                                doc_id_to_new_id_mappings[content_type][entity.document_id] = (
                                    existing_id
                                )
                            result.entities_skipped += 1
                            continue
                        elif options.conflict_resolution == ConflictResolution.FAIL:
                            result.add_error(
                                f"Entity already exists: {content_type} {entity.document_id}"
                            )
                            result.entities_failed += 1
                            continue
                        elif options.conflict_resolution == ConflictResolution.UPDATE:
                            # Update existing entity (use document_id for v5 endpoint)
                            self.client.update(
                                f"{endpoint}/{entity.document_id}", data=entity_data
                            )
                            id_mappings[content_type][entity.id] = existing_id
                            # Track document_id mappings for v5
                            if entity.document_id:
                                doc_id_mappings[content_type][entity.id] = entity.document_id
                                doc_id_to_new_id_mappings[content_type][entity.document_id] = (
                                    existing_id
                                )
                            result.entities_updated += 1
                            continue

                    # Create new entity
                    response = self.client.create(endpoint, data=entity_data)
                    if response.data:
                        id_mappings[content_type][entity.id] = response.data.id
                        # Track document_id for v5 endpoint updates
                        if response.data.document_id:
                            doc_id_mappings[content_type][entity.id] = response.data.document_id
                        # Track reverse mapping for v5 string relation resolution
                        if entity.document_id:
                            doc_id_to_new_id_mappings[content_type][entity.document_id] = (
                                response.data.id
                            )
                        result.entities_imported += 1

                except (ValidationError, StrapiError) as e:
                    # A single invalid entity must not abort the whole run:
                    # validation errors are handled per entity here exactly
                    # like API errors instead of falling through to the
                    # outer handler.
                    result.add_error(f"Failed to import {content_type} {entity.id}: {e}")
                    result.entities_failed += 1

        # ============================================================
        # Pass 2: Re-read file to resolve relations using ID mappings
        # Entities are processed one at a time
        # ============================================================
        if not options.skip_relations and not options.dry_run:
            if options.progress_callback:
                options.progress_callback(70, 100, "Resolving relations (pass 2)")

            with JSONLImportReader(jsonl_path) as reader2:
                # Skip metadata (already loaded)
                reader2.read_metadata()

                for entity in reader2.iter_entities():
                    # Filter by content types if specified
                    if (
                        options.content_types
                        and entity.content_type not in options.content_types
                    ):
                        continue

                    # Skip entities without relations
                    if not entity.relations:
                        continue

                    content_type = entity.content_type
                    endpoint = self._get_endpoint(content_type)

                    # Get new ID for this entity; entities that failed in
                    # pass 1 have no mapping and are skipped.
                    new_id = id_mappings.get(content_type, {}).get(entity.id)
                    if new_id is None:
                        continue

                    # Get document_id for v5 endpoint (falls back to numeric ID for v4)
                    entity_endpoint_id: str | int = new_id
                    if (
                        content_type in doc_id_mappings
                        and entity.id in doc_id_mappings[content_type]
                    ):
                        entity_endpoint_id = doc_id_mappings[content_type][entity.id]

                    # Get schema from cache
                    try:
                        schema = self._schema_cache.get_schema(content_type)
                    except Exception:  # noqa: BLE001, S112 - Skip content types without schema
                        continue

                    try:
                        # Resolve relations using ID mappings
                        resolved = self._resolve_relations_with_schema(
                            entity.relations,
                            schema,
                            id_mappings,
                            doc_id_to_new_id_mappings,
                        )

                        if resolved:
                            payload = RelationResolver.build_relation_payload(resolved)
                            self.client.update(f"{endpoint}/{entity_endpoint_id}", data=payload)
                            result.relations_imported += 1
                    except StrapiError as e:
                        # Relation failures are non-fatal: the entity itself
                        # was already created in pass 1.
                        result.add_warning(
                            f"Failed to import relations for {content_type} {entity.id}: {e}"
                        )

        if options.progress_callback:
            options.progress_callback(100, 100, "Import complete")

        # Copy local mappings to result for caller access
        result.id_mapping = id_mappings
        result.doc_id_mapping = doc_id_mappings
        result.doc_id_to_new_id = doc_id_to_new_id_mappings

        result.success = result.entities_failed == 0
        return result

    except Exception as e:
        result.add_error(f"JSONL import failed: {e}")
        raise ImportExportError(f"JSONL import failed: {e}") from e
|