strapi-kit 0.0.6__py3-none-any.whl → 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17,9 +17,11 @@ from strapi_kit.export.relation_resolver import RelationResolver
17
17
  from strapi_kit.models.export_format import (
18
18
  ExportData,
19
19
  ExportedEntity,
20
+ ExportedMediaFile,
20
21
  ExportMetadata,
21
22
  )
22
23
  from strapi_kit.models.request.query import StrapiQuery
24
+ from strapi_kit.models.schema import ContentTypeSchema
23
25
  from strapi_kit.operations.streaming import stream_entities
24
26
 
25
27
  if TYPE_CHECKING:
@@ -123,6 +125,9 @@ class StrapiExporter:
123
125
  # Build query with populate_all to ensure relations/media are included
124
126
  export_query = StrapiQuery().populate_all()
125
127
 
128
+ # Get schema for this content type (already cached from _fetch_schemas)
129
+ schema = self._schema_cache.get_schema(content_type)
130
+
126
131
  # Stream entities for memory efficiency
127
132
  entities = []
128
133
  for entity in stream_entities(self.client, endpoint, query=export_query):
@@ -132,11 +137,16 @@ class StrapiExporter:
132
137
  media_ids = MediaHandler.extract_media_references(entity.attributes)
133
138
  all_media_ids.update(media_ids)
134
139
 
135
- # Extract relations from entity data
136
- relations = RelationResolver.extract_relations(entity.attributes)
140
+ # Extract relations using schema for accuracy
141
+ # This avoids false positives from fields that look like relations
142
+ relations = RelationResolver.extract_relations_with_schema(
143
+ entity.attributes, schema, self._schema_cache
144
+ )
137
145
 
138
- # Strip relations from data to store separately
139
- clean_data = RelationResolver.strip_relations(entity.attributes)
146
+ # Strip relations using schema to preserve non-relation fields
147
+ clean_data = RelationResolver.strip_relations_with_schema(
148
+ entity.attributes, schema
149
+ )
140
150
 
141
151
  exported_entity = ExportedEntity(
142
152
  id=entity.id,
@@ -383,3 +393,149 @@ class StrapiExporter:
383
393
  API endpoint (e.g., "articles")
384
394
  """
385
395
  return StrapiExporter._uid_to_endpoint_fallback(uid)
396
+
397
def export_to_jsonl(
    self,
    content_types: list[str],
    output_path: str | Path,
    *,
    include_media: bool = True,
    media_dir: Path | str | None = None,
    progress_callback: Callable[[int, int, str], None] | None = None,
) -> int:
    """Export content types to a JSONL file, writing entities as they stream in.

    Each entity is handed to the writer as soon as it is fetched, so entity
    payloads are not accumulated in memory. Note that the set of referenced
    media IDs and the resulting media manifest ARE kept in memory until the
    end of the export, so memory use grows with the number of distinct media
    files (not with the number of entities).

    Args:
        content_types: List of content type UIDs to export.
        output_path: Path to the output JSONL file.
        include_media: Whether to collect media references and download the
            referenced files.
        media_dir: Directory to download media files to. Required when
            ``include_media`` is True.
        progress_callback: Optional ``callback(current, total, message)``.
            It is called once per content type with
            ``(index, len(content_types), ...)``, once before media download
            with ``(len(content_types), len(content_types) + 1, ...)``, and
            once on completion with
            ``(len(content_types), len(content_types), ...)``.

    Returns:
        Total number of entities exported, as reported by the writer.

    Raises:
        ValidationError: If ``include_media`` is True but ``media_dir`` is
            not provided (raised before any work starts).
        ImportExportError: If anything fails during the export; the original
            exception is chained as ``__cause__``.

    Example:
        >>> count = exporter.export_to_jsonl(
        ...     ["api::article.article"],
        ...     "export.jsonl",
        ...     media_dir="media/"
        ... )
        >>> print(f"Exported {count} entities")
    """
    from strapi_kit.export.jsonl_writer import JSONLExportWriter

    if include_media and media_dir is None:
        raise ValidationError("media_dir must be provided when include_media=True")

    try:
        # Create initial metadata
        metadata = ExportMetadata(
            strapi_version=self.client.api_version or "auto",
            source_url=self.client.base_url,
            content_types=content_types,
        )

        # Fetch schemas upfront. A failed lookup is logged and tolerated:
        # that content type later falls back to schema-less relation handling.
        schemas: dict[str, ContentTypeSchema] = {}
        for content_type in content_types:
            try:
                ct_schema = self._schema_cache.get_schema(content_type)
                schemas[content_type] = ct_schema
                metadata.schemas[content_type] = ct_schema
            except Exception as e:
                logger.warning("Failed to fetch schema for %s: %s", content_type, e)

        all_media_ids: set[int] = set()

        with JSONLExportWriter(output_path) as writer:
            # Write metadata first
            writer.write_metadata(metadata)

            total_content_types = len(content_types)

            # Stream entities
            for idx, content_type in enumerate(content_types):
                if progress_callback:
                    progress_callback(idx, total_content_types, f"Exporting {content_type}")

                endpoint = self._get_endpoint(content_type)
                schema: ContentTypeSchema | None = schemas.get(content_type)
                export_query = StrapiQuery().populate_all()

                for entity in stream_entities(self.client, endpoint, query=export_query):
                    # Extract media references before stripping
                    if include_media:
                        media_ids = MediaHandler.extract_media_references(entity.attributes)
                        all_media_ids.update(media_ids)

                    # Prefer schema-driven extraction; fall back to the
                    # heuristic variants when no schema could be fetched.
                    if schema is not None:
                        relations = RelationResolver.extract_relations_with_schema(
                            entity.attributes, schema, self._schema_cache
                        )
                        clean_data = RelationResolver.strip_relations_with_schema(
                            entity.attributes, schema
                        )
                    else:
                        relations = RelationResolver.extract_relations(entity.attributes)
                        clean_data = RelationResolver.strip_relations(entity.attributes)

                    exported_entity = ExportedEntity(
                        id=entity.id,
                        document_id=entity.document_id,
                        content_type=content_type,
                        data=clean_data,
                        relations=relations,
                    )

                    # Write immediately - no accumulation of entities in memory
                    writer.write_entity(exported_entity)

            # Export media if requested
            media_files: list[ExportedMediaFile] = []
            if include_media and all_media_ids:
                if progress_callback:
                    progress_callback(
                        total_content_types,
                        total_content_types + 1,
                        "Exporting media files",
                    )

                # Already validated at method start; an explicit check (rather
                # than `assert`) still holds when Python runs with -O and
                # narrows the type for checkers.
                if media_dir is None:
                    raise ValidationError(
                        "media_dir must be provided when include_media=True"
                    )
                output_dir = Path(media_dir)
                output_dir.mkdir(parents=True, exist_ok=True)

                # Sorted for a deterministic download and manifest order.
                for media_id in sorted(all_media_ids):
                    try:
                        media = self.client.get_media(media_id)
                        local_path = MediaHandler.download_media_file(
                            self.client, media, output_dir
                        )
                        exported_media = MediaHandler.create_media_export(media, local_path)
                        media_files.append(exported_media)
                    except Exception as e:
                        # Best effort: one failed download must not abort the export.
                        logger.warning("Failed to download media %s: %s", media_id, e)

            # Write media manifest (empty when no media was exported)
            writer.write_media_manifest(media_files)

            if progress_callback:
                progress_callback(
                    total_content_types,
                    total_content_types,
                    "Export complete",
                )

            return writer.entity_count

    except Exception as e:
        raise ImportExportError(f"JSONL export failed: {e}") from e