strapi-kit: strapi_kit-0.0.5-py3-none-any.whl → strapi_kit-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- strapi_kit/_version.py +2 -2
- strapi_kit/cache/schema_cache.py +91 -3
- strapi_kit/client/async_client.py +83 -47
- strapi_kit/client/base.py +70 -3
- strapi_kit/client/sync_client.py +23 -12
- strapi_kit/export/__init__.py +3 -1
- strapi_kit/export/exporter.py +160 -4
- strapi_kit/export/importer.py +519 -66
- strapi_kit/export/jsonl_reader.py +195 -0
- strapi_kit/export/jsonl_writer.py +134 -0
- strapi_kit/export/relation_resolver.py +230 -1
- strapi_kit/models/__init__.py +8 -1
- strapi_kit/models/config.py +5 -1
- strapi_kit/models/enums.py +4 -4
- strapi_kit/models/export_format.py +13 -0
- strapi_kit/models/import_options.py +12 -2
- strapi_kit/models/schema.py +9 -4
- strapi_kit/utils/__init__.py +3 -0
- strapi_kit/utils/schema.py +35 -0
- {strapi_kit-0.0.5.dist-info → strapi_kit-0.1.0.dist-info}/METADATA +2 -3
- {strapi_kit-0.0.5.dist-info → strapi_kit-0.1.0.dist-info}/RECORD +23 -20
- {strapi_kit-0.0.5.dist-info → strapi_kit-0.1.0.dist-info}/WHEEL +0 -0
- {strapi_kit-0.0.5.dist-info → strapi_kit-0.1.0.dist-info}/licenses/LICENSE +0 -0
strapi_kit/export/exporter.py
CHANGED
|
@@ -17,9 +17,11 @@ from strapi_kit.export.relation_resolver import RelationResolver
|
|
|
17
17
|
from strapi_kit.models.export_format import (
|
|
18
18
|
ExportData,
|
|
19
19
|
ExportedEntity,
|
|
20
|
+
ExportedMediaFile,
|
|
20
21
|
ExportMetadata,
|
|
21
22
|
)
|
|
22
23
|
from strapi_kit.models.request.query import StrapiQuery
|
|
24
|
+
from strapi_kit.models.schema import ContentTypeSchema
|
|
23
25
|
from strapi_kit.operations.streaming import stream_entities
|
|
24
26
|
|
|
25
27
|
if TYPE_CHECKING:
|
|
@@ -123,6 +125,9 @@ class StrapiExporter:
|
|
|
123
125
|
# Build query with populate_all to ensure relations/media are included
|
|
124
126
|
export_query = StrapiQuery().populate_all()
|
|
125
127
|
|
|
128
|
+
# Get schema for this content type (already cached from _fetch_schemas)
|
|
129
|
+
schema = self._schema_cache.get_schema(content_type)
|
|
130
|
+
|
|
126
131
|
# Stream entities for memory efficiency
|
|
127
132
|
entities = []
|
|
128
133
|
for entity in stream_entities(self.client, endpoint, query=export_query):
|
|
@@ -132,11 +137,16 @@ class StrapiExporter:
|
|
|
132
137
|
media_ids = MediaHandler.extract_media_references(entity.attributes)
|
|
133
138
|
all_media_ids.update(media_ids)
|
|
134
139
|
|
|
135
|
-
# Extract relations
|
|
136
|
-
relations = RelationResolver.extract_relations(entity.attributes)
|
|
140
|
+
# Extract relations using schema for accuracy
|
|
141
|
+
# This avoids false positives from fields that look like relations
|
|
142
|
+
relations = RelationResolver.extract_relations_with_schema(
|
|
143
|
+
entity.attributes, schema, self._schema_cache
|
|
144
|
+
)
|
|
137
145
|
|
|
138
|
-
# Strip relations
|
|
139
|
-
clean_data = RelationResolver.strip_relations(entity.attributes)
|
|
146
|
+
# Strip relations using schema to preserve non-relation fields
|
|
147
|
+
clean_data = RelationResolver.strip_relations_with_schema(
|
|
148
|
+
entity.attributes, schema
|
|
149
|
+
)
|
|
140
150
|
|
|
141
151
|
exported_entity = ExportedEntity(
|
|
142
152
|
id=entity.id,
|
|
@@ -383,3 +393,149 @@ class StrapiExporter:
|
|
|
383
393
|
API endpoint (e.g., "articles")
|
|
384
394
|
"""
|
|
385
395
|
return StrapiExporter._uid_to_endpoint_fallback(uid)
|
|
396
|
+
|
|
397
|
+
def export_to_jsonl(
|
|
398
|
+
self,
|
|
399
|
+
content_types: list[str],
|
|
400
|
+
output_path: str | Path,
|
|
401
|
+
*,
|
|
402
|
+
include_media: bool = True,
|
|
403
|
+
media_dir: Path | str | None = None,
|
|
404
|
+
progress_callback: Callable[[int, int, str], None] | None = None,
|
|
405
|
+
) -> int:
|
|
406
|
+
"""Export content types to JSONL format with streaming.
|
|
407
|
+
|
|
408
|
+
This method writes entities directly to disk as they're fetched,
|
|
409
|
+
providing O(1) memory usage regardless of export size.
|
|
410
|
+
|
|
411
|
+
Args:
|
|
412
|
+
content_types: List of content type UIDs to export
|
|
413
|
+
output_path: Path to output JSONL file
|
|
414
|
+
include_media: Whether to include media file references
|
|
415
|
+
media_dir: Directory to download media files to (if include_media=True)
|
|
416
|
+
progress_callback: Optional callback(current, total, message)
|
|
417
|
+
|
|
418
|
+
Returns:
|
|
419
|
+
Total number of entities exported
|
|
420
|
+
|
|
421
|
+
Raises:
|
|
422
|
+
ValidationError: If include_media=True but media_dir is not provided
|
|
423
|
+
ImportExportError: If export fails
|
|
424
|
+
|
|
425
|
+
Example:
|
|
426
|
+
>>> count = exporter.export_to_jsonl(
|
|
427
|
+
... ["api::article.article"],
|
|
428
|
+
... "export.jsonl",
|
|
429
|
+
... media_dir="media/"
|
|
430
|
+
... )
|
|
431
|
+
>>> print(f"Exported {count} entities")
|
|
432
|
+
"""
|
|
433
|
+
from strapi_kit.export.jsonl_writer import JSONLExportWriter
|
|
434
|
+
|
|
435
|
+
if include_media and media_dir is None:
|
|
436
|
+
raise ValidationError("media_dir must be provided when include_media=True")
|
|
437
|
+
|
|
438
|
+
try:
|
|
439
|
+
# Create initial metadata
|
|
440
|
+
metadata = ExportMetadata(
|
|
441
|
+
strapi_version=self.client.api_version or "auto",
|
|
442
|
+
source_url=self.client.base_url,
|
|
443
|
+
content_types=content_types,
|
|
444
|
+
)
|
|
445
|
+
|
|
446
|
+
# Fetch schemas upfront
|
|
447
|
+
schemas: dict[str, ContentTypeSchema] = {}
|
|
448
|
+
for content_type in content_types:
|
|
449
|
+
try:
|
|
450
|
+
ct_schema = self._schema_cache.get_schema(content_type)
|
|
451
|
+
schemas[content_type] = ct_schema
|
|
452
|
+
metadata.schemas[content_type] = ct_schema
|
|
453
|
+
except Exception as e:
|
|
454
|
+
logger.warning(f"Failed to fetch schema for {content_type}: {e}")
|
|
455
|
+
|
|
456
|
+
all_media_ids: set[int] = set()
|
|
457
|
+
|
|
458
|
+
with JSONLExportWriter(output_path) as writer:
|
|
459
|
+
# Write metadata first
|
|
460
|
+
writer.write_metadata(metadata)
|
|
461
|
+
|
|
462
|
+
total_content_types = len(content_types)
|
|
463
|
+
|
|
464
|
+
# Stream entities
|
|
465
|
+
for idx, content_type in enumerate(content_types):
|
|
466
|
+
if progress_callback:
|
|
467
|
+
progress_callback(idx, total_content_types, f"Exporting {content_type}")
|
|
468
|
+
|
|
469
|
+
endpoint = self._get_endpoint(content_type)
|
|
470
|
+
schema: ContentTypeSchema | None = schemas.get(content_type)
|
|
471
|
+
export_query = StrapiQuery().populate_all()
|
|
472
|
+
|
|
473
|
+
for entity in stream_entities(self.client, endpoint, query=export_query):
|
|
474
|
+
# Extract media references before stripping
|
|
475
|
+
if include_media:
|
|
476
|
+
media_ids = MediaHandler.extract_media_references(entity.attributes)
|
|
477
|
+
all_media_ids.update(media_ids)
|
|
478
|
+
|
|
479
|
+
# Extract relations using schema if available
|
|
480
|
+
if schema:
|
|
481
|
+
relations = RelationResolver.extract_relations_with_schema(
|
|
482
|
+
entity.attributes, schema, self._schema_cache
|
|
483
|
+
)
|
|
484
|
+
clean_data = RelationResolver.strip_relations_with_schema(
|
|
485
|
+
entity.attributes, schema
|
|
486
|
+
)
|
|
487
|
+
else:
|
|
488
|
+
relations = RelationResolver.extract_relations(entity.attributes)
|
|
489
|
+
clean_data = RelationResolver.strip_relations(entity.attributes)
|
|
490
|
+
|
|
491
|
+
exported_entity = ExportedEntity(
|
|
492
|
+
id=entity.id,
|
|
493
|
+
document_id=entity.document_id,
|
|
494
|
+
content_type=content_type,
|
|
495
|
+
data=clean_data,
|
|
496
|
+
relations=relations,
|
|
497
|
+
)
|
|
498
|
+
|
|
499
|
+
# Write immediately - no accumulation in memory
|
|
500
|
+
writer.write_entity(exported_entity)
|
|
501
|
+
|
|
502
|
+
# Export media if requested
|
|
503
|
+
media_files: list[ExportedMediaFile] = []
|
|
504
|
+
if include_media and all_media_ids:
|
|
505
|
+
if progress_callback:
|
|
506
|
+
progress_callback(
|
|
507
|
+
total_content_types,
|
|
508
|
+
total_content_types + 1,
|
|
509
|
+
"Exporting media files",
|
|
510
|
+
)
|
|
511
|
+
|
|
512
|
+
# Type guard: validated at method start
|
|
513
|
+
assert media_dir is not None # noqa: S101
|
|
514
|
+
output_dir = Path(media_dir)
|
|
515
|
+
output_dir.mkdir(parents=True, exist_ok=True)
|
|
516
|
+
|
|
517
|
+
for media_id in sorted(all_media_ids):
|
|
518
|
+
try:
|
|
519
|
+
media = self.client.get_media(media_id)
|
|
520
|
+
local_path = MediaHandler.download_media_file(
|
|
521
|
+
self.client, media, output_dir
|
|
522
|
+
)
|
|
523
|
+
exported_media = MediaHandler.create_media_export(media, local_path)
|
|
524
|
+
media_files.append(exported_media)
|
|
525
|
+
except Exception as e:
|
|
526
|
+
logger.warning(f"Failed to download media {media_id}: {e}")
|
|
527
|
+
|
|
528
|
+
# Write media manifest
|
|
529
|
+
writer.write_media_manifest(media_files)
|
|
530
|
+
|
|
531
|
+
if progress_callback:
|
|
532
|
+
progress_callback(
|
|
533
|
+
total_content_types,
|
|
534
|
+
total_content_types,
|
|
535
|
+
"Export complete",
|
|
536
|
+
)
|
|
537
|
+
|
|
538
|
+
return writer.entity_count
|
|
539
|
+
|
|
540
|
+
except Exception as e:
|
|
541
|
+
raise ImportExportError(f"JSONL export failed: {e}") from e
|