strapi-kit 0.0.6__py3-none-any.whl → 0.1.0__py3-none-any.whl
- strapi_kit/_version.py +2 -2
- strapi_kit/cache/schema_cache.py +91 -3
- strapi_kit/client/async_client.py +83 -47
- strapi_kit/client/base.py +9 -2
- strapi_kit/client/sync_client.py +23 -12
- strapi_kit/export/__init__.py +3 -1
- strapi_kit/export/exporter.py +160 -4
- strapi_kit/export/importer.py +519 -66
- strapi_kit/export/jsonl_reader.py +195 -0
- strapi_kit/export/jsonl_writer.py +134 -0
- strapi_kit/export/relation_resolver.py +230 -1
- strapi_kit/models/__init__.py +8 -1
- strapi_kit/models/export_format.py +13 -0
- strapi_kit/models/import_options.py +10 -0
- strapi_kit/models/schema.py +6 -1
- strapi_kit/utils/__init__.py +3 -0
- strapi_kit/utils/schema.py +35 -0
- {strapi_kit-0.0.6.dist-info → strapi_kit-0.1.0.dist-info}/METADATA +2 -3
- {strapi_kit-0.0.6.dist-info → strapi_kit-0.1.0.dist-info}/RECORD +21 -18
- {strapi_kit-0.0.6.dist-info → strapi_kit-0.1.0.dist-info}/WHEEL +0 -0
- {strapi_kit-0.0.6.dist-info → strapi_kit-0.1.0.dist-info}/licenses/LICENSE +0 -0
strapi_kit/export/jsonl_reader.py
ADDED
@@ -0,0 +1,195 @@
+"""JSONL streaming import reader.
+
+Provides O(1) memory import by reading entities one at a time.
+"""
+
+import json
+import logging
+from collections.abc import Generator
+from pathlib import Path
+from typing import IO, Any
+
+from strapi_kit.exceptions import FormatError, ImportExportError
+from strapi_kit.models.export_format import (
+    ExportedEntity,
+    ExportedMediaFile,
+    ExportMetadata,
+)
+
+logger = logging.getLogger(__name__)
+
+
+class JSONLImportReader:
+    """Streaming JSONL import reader.
+
+    Reads entities one at a time from a JSONL file for memory-efficient
+    import of large datasets.
+
+    Example:
+        >>> with JSONLImportReader("export.jsonl") as reader:
+        ...     metadata = reader.read_metadata()
+        ...     for entity in reader.iter_entities():
+        ...         process_entity(entity)
+        ...     media_manifest = reader.read_media_manifest()
+    """
+
+    def __init__(self, file_path: str | Path) -> None:
+        """Initialize JSONL reader.
+
+        Args:
+            file_path: Path to input JSONL file
+
+        Raises:
+            FormatError: If file doesn't exist
+        """
+        self.file_path = Path(file_path)
+        if not self.file_path.exists():
+            raise FormatError(f"JSONL file not found: {file_path}")
+
+        self._file: IO[str] | None = None
+        self._metadata: ExportMetadata | None = None
+        self._media_manifest: list[ExportedMediaFile] | None = None
+        self._current_line = 0
+
+    def __enter__(self) -> "JSONLImportReader":
+        """Open file for reading."""
+        self._file = open(self.file_path, encoding="utf-8")
+        return self
+
+    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
+        """Close file."""
+        if self._file:
+            self._file.close()
+            self._file = None
+
+    def read_metadata(self) -> ExportMetadata:
+        """Read metadata from first line.
+
+        Returns:
+            Export metadata
+
+        Raises:
+            FormatError: If first line is not metadata
+        """
+        if not self._file:
+            raise ImportExportError("Reader not opened - use context manager")
+
+        if self._metadata is not None:
+            return self._metadata
+
+        line = self._file.readline()
+        self._current_line = 1
+
+        if not line:
+            raise FormatError("Empty JSONL file")
+
+        try:
+            record = json.loads(line)
+        except json.JSONDecodeError as e:
+            raise FormatError(f"Invalid JSON on line 1: {e}") from e
+
+        if record.get("_type") != "metadata":
+            raise FormatError(f"Expected metadata on line 1, got: {record.get('_type')}")
+
+        # Remove _type field before parsing
+        record.pop("_type", None)
+        self._metadata = ExportMetadata(**record)
+        return self._metadata
+
+    def iter_entities(self) -> Generator[ExportedEntity, None, None]:
+        """Iterate over entities in the file.
+
+        Yields entities one at a time for memory-efficient processing.
+
+        Yields:
+            ExportedEntity objects
+
+        Raises:
+            FormatError: If entity parsing fails
+        """
+        if not self._file:
+            raise ImportExportError("Reader not opened - use context manager")
+
+        # Ensure metadata is read first
+        if self._metadata is None:
+            self.read_metadata()
+
+        for line in self._file:
+            self._current_line += 1
+            line = line.strip()
+            if not line:
+                continue
+
+            try:
+                record = json.loads(line)
+            except json.JSONDecodeError as e:
+                raise FormatError(f"Invalid JSON on line {self._current_line}: {e}") from e
+
+            record_type = record.get("_type")
+
+            if record_type == "entity":
+                record.pop("_type", None)
+                yield ExportedEntity(**record)
+
+            elif record_type == "media_manifest":
+                # Parse and cache media manifest
+                files_data = record.get("files", [])
+                self._media_manifest = [ExportedMediaFile(**f) for f in files_data]
+                # Don't yield - this is handled separately
+                break
+
+            elif record_type == "metadata":
+                # Skip duplicate metadata
+                continue
+
+            else:
+                logger.warning(f"Unknown record type on line {self._current_line}: {record_type}")
+
+    def read_media_manifest(self) -> list[ExportedMediaFile]:
+        """Read media manifest from file.
+
+        Must be called after iter_entities() has completed, or will consume
+        remaining entities to find the manifest.
+
+        Returns:
+            List of media file references, or empty list if no manifest found
+        """
+        if self._media_manifest is not None:
+            return self._media_manifest
+
+        # If we haven't read through entities yet, do so now
+        if not self._file:
+            raise ImportExportError("Reader not opened - use context manager")
+
+        # Consume remaining lines to find media manifest
+        for _ in self.iter_entities():
+            pass  # Discard entities, we just want the manifest
+
+        if self._media_manifest is None:
+            # No media manifest found - return empty list
+            return []
+
+        return self._media_manifest
+
+    def get_entity_count(self) -> int:
+        """Count total entities without loading them all.
+
+        Note: This reads through the entire file.
+
+        Returns:
+            Total entity count
+        """
+        count = 0
+        # Create a new file handle to not disturb current position
+        with open(self.file_path, encoding="utf-8") as f:
+            for line in f:
+                line = line.strip()
+                if not line:
+                    continue
+                try:
+                    record = json.loads(line)
+                    if record.get("_type") == "entity":
+                        count += 1
+                except json.JSONDecodeError:
+                    continue
+        return count
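
As a quick orientation to the reader above: iter_entities() is essentially a dispatch loop over framed JSON lines. Below is a minimal standalone sketch of that loop using plain dicts instead of the pydantic models (the file name is illustrative):

    import json

    with open("export.jsonl", encoding="utf-8") as f:
        for line_no, line in enumerate(f, start=1):
            line = line.strip()
            if not line:
                continue  # blank lines are skipped, as in the reader
            record = json.loads(line)
            kind = record.pop("_type", None)
            if kind == "entity":
                print(f"line {line_no}: {record.get('content_type')}")
            elif kind == "media_manifest":
                break  # the manifest terminates the entity stream

Note that read_media_manifest() deliberately drains iter_entities() when called early; callers who want both entities and the manifest should iterate first, exactly as the class docstring's example does.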
strapi_kit/export/jsonl_writer.py
ADDED
@@ -0,0 +1,134 @@
+"""JSONL streaming export writer.
+
+Provides O(1) memory export by writing entities as they're fetched,
+one JSON object per line.
+"""
+
+import json
+import logging
+from pathlib import Path
+from typing import IO, Any
+
+from strapi_kit.exceptions import ImportExportError
+from strapi_kit.models.export_format import (
+    ExportedEntity,
+    ExportedMediaFile,
+    ExportMetadata,
+)
+
+logger = logging.getLogger(__name__)
+
+
+class JSONLExportWriter:
+    """Streaming JSONL export writer.
+
+    Writes entities one at a time to a JSONL file for memory-efficient
+    export of large datasets.
+
+    JSONL Format:
+        Line 1: {"_type": "metadata", ...}
+        Lines 2-N: {"_type": "entity", "content_type": "...", "data": {...}}
+        Last line: {"_type": "media_manifest", "files": [...]}
+
+    Example:
+        >>> with JSONLExportWriter("export.jsonl") as writer:
+        ...     writer.write_metadata(metadata)
+        ...     for entity in entities:
+        ...         writer.write_entity(entity)
+        ...     writer.write_media_manifest(media_files)
+    """
+
+    def __init__(self, file_path: str | Path) -> None:
+        """Initialize JSONL writer.
+
+        Args:
+            file_path: Path to output JSONL file
+        """
+        self.file_path = Path(file_path)
+        self._file: IO[str] | None = None
+        self._entity_count = 0
+        self._content_type_counts: dict[str, int] = {}
+
+    def __enter__(self) -> "JSONLExportWriter":
+        """Open file for writing."""
+        self.file_path.parent.mkdir(parents=True, exist_ok=True)
+        self._file = open(self.file_path, "w", encoding="utf-8")
+        return self
+
+    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
+        """Close file."""
+        if self._file:
+            self._file.close()
+            self._file = None
+
+    def write_metadata(self, metadata: ExportMetadata) -> None:
+        """Write metadata as first line.
+
+        Args:
+            metadata: Export metadata
+        """
+        if not self._file:
+            raise ImportExportError("Writer not opened - use context manager")
+
+        record = {
+            "_type": "metadata",
+            **metadata.model_dump(mode="json"),
+        }
+        self._write_line(record)
+        logger.debug("Wrote metadata to JSONL")
+
+    def write_entity(self, entity: ExportedEntity) -> None:
+        """Write a single entity.
+
+        Args:
+            entity: Entity to write
+        """
+        if not self._file:
+            raise ImportExportError("Writer not opened - use context manager")
+
+        record = {
+            "_type": "entity",
+            **entity.model_dump(mode="json"),
+        }
+        self._write_line(record)
+
+        self._entity_count += 1
+        ct = entity.content_type
+        self._content_type_counts[ct] = self._content_type_counts.get(ct, 0) + 1
+
+    def write_media_manifest(self, media_files: list[ExportedMediaFile]) -> None:
+        """Write media manifest as final line.
+
+        Args:
+            media_files: List of media file references
+        """
+        if not self._file:
+            raise ImportExportError("Writer not opened - use context manager")
+
+        record = {
+            "_type": "media_manifest",
+            "files": [m.model_dump(mode="json") for m in media_files],
+        }
+        self._write_line(record)
+        logger.debug(f"Wrote media manifest with {len(media_files)} files")
+
+    def _write_line(self, record: dict[str, Any]) -> None:
+        """Write a single JSON line.
+
+        Args:
+            record: Dictionary to serialize as JSON line
+        """
+        if self._file is None:
+            raise ImportExportError("Writer not opened - use context manager")
+        line = json.dumps(record, ensure_ascii=False, default=str)
+        self._file.write(line + "\n")
+
+    @property
+    def entity_count(self) -> int:
+        """Get total entities written."""
+        return self._entity_count
+
+    @property
+    def content_type_counts(self) -> dict[str, int]:
+        """Get entity counts per content type."""
+        return self._content_type_counts.copy()
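
To make the docstring's "JSONL Format" section concrete, the snippet below hand-writes a minimal file in the same three-part layout the writer produces and JSONLImportReader consumes. The metadata and entity payloads here are invented placeholders, not the real model fields:

    import json

    records = [
        {"_type": "metadata", "note": "placeholder for ExportMetadata fields"},
        {"_type": "entity", "content_type": "api::article.article",
         "data": {"id": 1, "title": "Hello"}},
        {"_type": "media_manifest", "files": []},
    ]
    with open("export.jsonl", "w", encoding="utf-8") as f:
        for record in records:
            f.write(json.dumps(record, ensure_ascii=False) + "\n")

Because each record is serialized and written out as it arrives, peak memory stays constant regardless of dataset size - the O(1) property both new modules advertise.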
strapi_kit/export/relation_resolver.py
CHANGED
@@ -4,8 +4,17 @@ This module handles extracting relations from entities during export
 and resolving them during import using ID mappings.
 """
 
+from __future__ import annotations
+
 import logging
-from typing import Any
+from typing import TYPE_CHECKING, Any
+
+from ..exceptions import StrapiError
+from ..models.schema import FieldType
+
+if TYPE_CHECKING:
+    from ..cache.schema_cache import InMemorySchemaCache
+    from ..models.schema import ContentTypeSchema
 
 logger = logging.getLogger(__name__)
 
@@ -170,3 +179,223 @@ class RelationResolver:
             payload[field_name] = ids
 
         return payload
+
+    # Schema-aware extraction methods
+
+    @staticmethod
+    def extract_relations_with_schema(
+        data: dict[str, Any],
+        schema: ContentTypeSchema,
+        schema_cache: InMemorySchemaCache | None = None,
+    ) -> dict[str, list[int | str]]:
+        """Extract relations using schema - only actual relation fields.
+
+        This method uses the content type schema to identify relation fields,
+        avoiding false positives from fields that happen to contain {"data": ...}.
+        It also recursively extracts relations from components and dynamic zones.
+
+        Args:
+            data: Entity attributes dictionary
+            schema: Content type schema with field definitions
+            schema_cache: Optional schema cache for component lookups
+
+        Returns:
+            Dictionary mapping relation field paths to lists of IDs
+
+        Example:
+            >>> # Only extracts from actual relation fields defined in schema
+            >>> data = {
+            ...     "title": "Article",
+            ...     "author": {"data": {"id": 5}},
+            ...     "metadata": {"data": "not a relation"}  # Won't be extracted
+            ... }
+            >>> relations = RelationResolver.extract_relations_with_schema(data, schema)
+            {'author': [5]}  # metadata excluded because not a relation in schema
+        """
+        relations: dict[str, list[int | str]] = {}
+
+        for field_name, field_value in data.items():
+            field_schema = schema.fields.get(field_name)
+            if not field_schema:
+                continue
+
+            if field_schema.type == FieldType.RELATION:
+                # Extract IDs from relation field
+                ids = RelationResolver._extract_ids_from_field(field_value)
+                if ids is not None:
+                    relations[field_name] = ids
+
+            elif field_schema.type == FieldType.COMPONENT and schema_cache:
+                # Recursively extract from component
+                component_uid = field_schema.component
+                if component_uid and field_value:
+                    if field_schema.repeatable and isinstance(field_value, list):
+                        # Repeatable component - list of components
+                        for idx, item in enumerate(field_value):
+                            if isinstance(item, dict):
+                                nested = RelationResolver._extract_from_component(
+                                    item, component_uid, schema_cache, f"{field_name}[{idx}]."
+                                )
+                                relations.update(nested)
+                    elif isinstance(field_value, dict):
+                        # Single component
+                        nested = RelationResolver._extract_from_component(
+                            field_value, component_uid, schema_cache, f"{field_name}."
+                        )
+                        relations.update(nested)
+
+            elif field_schema.type == FieldType.DYNAMIC_ZONE and schema_cache:
+                # Recursively extract from dynamic zone components
+                if isinstance(field_value, list):
+                    for idx, item in enumerate(field_value):
+                        if isinstance(item, dict) and "__component" in item:
+                            component_uid = item["__component"]
+                            nested = RelationResolver._extract_from_component(
+                                item, component_uid, schema_cache, f"{field_name}[{idx}]."
+                            )
+                            relations.update(nested)
+
+        return relations
+
+    @staticmethod
+    def _extract_from_component(
+        component_data: dict[str, Any],
+        component_uid: str,
+        schema_cache: InMemorySchemaCache,
+        prefix: str = "",
+    ) -> dict[str, list[int | str]]:
+        """Recursively extract relations from a component.
+
+        Args:
+            component_data: Component data dictionary
+            component_uid: Component UID for schema lookup
+            schema_cache: Schema cache for component lookups
+            prefix: Field path prefix for nested fields
+
+        Returns:
+            Dictionary mapping prefixed field paths to lists of IDs
+        """
+        try:
+            component_schema = schema_cache.get_component_schema(component_uid)
+        except StrapiError:
+            logger.warning(f"Could not fetch component schema: {component_uid}", exc_info=True)
+            return {}
+
+        relations: dict[str, list[int | str]] = {}
+
+        for field_name, field_value in component_data.items():
+            if field_name == "__component":
+                continue  # Skip component type marker
+
+            field_schema = component_schema.fields.get(field_name)
+            if not field_schema:
+                continue
+
+            full_key = f"{prefix}{field_name}"
+
+            if field_schema.type == FieldType.RELATION:
+                ids = RelationResolver._extract_ids_from_field(field_value)
+                if ids is not None:
+                    relations[full_key] = ids
+
+            elif field_schema.type == FieldType.COMPONENT:
+                nested_uid = field_schema.component
+                if nested_uid and field_value:
+                    if field_schema.repeatable and isinstance(field_value, list):
+                        for idx, item in enumerate(field_value):
+                            if isinstance(item, dict):
+                                nested = RelationResolver._extract_from_component(
+                                    item, nested_uid, schema_cache, f"{full_key}[{idx}]."
+                                )
+                                relations.update(nested)
+                    elif isinstance(field_value, dict):
+                        nested = RelationResolver._extract_from_component(
+                            field_value, nested_uid, schema_cache, f"{full_key}."
+                        )
+                        relations.update(nested)
+
+            elif field_schema.type == FieldType.DYNAMIC_ZONE:
+                if isinstance(field_value, list):
+                    for idx, item in enumerate(field_value):
+                        if isinstance(item, dict) and "__component" in item:
+                            dz_uid = item["__component"]
+                            nested = RelationResolver._extract_from_component(
+                                item, dz_uid, schema_cache, f"{full_key}[{idx}]."
+                            )
+                            relations.update(nested)
+
+        return relations
+
+    @staticmethod
+    def _extract_ids_from_field(field_value: Any) -> list[int | str] | None:
+        """Extract IDs from a relation field value.
+
+        Handles both v4 nested format and v5 flat format.
+
+        Args:
+            field_value: Field value from entity data
+
+        Returns:
+            List of IDs if this looks like a relation, None otherwise
+        """
+        if field_value is None:
+            return []
+
+        # v4 format: {"data": ...}
+        if isinstance(field_value, dict) and "data" in field_value:
+            relation_data = field_value["data"]
+            if relation_data is None:
+                return []
+            elif isinstance(relation_data, dict) and "id" in relation_data:
+                return [relation_data["id"]]
+            elif isinstance(relation_data, list):
+                return [
+                    item["id"] for item in relation_data if isinstance(item, dict) and "id" in item
+                ]
+
+        # v5 format: direct ID or list of IDs (can be int or str)
+        if isinstance(field_value, (int, str)):
+            return [field_value]
+        elif isinstance(field_value, list):
+            ids: list[int | str] = [item for item in field_value if isinstance(item, (int, str))]
+            if ids:
+                return ids
+
+        return None
+
+    @staticmethod
+    def strip_relations_with_schema(
+        data: dict[str, Any],
+        schema: ContentTypeSchema,
+    ) -> dict[str, Any]:
+        """Remove only actual relation fields from entity data.
+
+        Uses schema to identify relation fields, preserving non-relation
+        fields that happen to contain {"data": ...}.
+
+        Args:
+            data: Entity attributes dictionary
+            schema: Content type schema with field definitions
+
+        Returns:
+            Copy of data with relation fields removed
+
+        Example:
+            >>> data = {
+            ...     "title": "Article",
+            ...     "author": {"data": {"id": 5}},  # Relation - removed
+            ...     "metadata": {"data": "custom"}  # Not relation - kept
+            ... }
+            >>> stripped = RelationResolver.strip_relations_with_schema(data, schema)
+            {'title': 'Article', 'metadata': {'data': 'custom'}}
+        """
+        cleaned_data = {}
+
+        for field_name, field_value in data.items():
+            field_schema = schema.fields.get(field_name)
+
+            # Keep field if it's not in schema or not a relation
+            if not field_schema or field_schema.type != FieldType.RELATION:
+                cleaned_data[field_name] = field_value
+
+        return cleaned_data
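
Tracing the branches of _extract_ids_from_field gives a compact picture of the v4 and v5 shapes it accepts. The expected results are shown as comments, derived by hand from the code above rather than from test output:

    RelationResolver._extract_ids_from_field({"data": {"id": 5}})               # [5] - v4 single
    RelationResolver._extract_ids_from_field({"data": [{"id": 1}, {"id": 2}]})  # [1, 2] - v4 to-many
    RelationResolver._extract_ids_from_field({"data": None})                    # [] - v4 empty relation
    RelationResolver._extract_ids_from_field("doc_abc")                         # ["doc_abc"] - v5 documentId
    RelationResolver._extract_ids_from_field([3, "doc_xyz"])                    # [3, "doc_xyz"] - v5 list
    RelationResolver._extract_ids_from_field({"title": "x"})                    # None - not a relation shape

The None/empty-list distinction matters: None means "this value does not look like a relation at all", so extract_relations_with_schema records nothing for the field, while [] records a relation that is present but empty.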
strapi_kit/models/__init__.py
CHANGED
@@ -9,7 +9,13 @@ from .content_type import ComponentListItem, ContentTypeListItem
 from .content_type import ContentTypeInfo as CTBContentTypeInfo
 from .content_type import ContentTypeSchema as CTBContentTypeSchema
 from .enums import FilterOperator, PublicationState, SortDirection
-from .export_format import ExportData, ExportedEntity, ExportedMediaFile, ExportMetadata
+from .export_format import (
+    ExportData,
+    ExportedEntity,
+    ExportedMediaFile,
+    ExportFormat,
+    ExportMetadata,
+)
 from .import_options import ConflictResolution, ImportOptions, ImportResult
 from .request.fields import FieldSelection
 from .request.filters import FilterBuilder, FilterCondition, FilterGroup
@@ -47,6 +53,7 @@ __all__ = [
     "ExportMetadata",
     "ExportedEntity",
     "ExportedMediaFile",
+    "ExportFormat",
     "ImportOptions",
     "ImportResult",
     "ConflictResolution",
strapi_kit/models/export_format.py
CHANGED
@@ -5,6 +5,7 @@ and version compatibility.
 """
 
 from datetime import UTC, datetime
+from enum import StrEnum
 from pathlib import PureWindowsPath
 from typing import Any
 
@@ -15,6 +16,18 @@ from strapi_kit.exceptions import FormatError
 from .schema import ContentTypeSchema
 
 
+class ExportFormat(StrEnum):
+    """Export file format options.
+
+    Attributes:
+        JSON: Standard JSON format (default). Loads entire file into memory.
+        JSONL: JSON Lines format. Streams entities one per line for O(1) memory.
+    """
+
+    JSON = "json"
+    JSONL = "jsonl"
+
+
 class ExportMetadata(BaseModel):
     """Metadata about the export.
 
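
Since ExportFormat subclasses StrEnum (Python 3.11+), its members compare equal to, and format as, their string values, so callers can pass either the enum or a raw string. A small sketch:

    assert ExportFormat.JSONL == "jsonl"
    assert ExportFormat("jsonl") is ExportFormat.JSONL       # parse user input
    assert f"export.{ExportFormat.JSONL}" == "export.jsonl"  # usable in file suffixes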
strapi_kit/models/import_options.py
CHANGED
@@ -90,6 +90,7 @@ class ImportResult(BaseModel):
         entities_skipped: Number of entities skipped
         entities_updated: Number of entities updated
         entities_failed: Number of entities that failed
+        relations_imported: Number of relation updates performed
         media_imported: Number of media files imported
         media_skipped: Number of media files skipped
         errors: List of error messages
@@ -103,6 +104,7 @@ class ImportResult(BaseModel):
     entities_skipped: int = Field(default=0, description="Entities skipped")
     entities_updated: int = Field(default=0, description="Entities updated")
     entities_failed: int = Field(default=0, description="Entities failed")
+    relations_imported: int = Field(default=0, description="Relation updates performed")
     media_imported: int = Field(default=0, description="Media files imported")
     media_skipped: int = Field(default=0, description="Media files skipped")
     errors: list[str] = Field(default_factory=list, description="Error messages")
@@ -111,6 +113,14 @@ class ImportResult(BaseModel):
         default_factory=dict,
         description="Mapping of old IDs to new IDs per content type",
     )
+    doc_id_mapping: dict[str, dict[int, str]] = Field(
+        default_factory=dict,
+        description="Mapping of old IDs to document_ids per content type (for v5 endpoints)",
+    )
+    doc_id_to_new_id: dict[str, dict[str, int]] = Field(
+        default_factory=dict,
+        description="Mapping of old document_ids to new IDs (for v5 string relation resolution)",
+    )
 
     def add_error(self, error: str) -> None:
         """Add an error message.
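
A hypothetical illustration of how the three mapping tables relate after an import against v5 endpoints; the content-type UID, ids, and documentIds are invented, and the nesting simply follows the declared field types:

    result.id_mapping        # {"api::article.article": {17: 42}}       old id -> new id
    result.doc_id_mapping    # {"api::article.article": {17: "d4kq"}}   old id -> document_id
    result.doc_id_to_new_id  # {"api::article.article": {"a9fz": 42}}   old document_id -> new id

The last table is what lets the importer resolve v5-style relations, which reference string documentIds rather than integer ids.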
strapi_kit/models/schema.py
CHANGED
@@ -12,7 +12,7 @@ class FieldType(StrEnum):
     TEXT = "text"
     RICH_TEXT = "richtext"
     EMAIL = "email"
-    PASSWORD = "password"  #
+    PASSWORD = "password"  # noqa: S105 - Field type enum, not a hardcoded password
     INTEGER = "integer"
     BIG_INTEGER = "biginteger"
     FLOAT = "float"
@@ -53,6 +53,11 @@ class FieldSchema(BaseModel):
     mapped_by: str | None = None
     inversed_by: str | None = None
 
+    # Component-specific
+    component: str | None = None  # Component UID for COMPONENT type
+    components: list[str] | None = None  # Allowed UIDs for DYNAMIC_ZONE
+    repeatable: bool = False  # True for repeatable components
+
 
 class ContentTypeSchema(BaseModel):
     """Complete schema for a content type."""
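
A sketch of how the new attributes describe nesting; the constructor calls assume FieldSchema accepts these fields as keyword arguments (as pydantic models do), and the component UIDs are made up:

    seo = FieldSchema(type=FieldType.COMPONENT, component="shared.seo")                # single component
    slides = FieldSchema(type=FieldType.COMPONENT, component="shared.slide", repeatable=True)  # repeatable
    body = FieldSchema(type=FieldType.DYNAMIC_ZONE, components=["shared.quote", "shared.media"])

These are exactly the attributes the new RelationResolver code reads when recursing: component for the schema lookup and repeatable to expect a list, while dynamic-zone items carry their own "__component" marker in the data.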
strapi_kit/utils/__init__.py
CHANGED
@@ -7,6 +7,7 @@ This package contains helper utilities including:
 """
 
 from strapi_kit.utils.rate_limiter import AsyncTokenBucketRateLimiter, TokenBucketRateLimiter
+from strapi_kit.utils.schema import extract_info_from_schema
 from strapi_kit.utils.seo import SEOConfiguration, detect_seo_configuration
 from strapi_kit.utils.uid import (
     api_id_to_singular,
@@ -31,4 +32,6 @@ __all__ = [
     # SEO utilities
     "detect_seo_configuration",
     "SEOConfiguration",
+    # Schema utilities
+    "extract_info_from_schema",
 ]