strapi-kit 0.0.5__py3-none-any.whl → 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,195 @@
1
+ """JSONL streaming import reader.
2
+
3
+ Provides O(1) memory import by reading entities one at a time.
4
+ """
5
+
6
+ import json
7
+ import logging
8
+ from collections.abc import Generator
9
+ from pathlib import Path
10
+ from typing import IO, Any
11
+
12
+ from strapi_kit.exceptions import FormatError, ImportExportError
13
+ from strapi_kit.models.export_format import (
14
+ ExportedEntity,
15
+ ExportedMediaFile,
16
+ ExportMetadata,
17
+ )
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
class JSONLImportReader:
    """Streaming JSONL import reader.

    Reads entities one at a time from a JSONL file for memory-efficient
    import of large datasets.

    Expected file layout (one JSON object per line):
        Line 1: {"_type": "metadata", ...}
        Lines 2..N: {"_type": "entity", ...}
        Final line (optional): {"_type": "media_manifest", "files": [...]}

    Example:
        >>> with JSONLImportReader("export.jsonl") as reader:
        ...     metadata = reader.read_metadata()
        ...     for entity in reader.iter_entities():
        ...         process_entity(entity)
        ...     media_manifest = reader.read_media_manifest()
    """

    def __init__(self, file_path: str | Path) -> None:
        """Initialize JSONL reader.

        Args:
            file_path: Path to input JSONL file

        Raises:
            FormatError: If file doesn't exist
        """
        self.file_path = Path(file_path)
        if not self.file_path.exists():
            raise FormatError(f"JSONL file not found: {file_path}")

        self._file: IO[str] | None = None
        self._metadata: ExportMetadata | None = None
        self._media_manifest: list[ExportedMediaFile] | None = None
        # 1-based number of the last line read; used in error messages.
        self._current_line = 0

    def __enter__(self) -> "JSONLImportReader":
        """Open file for reading."""
        self._file = open(self.file_path, encoding="utf-8")
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Close file."""
        if self._file:
            self._file.close()
            self._file = None

    def read_metadata(self) -> ExportMetadata:
        """Read metadata from first line.

        The parsed metadata is cached; repeated calls do not advance
        the file position.

        Returns:
            Export metadata

        Raises:
            ImportExportError: If the reader was not opened via context manager
            FormatError: If the file is empty, line 1 is invalid JSON,
                or line 1 is not a metadata record
        """
        if not self._file:
            raise ImportExportError("Reader not opened - use context manager")

        if self._metadata is not None:
            return self._metadata

        line = self._file.readline()
        self._current_line = 1

        if not line:
            raise FormatError("Empty JSONL file")

        try:
            record = json.loads(line)
        except json.JSONDecodeError as e:
            raise FormatError(f"Invalid JSON on line 1: {e}") from e

        if record.get("_type") != "metadata":
            raise FormatError(f"Expected metadata on line 1, got: {record.get('_type')}")

        # Remove _type field before parsing
        record.pop("_type", None)
        self._metadata = ExportMetadata(**record)
        return self._metadata

    def iter_entities(self) -> Generator[ExportedEntity, None, None]:
        """Iterate over entities in the file.

        Yields entities one at a time for memory-efficient processing.
        Stops at the media manifest record (caching it for
        read_media_manifest) or at end of file.

        Yields:
            ExportedEntity objects

        Raises:
            ImportExportError: If the reader was not opened via context manager
            FormatError: If a line is not valid JSON
        """
        if not self._file:
            raise ImportExportError("Reader not opened - use context manager")

        # Ensure metadata is read first
        if self._metadata is None:
            self.read_metadata()

        for line in self._file:
            self._current_line += 1
            line = line.strip()
            if not line:
                continue  # Tolerate blank lines between records

            try:
                record = json.loads(line)
            except json.JSONDecodeError as e:
                raise FormatError(f"Invalid JSON on line {self._current_line}: {e}") from e

            record_type = record.get("_type")

            if record_type == "entity":
                record.pop("_type", None)
                yield ExportedEntity(**record)

            elif record_type == "media_manifest":
                # Parse and cache media manifest
                files_data = record.get("files", [])
                self._media_manifest = [ExportedMediaFile(**f) for f in files_data]
                # Don't yield - this is handled separately
                break

            elif record_type == "metadata":
                # Skip duplicate metadata
                continue

            else:
                # Lazy %-args: message is only formatted if the record emits.
                logger.warning(
                    "Unknown record type on line %d: %s", self._current_line, record_type
                )

    def read_media_manifest(self) -> list[ExportedMediaFile]:
        """Read media manifest from file.

        Must be called after iter_entities() has completed, or will consume
        remaining entities to find the manifest.

        Returns:
            List of media file references, or empty list if no manifest found

        Raises:
            ImportExportError: If the reader was not opened via context manager
        """
        if self._media_manifest is not None:
            return self._media_manifest

        # If we haven't read through entities yet, do so now
        if not self._file:
            raise ImportExportError("Reader not opened - use context manager")

        # Consume remaining lines to find media manifest
        for _ in self.iter_entities():
            pass  # Discard entities, we just want the manifest

        if self._media_manifest is None:
            # No media manifest found - return empty list
            return []

        return self._media_manifest

    def get_entity_count(self) -> int:
        """Count total entities without loading them all.

        Note: This reads through the entire file. Blank and unparseable
        lines are skipped rather than raising.

        Returns:
            Total entity count
        """
        count = 0
        # Create a new file handle to not disturb current position
        with open(self.file_path, encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    record = json.loads(line)
                    if record.get("_type") == "entity":
                        count += 1
                except json.JSONDecodeError:
                    continue
        return count
@@ -0,0 +1,134 @@
1
+ """JSONL streaming export writer.
2
+
3
+ Provides O(1) memory export by writing entities as they're fetched,
4
+ one JSON object per line.
5
+ """
6
+
7
+ import json
8
+ import logging
9
+ from pathlib import Path
10
+ from typing import IO, Any
11
+
12
+ from strapi_kit.exceptions import ImportExportError
13
+ from strapi_kit.models.export_format import (
14
+ ExportedEntity,
15
+ ExportedMediaFile,
16
+ ExportMetadata,
17
+ )
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
class JSONLExportWriter:
    """Streaming JSONL export writer.

    Writes entities one at a time to a JSONL file for memory-efficient
    export of large datasets.

    JSONL Format:
        Line 1: {"_type": "metadata", ...}
        Lines 2-N: {"_type": "entity", "content_type": "...", "data": {...}}
        Last line: {"_type": "media_manifest", "files": [...]}

    Example:
        >>> with JSONLExportWriter("export.jsonl") as writer:
        ...     writer.write_metadata(metadata)
        ...     for entity in entities:
        ...         writer.write_entity(entity)
        ...     writer.write_media_manifest(media_files)
    """

    def __init__(self, file_path: str | Path) -> None:
        """Initialize JSONL writer.

        Args:
            file_path: Path to output JSONL file (parent directories are
                created when the context manager is entered)
        """
        self.file_path = Path(file_path)
        self._file: IO[str] | None = None
        self._entity_count = 0
        self._content_type_counts: dict[str, int] = {}

    def __enter__(self) -> "JSONLExportWriter":
        """Create parent directories and open file for writing."""
        self.file_path.parent.mkdir(parents=True, exist_ok=True)
        self._file = open(self.file_path, "w", encoding="utf-8")
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Close file."""
        if self._file:
            self._file.close()
            self._file = None

    def write_metadata(self, metadata: ExportMetadata) -> None:
        """Write metadata as first line.

        Args:
            metadata: Export metadata

        Raises:
            ImportExportError: If the writer was not opened via context manager
        """
        if not self._file:
            raise ImportExportError("Writer not opened - use context manager")

        record = {
            "_type": "metadata",
            **metadata.model_dump(mode="json"),
        }
        self._write_line(record)
        logger.debug("Wrote metadata to JSONL")

    def write_entity(self, entity: ExportedEntity) -> None:
        """Write a single entity and update running counts.

        Args:
            entity: Entity to write

        Raises:
            ImportExportError: If the writer was not opened via context manager
        """
        if not self._file:
            raise ImportExportError("Writer not opened - use context manager")

        record = {
            "_type": "entity",
            **entity.model_dump(mode="json"),
        }
        self._write_line(record)

        self._entity_count += 1
        ct = entity.content_type
        self._content_type_counts[ct] = self._content_type_counts.get(ct, 0) + 1

    def write_media_manifest(self, media_files: list[ExportedMediaFile]) -> None:
        """Write media manifest as final line.

        Args:
            media_files: List of media file references

        Raises:
            ImportExportError: If the writer was not opened via context manager
        """
        if not self._file:
            raise ImportExportError("Writer not opened - use context manager")

        record = {
            "_type": "media_manifest",
            "files": [m.model_dump(mode="json") for m in media_files],
        }
        self._write_line(record)
        # Lazy %-args: only formatted when DEBUG logging is enabled.
        logger.debug("Wrote media manifest with %d files", len(media_files))

    def _write_line(self, record: dict[str, Any]) -> None:
        """Serialize record as one JSON line and write it.

        Args:
            record: Dictionary to serialize as JSON line

        Raises:
            ImportExportError: If the writer was not opened via context manager
        """
        if self._file is None:
            raise ImportExportError("Writer not opened - use context manager")
        # NOTE: default=str is deliberate here - it stringifies datetimes and
        # other non-JSON-native values instead of aborting the export.
        line = json.dumps(record, ensure_ascii=False, default=str)
        self._file.write(line + "\n")

    @property
    def entity_count(self) -> int:
        """Get total entities written."""
        return self._entity_count

    @property
    def content_type_counts(self) -> dict[str, int]:
        """Get entity counts per content type (defensive copy)."""
        return self._content_type_counts.copy()
@@ -4,8 +4,17 @@ This module handles extracting relations from entities during export
4
4
  and resolving them during import using ID mappings.
5
5
  """
6
6
 
7
+ from __future__ import annotations
8
+
7
9
  import logging
8
- from typing import Any
10
+ from typing import TYPE_CHECKING, Any
11
+
12
+ from ..exceptions import StrapiError
13
+ from ..models.schema import FieldType
14
+
15
+ if TYPE_CHECKING:
16
+ from ..cache.schema_cache import InMemorySchemaCache
17
+ from ..models.schema import ContentTypeSchema
9
18
 
10
19
  logger = logging.getLogger(__name__)
11
20
 
@@ -170,3 +179,223 @@ class RelationResolver:
170
179
  payload[field_name] = ids
171
180
 
172
181
  return payload
182
+
183
+ # Schema-aware extraction methods
184
+
185
+ @staticmethod
186
+ def extract_relations_with_schema(
187
+ data: dict[str, Any],
188
+ schema: ContentTypeSchema,
189
+ schema_cache: InMemorySchemaCache | None = None,
190
+ ) -> dict[str, list[int | str]]:
191
+ """Extract relations using schema - only actual relation fields.
192
+
193
+ This method uses the content type schema to identify relation fields,
194
+ avoiding false positives from fields that happen to contain {"data": ...}.
195
+ It also recursively extracts relations from components and dynamic zones.
196
+
197
+ Args:
198
+ data: Entity attributes dictionary
199
+ schema: Content type schema with field definitions
200
+ schema_cache: Optional schema cache for component lookups
201
+
202
+ Returns:
203
+ Dictionary mapping relation field paths to lists of IDs
204
+
205
+ Example:
206
+ >>> # Only extracts from actual relation fields defined in schema
207
+ >>> data = {
208
+ ... "title": "Article",
209
+ ... "author": {"data": {"id": 5}},
210
+ ... "metadata": {"data": "not a relation"} # Won't be extracted
211
+ ... }
212
+ >>> relations = RelationResolver.extract_relations_with_schema(data, schema)
213
+ {'author': [5]} # metadata excluded because not a relation in schema
214
+ """
215
+ relations: dict[str, list[int | str]] = {}
216
+
217
+ for field_name, field_value in data.items():
218
+ field_schema = schema.fields.get(field_name)
219
+ if not field_schema:
220
+ continue
221
+
222
+ if field_schema.type == FieldType.RELATION:
223
+ # Extract IDs from relation field
224
+ ids = RelationResolver._extract_ids_from_field(field_value)
225
+ if ids is not None:
226
+ relations[field_name] = ids
227
+
228
+ elif field_schema.type == FieldType.COMPONENT and schema_cache:
229
+ # Recursively extract from component
230
+ component_uid = field_schema.component
231
+ if component_uid and field_value:
232
+ if field_schema.repeatable and isinstance(field_value, list):
233
+ # Repeatable component - list of components
234
+ for idx, item in enumerate(field_value):
235
+ if isinstance(item, dict):
236
+ nested = RelationResolver._extract_from_component(
237
+ item, component_uid, schema_cache, f"{field_name}[{idx}]."
238
+ )
239
+ relations.update(nested)
240
+ elif isinstance(field_value, dict):
241
+ # Single component
242
+ nested = RelationResolver._extract_from_component(
243
+ field_value, component_uid, schema_cache, f"{field_name}."
244
+ )
245
+ relations.update(nested)
246
+
247
+ elif field_schema.type == FieldType.DYNAMIC_ZONE and schema_cache:
248
+ # Recursively extract from dynamic zone components
249
+ if isinstance(field_value, list):
250
+ for idx, item in enumerate(field_value):
251
+ if isinstance(item, dict) and "__component" in item:
252
+ component_uid = item["__component"]
253
+ nested = RelationResolver._extract_from_component(
254
+ item, component_uid, schema_cache, f"{field_name}[{idx}]."
255
+ )
256
+ relations.update(nested)
257
+
258
+ return relations
259
+
260
+ @staticmethod
261
+ def _extract_from_component(
262
+ component_data: dict[str, Any],
263
+ component_uid: str,
264
+ schema_cache: InMemorySchemaCache,
265
+ prefix: str = "",
266
+ ) -> dict[str, list[int | str]]:
267
+ """Recursively extract relations from a component.
268
+
269
+ Args:
270
+ component_data: Component data dictionary
271
+ component_uid: Component UID for schema lookup
272
+ schema_cache: Schema cache for component lookups
273
+ prefix: Field path prefix for nested fields
274
+
275
+ Returns:
276
+ Dictionary mapping prefixed field paths to lists of IDs
277
+ """
278
+ try:
279
+ component_schema = schema_cache.get_component_schema(component_uid)
280
+ except StrapiError:
281
+ logger.warning(f"Could not fetch component schema: {component_uid}", exc_info=True)
282
+ return {}
283
+
284
+ relations: dict[str, list[int | str]] = {}
285
+
286
+ for field_name, field_value in component_data.items():
287
+ if field_name == "__component":
288
+ continue # Skip component type marker
289
+
290
+ field_schema = component_schema.fields.get(field_name)
291
+ if not field_schema:
292
+ continue
293
+
294
+ full_key = f"{prefix}{field_name}"
295
+
296
+ if field_schema.type == FieldType.RELATION:
297
+ ids = RelationResolver._extract_ids_from_field(field_value)
298
+ if ids is not None:
299
+ relations[full_key] = ids
300
+
301
+ elif field_schema.type == FieldType.COMPONENT:
302
+ nested_uid = field_schema.component
303
+ if nested_uid and field_value:
304
+ if field_schema.repeatable and isinstance(field_value, list):
305
+ for idx, item in enumerate(field_value):
306
+ if isinstance(item, dict):
307
+ nested = RelationResolver._extract_from_component(
308
+ item, nested_uid, schema_cache, f"{full_key}[{idx}]."
309
+ )
310
+ relations.update(nested)
311
+ elif isinstance(field_value, dict):
312
+ nested = RelationResolver._extract_from_component(
313
+ field_value, nested_uid, schema_cache, f"{full_key}."
314
+ )
315
+ relations.update(nested)
316
+
317
+ elif field_schema.type == FieldType.DYNAMIC_ZONE:
318
+ if isinstance(field_value, list):
319
+ for idx, item in enumerate(field_value):
320
+ if isinstance(item, dict) and "__component" in item:
321
+ dz_uid = item["__component"]
322
+ nested = RelationResolver._extract_from_component(
323
+ item, dz_uid, schema_cache, f"{full_key}[{idx}]."
324
+ )
325
+ relations.update(nested)
326
+
327
+ return relations
328
+
329
+ @staticmethod
330
+ def _extract_ids_from_field(field_value: Any) -> list[int | str] | None:
331
+ """Extract IDs from a relation field value.
332
+
333
+ Handles both v4 nested format and v5 flat format.
334
+
335
+ Args:
336
+ field_value: Field value from entity data
337
+
338
+ Returns:
339
+ List of IDs if this looks like a relation, None otherwise
340
+ """
341
+ if field_value is None:
342
+ return []
343
+
344
+ # v4 format: {"data": ...}
345
+ if isinstance(field_value, dict) and "data" in field_value:
346
+ relation_data = field_value["data"]
347
+ if relation_data is None:
348
+ return []
349
+ elif isinstance(relation_data, dict) and "id" in relation_data:
350
+ return [relation_data["id"]]
351
+ elif isinstance(relation_data, list):
352
+ return [
353
+ item["id"] for item in relation_data if isinstance(item, dict) and "id" in item
354
+ ]
355
+
356
+ # v5 format: direct ID or list of IDs (can be int or str)
357
+ if isinstance(field_value, (int, str)):
358
+ return [field_value]
359
+ elif isinstance(field_value, list):
360
+ ids: list[int | str] = [item for item in field_value if isinstance(item, (int, str))]
361
+ if ids:
362
+ return ids
363
+
364
+ return None
365
+
366
+ @staticmethod
367
+ def strip_relations_with_schema(
368
+ data: dict[str, Any],
369
+ schema: ContentTypeSchema,
370
+ ) -> dict[str, Any]:
371
+ """Remove only actual relation fields from entity data.
372
+
373
+ Uses schema to identify relation fields, preserving non-relation
374
+ fields that happen to contain {"data": ...}.
375
+
376
+ Args:
377
+ data: Entity attributes dictionary
378
+ schema: Content type schema with field definitions
379
+
380
+ Returns:
381
+ Copy of data with relation fields removed
382
+
383
+ Example:
384
+ >>> data = {
385
+ ... "title": "Article",
386
+ ... "author": {"data": {"id": 5}}, # Relation - removed
387
+ ... "metadata": {"data": "custom"} # Not relation - kept
388
+ ... }
389
+ >>> stripped = RelationResolver.strip_relations_with_schema(data, schema)
390
+ {'title': 'Article', 'metadata': {'data': 'custom'}}
391
+ """
392
+ cleaned_data = {}
393
+
394
+ for field_name, field_value in data.items():
395
+ field_schema = schema.fields.get(field_name)
396
+
397
+ # Keep field if it's not in schema or not a relation
398
+ if not field_schema or field_schema.type != FieldType.RELATION:
399
+ cleaned_data[field_name] = field_value
400
+
401
+ return cleaned_data
@@ -9,7 +9,13 @@ from .content_type import ComponentListItem, ContentTypeListItem
9
9
  from .content_type import ContentTypeInfo as CTBContentTypeInfo
10
10
  from .content_type import ContentTypeSchema as CTBContentTypeSchema
11
11
  from .enums import FilterOperator, PublicationState, SortDirection
12
- from .export_format import ExportData, ExportedEntity, ExportedMediaFile, ExportMetadata
12
+ from .export_format import (
13
+ ExportData,
14
+ ExportedEntity,
15
+ ExportedMediaFile,
16
+ ExportFormat,
17
+ ExportMetadata,
18
+ )
13
19
  from .import_options import ConflictResolution, ImportOptions, ImportResult
14
20
  from .request.fields import FieldSelection
15
21
  from .request.filters import FilterBuilder, FilterCondition, FilterGroup
@@ -47,6 +53,7 @@ __all__ = [
47
53
  "ExportMetadata",
48
54
  "ExportedEntity",
49
55
  "ExportedMediaFile",
56
+ "ExportFormat",
50
57
  "ImportOptions",
51
58
  "ImportResult",
52
59
  "ConflictResolution",
@@ -17,7 +17,10 @@ class RetryConfig(BaseSettings):
17
17
  Controls how the client handles failed requests with exponential backoff.
18
18
  """
19
19
 
20
- model_config = SettingsConfigDict(env_prefix="STRAPI_RETRY_")
20
+ model_config = SettingsConfigDict(
21
+ env_prefix="STRAPI_RETRY_",
22
+ extra="ignore",
23
+ )
21
24
 
22
25
  max_attempts: int = Field(
23
26
  default=3,
@@ -77,6 +80,7 @@ class StrapiConfig(BaseSettings):
77
80
  env_file=".env",
78
81
  env_file_encoding="utf-8",
79
82
  case_sensitive=False,
83
+ extra="ignore",
80
84
  )
81
85
 
82
86
  base_url: str = Field(
@@ -6,7 +6,7 @@ This module defines core enums used throughout the models package:
6
6
  - PublicationState: Draft, published, preview content states
7
7
  """
8
8
 
9
- from enum import Enum
9
+ from enum import StrEnum
10
10
  from typing import Literal
11
11
 
12
12
  # Type aliases for common Strapi types
@@ -14,7 +14,7 @@ StrapiVersion = Literal["v4", "v5", "auto"]
14
14
  LocaleCode = str # ISO 639-1 language codes (e.g., "en", "fr", "de")
15
15
 
16
16
 
17
- class FilterOperator(str, Enum):
17
+ class FilterOperator(StrEnum):
18
18
  """Filter operators supported by Strapi REST API.
19
19
 
20
20
  Strapi supports 24 filter operators for querying content.
@@ -66,7 +66,7 @@ class FilterOperator(str, Enum):
66
66
  NOT = "$not" # Logical NOT
67
67
 
68
68
 
69
- class SortDirection(str, Enum):
69
+ class SortDirection(StrEnum):
70
70
  """Sort direction for query results.
71
71
 
72
72
  Examples:
@@ -80,7 +80,7 @@ class SortDirection(str, Enum):
80
80
  DESC = "desc" # Descending order (Z-A, 9-0, newest-oldest)
81
81
 
82
82
 
83
- class PublicationState(str, Enum):
83
+ class PublicationState(StrEnum):
84
84
  """Content publication state filter.
85
85
 
86
86
  Only applicable to content types with draft & publish enabled.
@@ -5,6 +5,7 @@ and version compatibility.
5
5
  """
6
6
 
7
7
  from datetime import UTC, datetime
8
+ from enum import StrEnum
8
9
  from pathlib import PureWindowsPath
9
10
  from typing import Any
10
11
 
@@ -15,6 +16,18 @@ from strapi_kit.exceptions import FormatError
15
16
  from .schema import ContentTypeSchema
16
17
 
17
18
 
19
+ class ExportFormat(StrEnum):
20
+ """Export file format options.
21
+
22
+ Attributes:
23
+ JSON: Standard JSON format (default). Loads entire file into memory.
24
+ JSONL: JSON Lines format. Streams entities one per line for O(1) memory.
25
+ """
26
+
27
+ JSON = "json"
28
+ JSONL = "jsonl"
29
+
30
+
18
31
  class ExportMetadata(BaseModel):
19
32
  """Metadata about the export.
20
33