zenml-nightly 0.70.0.dev20241128__py3-none-any.whl → 0.70.0.dev20241130__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. zenml/VERSION +1 -1
  2. zenml/artifacts/artifact_config.py +7 -1
  3. zenml/artifacts/utils.py +55 -30
  4. zenml/cli/__init__.py +15 -0
  5. zenml/cli/base.py +4 -4
  6. zenml/cli/pipeline.py +80 -0
  7. zenml/cli/server.py +1 -1
  8. zenml/cli/stack.py +0 -3
  9. zenml/cli/stack_components.py +0 -1
  10. zenml/cli/utils.py +0 -5
  11. zenml/client.py +8 -18
  12. zenml/config/compiler.py +12 -3
  13. zenml/config/pipeline_configurations.py +20 -0
  14. zenml/config/pipeline_run_configuration.py +1 -0
  15. zenml/config/step_configurations.py +21 -0
  16. zenml/enums.py +1 -0
  17. zenml/integrations/feast/__init__.py +1 -1
  18. zenml/integrations/feast/feature_stores/feast_feature_store.py +13 -9
  19. zenml/materializers/built_in_materializer.py +18 -1
  20. zenml/materializers/structured_string_materializer.py +8 -3
  21. zenml/model/model.py +11 -3
  22. zenml/model/utils.py +18 -16
  23. zenml/models/__init__.py +6 -0
  24. zenml/models/v2/core/artifact_version.py +6 -3
  25. zenml/models/v2/core/component.py +0 -22
  26. zenml/models/v2/core/model_version.py +6 -3
  27. zenml/models/v2/core/pipeline_run.py +19 -3
  28. zenml/models/v2/core/run_metadata.py +30 -9
  29. zenml/models/v2/core/step_run.py +6 -4
  30. zenml/models/v2/misc/run_metadata.py +38 -0
  31. zenml/orchestrators/input_utils.py +19 -6
  32. zenml/orchestrators/publish_utils.py +12 -5
  33. zenml/orchestrators/step_launcher.py +7 -3
  34. zenml/orchestrators/step_run_utils.py +18 -6
  35. zenml/orchestrators/step_runner.py +39 -2
  36. zenml/orchestrators/utils.py +0 -21
  37. zenml/pipelines/pipeline_decorator.py +4 -0
  38. zenml/pipelines/pipeline_definition.py +14 -3
  39. zenml/pipelines/run_utils.py +9 -5
  40. zenml/steps/base_step.py +11 -1
  41. zenml/steps/entrypoint_function_utils.py +4 -2
  42. zenml/steps/step_decorator.py +4 -0
  43. zenml/steps/utils.py +23 -7
  44. zenml/types.py +4 -0
  45. zenml/utils/metadata_utils.py +186 -153
  46. zenml/utils/string_utils.py +41 -16
  47. zenml/utils/visualization_utils.py +4 -1
  48. zenml/zen_server/routers/workspaces_endpoints.py +19 -19
  49. zenml/zen_server/template_execution/utils.py +1 -0
  50. zenml/zen_stores/migrations/versions/b73bc71f1106_remove_component_spec_path.py +36 -0
  51. zenml/zen_stores/migrations/versions/cc269488e5a9_separate_run_metadata.py +135 -0
  52. zenml/zen_stores/schemas/__init__.py +5 -1
  53. zenml/zen_stores/schemas/artifact_schemas.py +12 -11
  54. zenml/zen_stores/schemas/component_schemas.py +0 -3
  55. zenml/zen_stores/schemas/model_schemas.py +13 -11
  56. zenml/zen_stores/schemas/pipeline_run_schemas.py +44 -16
  57. zenml/zen_stores/schemas/run_metadata_schemas.py +66 -31
  58. zenml/zen_stores/schemas/step_run_schemas.py +32 -12
  59. zenml/zen_stores/schemas/utils.py +47 -3
  60. zenml/zen_stores/sql_zen_store.py +117 -34
  61. {zenml_nightly-0.70.0.dev20241128.dist-info → zenml_nightly-0.70.0.dev20241130.dist-info}/METADATA +1 -1
  62. {zenml_nightly-0.70.0.dev20241128.dist-info → zenml_nightly-0.70.0.dev20241130.dist-info}/RECORD +65 -62
  63. {zenml_nightly-0.70.0.dev20241128.dist-info → zenml_nightly-0.70.0.dev20241130.dist-info}/LICENSE +0 -0
  64. {zenml_nightly-0.70.0.dev20241128.dist-info → zenml_nightly-0.70.0.dev20241130.dist-info}/WHEEL +0 -0
  65. {zenml_nightly-0.70.0.dev20241128.dist-info → zenml_nightly-0.70.0.dev20241130.dist-info}/entry_points.txt +0 -0
@@ -23,6 +23,7 @@ from sqlalchemy import TEXT, Column, String
23
23
  from sqlalchemy.dialects.mysql import MEDIUMTEXT
24
24
  from sqlmodel import Field, Relationship, SQLModel
25
25
 
26
+ from zenml.config.pipeline_configurations import PipelineConfiguration
26
27
  from zenml.config.step_configurations import Step
27
28
  from zenml.constants import MEDIUMTEXT_MAX_LENGTH
28
29
  from zenml.enums import (
@@ -50,16 +51,19 @@ from zenml.zen_stores.schemas.pipeline_deployment_schemas import (
50
51
  from zenml.zen_stores.schemas.pipeline_run_schemas import PipelineRunSchema
51
52
  from zenml.zen_stores.schemas.schema_utils import build_foreign_key_field
52
53
  from zenml.zen_stores.schemas.user_schemas import UserSchema
54
+ from zenml.zen_stores.schemas.utils import RunMetadataInterface
53
55
  from zenml.zen_stores.schemas.workspace_schemas import WorkspaceSchema
54
56
 
55
57
  if TYPE_CHECKING:
56
58
  from zenml.zen_stores.schemas.artifact_schemas import ArtifactVersionSchema
57
59
  from zenml.zen_stores.schemas.logs_schemas import LogsSchema
58
60
  from zenml.zen_stores.schemas.model_schemas import ModelVersionSchema
59
- from zenml.zen_stores.schemas.run_metadata_schemas import RunMetadataSchema
61
+ from zenml.zen_stores.schemas.run_metadata_schemas import (
62
+ RunMetadataResourceSchema,
63
+ )
60
64
 
61
65
 
62
- class StepRunSchema(NamedSchema, table=True):
66
+ class StepRunSchema(NamedSchema, RunMetadataInterface, table=True):
63
67
  """SQL Model for steps of pipeline runs."""
64
68
 
65
69
  __tablename__ = "step_run"
@@ -139,12 +143,12 @@ class StepRunSchema(NamedSchema, table=True):
139
143
  deployment: Optional["PipelineDeploymentSchema"] = Relationship(
140
144
  back_populates="step_runs"
141
145
  )
142
- run_metadata: List["RunMetadataSchema"] = Relationship(
143
- back_populates="step_run",
146
+ run_metadata_resources: List["RunMetadataResourceSchema"] = Relationship(
147
+ back_populates="step_runs",
144
148
  sa_relationship_kwargs=dict(
145
- primaryjoin=f"and_(RunMetadataSchema.resource_type=='{MetadataResourceTypes.STEP_RUN.value}', foreign(RunMetadataSchema.resource_id)==StepRunSchema.id)",
149
+ primaryjoin=f"and_(RunMetadataResourceSchema.resource_type=='{MetadataResourceTypes.STEP_RUN.value}', foreign(RunMetadataResourceSchema.resource_id)==StepRunSchema.id)",
146
150
  cascade="delete",
147
- overlaps="run_metadata",
151
+ overlaps="run_metadata_resources",
148
152
  ),
149
153
  )
150
154
  input_artifacts: List["StepRunInputArtifactSchema"] = Relationship(
@@ -163,9 +167,15 @@ class StepRunSchema(NamedSchema, table=True):
163
167
  "primaryjoin": "StepRunParentsSchema.child_id == StepRunSchema.id",
164
168
  },
165
169
  )
170
+ pipeline_run: "PipelineRunSchema" = Relationship(
171
+ back_populates="step_runs"
172
+ )
166
173
  model_version: "ModelVersionSchema" = Relationship(
167
174
  back_populates="step_runs",
168
175
  )
176
+ original_step_run: Optional["StepRunSchema"] = Relationship(
177
+ sa_relationship_kwargs={"remote_side": "StepRunSchema.id"}
178
+ )
169
179
 
170
180
  model_config = ConfigDict(protected_namespaces=()) # type: ignore[assignment]
171
181
 
@@ -218,11 +228,6 @@ class StepRunSchema(NamedSchema, table=True):
218
228
  RuntimeError: If the step run schema does not have a deployment_id
219
229
  or a step_configuration.
220
230
  """
221
- run_metadata = {
222
- metadata_schema.key: json.loads(metadata_schema.value)
223
- for metadata_schema in self.run_metadata
224
- }
225
-
226
231
  input_artifacts = {
227
232
  artifact.name: StepRunInputResponse(
228
233
  input_type=StepRunInputArtifactType(artifact.type),
@@ -248,6 +253,21 @@ class StepRunSchema(NamedSchema, table=True):
248
253
  full_step_config = Step.model_validate(
249
254
  step_configuration[self.name]
250
255
  )
256
+ new_substitutions = (
257
+ full_step_config.config._get_full_substitutions(
258
+ PipelineConfiguration.model_validate_json(
259
+ self.deployment.pipeline_configuration
260
+ ),
261
+ self.pipeline_run.start_time,
262
+ )
263
+ )
264
+ full_step_config = full_step_config.model_copy(
265
+ update={
266
+ "config": full_step_config.config.model_copy(
267
+ update={"substitutions": new_substitutions}
268
+ )
269
+ }
270
+ )
251
271
  elif not self.step_configuration:
252
272
  raise ValueError(
253
273
  f"Unable to load the configuration for step `{self.name}` from the"
@@ -294,7 +314,7 @@ class StepRunSchema(NamedSchema, table=True):
294
314
  pipeline_run_id=self.pipeline_run_id,
295
315
  original_step_run_id=self.original_step_run_id,
296
316
  parent_step_ids=[p.parent_id for p in self.parents],
297
- run_metadata=run_metadata,
317
+ run_metadata=self.fetch_metadata(),
298
318
  )
299
319
 
300
320
  resources = None
@@ -13,11 +13,14 @@
13
13
  # permissions and limitations under the License.
14
14
  """Utils for schemas."""
15
15
 
16
+ import json
16
17
  import math
17
- from typing import List, Type, TypeVar
18
+ from typing import Dict, List, Type, TypeVar
18
19
 
19
- from zenml.models.v2.base.base import BaseResponse
20
- from zenml.models.v2.base.page import Page
20
+ from sqlmodel import Relationship
21
+
22
+ from zenml.metadata.metadata_types import MetadataType
23
+ from zenml.models import BaseResponse, Page, RunMetadataEntry
21
24
  from zenml.zen_stores.schemas.base_schemas import BaseSchema
22
25
 
23
26
  S = TypeVar("S", bound=BaseSchema)
@@ -67,3 +70,44 @@ def get_page_from_list(
67
70
  total=total,
68
71
  items=page_items,
69
72
  )
73
+
74
+
75
+ class RunMetadataInterface:
76
+ """The interface for entities with run metadata."""
77
+
78
+ run_metadata_resources = Relationship()
79
+
80
+ def fetch_metadata_collection(self) -> Dict[str, List[RunMetadataEntry]]:
81
+ """Fetches all the metadata entries related to the artifact version.
82
+
83
+ Returns:
84
+ a dictionary, where the key is the key of the metadata entry
85
+ and the values represent the list of entries with this key.
86
+ """
87
+ metadata_collection: Dict[str, List[RunMetadataEntry]] = {}
88
+
89
+ # Fetch the metadata related to this step
90
+ for rm in self.run_metadata_resources:
91
+ if rm.run_metadata.key not in metadata_collection:
92
+ metadata_collection[rm.run_metadata.key] = []
93
+ metadata_collection[rm.run_metadata.key].append(
94
+ RunMetadataEntry(
95
+ value=json.loads(rm.run_metadata.value),
96
+ created=rm.run_metadata.created,
97
+ )
98
+ )
99
+
100
+ return metadata_collection
101
+
102
+ def fetch_metadata(self) -> Dict[str, MetadataType]:
103
+ """Fetches the latest metadata entry related to the artifact version.
104
+
105
+ Returns:
106
+ a dictionary, where the key is the key of the metadata entry
107
+ and the values represent the latest entry with this key.
108
+ """
109
+ metadata_collection = self.fetch_metadata_collection()
110
+ return {
111
+ k: sorted(v, key=lambda x: x.created, reverse=True)[0].value
112
+ for k, v in metadata_collection.items()
113
+ }
@@ -26,6 +26,7 @@ from datetime import datetime, timezone
26
26
  from functools import lru_cache
27
27
  from pathlib import Path
28
28
  from typing import (
29
+ TYPE_CHECKING,
29
30
  Any,
30
31
  Callable,
31
32
  ClassVar,
@@ -219,6 +220,7 @@ from zenml.models import (
219
220
  PipelineRunUpdate,
220
221
  PipelineUpdate,
221
222
  RunMetadataRequest,
223
+ RunMetadataResource,
222
224
  RunTemplateFilter,
223
225
  RunTemplateRequest,
224
226
  RunTemplateResponse,
@@ -325,6 +327,7 @@ from zenml.zen_stores.schemas import (
325
327
  PipelineDeploymentSchema,
326
328
  PipelineRunSchema,
327
329
  PipelineSchema,
330
+ RunMetadataResourceSchema,
328
331
  RunMetadataSchema,
329
332
  RunTemplateSchema,
330
333
  ScheduleSchema,
@@ -354,6 +357,9 @@ from zenml.zen_stores.secrets_stores.sql_secrets_store import (
354
357
  SqlSecretsStoreConfiguration,
355
358
  )
356
359
 
360
+ if TYPE_CHECKING:
361
+ from zenml.metadata.metadata_types import MetadataType, MetadataTypeEnum
362
+
357
363
  AnyNamedSchema = TypeVar("AnyNamedSchema", bound=NamedSchema)
358
364
  AnySchema = TypeVar("AnySchema", bound=BaseSchema)
359
365
 
@@ -2726,7 +2732,9 @@ class SqlZenStore(BaseZenStore):
2726
2732
  # -------------------- Artifact Versions --------------------
2727
2733
 
2728
2734
  def _get_or_create_artifact_for_name(
2729
- self, name: str, has_custom_name: bool
2735
+ self,
2736
+ name: str,
2737
+ has_custom_name: bool,
2730
2738
  ) -> ArtifactSchema:
2731
2739
  """Get or create an artifact with a specific name.
2732
2740
 
@@ -2747,7 +2755,8 @@ class SqlZenStore(BaseZenStore):
2747
2755
  try:
2748
2756
  with session.begin_nested():
2749
2757
  artifact_request = ArtifactRequest(
2750
- name=name, has_custom_name=has_custom_name
2758
+ name=name,
2759
+ has_custom_name=has_custom_name,
2751
2760
  )
2752
2761
  artifact = ArtifactSchema.from_request(
2753
2762
  artifact_request
@@ -2915,17 +2924,41 @@ class SqlZenStore(BaseZenStore):
2915
2924
 
2916
2925
  # Save metadata of the artifact
2917
2926
  if artifact_version.metadata:
2927
+ values: Dict[str, "MetadataType"] = {}
2928
+ types: Dict[str, "MetadataTypeEnum"] = {}
2918
2929
  for key, value in artifact_version.metadata.items():
2919
- run_metadata_schema = RunMetadataSchema(
2920
- workspace_id=artifact_version.workspace,
2921
- user_id=artifact_version.user,
2922
- resource_id=artifact_version_id,
2923
- resource_type=MetadataResourceTypes.ARTIFACT_VERSION,
2924
- key=key,
2925
- value=json.dumps(value),
2926
- type=get_metadata_type(value),
2930
+ # Skip metadata that is too large to be stored in the DB.
2931
+ if len(json.dumps(value)) > TEXT_FIELD_MAX_LENGTH:
2932
+ logger.warning(
2933
+ f"Metadata value for key '{key}' is too large to be "
2934
+ "stored in the database. Skipping."
2935
+ )
2936
+ continue
2937
+ # Skip metadata that is not of a supported type.
2938
+ try:
2939
+ metadata_type = get_metadata_type(value)
2940
+ except ValueError as e:
2941
+ logger.warning(
2942
+ f"Metadata value for key '{key}' is not of a "
2943
+ f"supported type. Skipping. Full error: {e}"
2944
+ )
2945
+ continue
2946
+ values[key] = value
2947
+ types[key] = metadata_type
2948
+ self.create_run_metadata(
2949
+ RunMetadataRequest(
2950
+ workspace=artifact_version.workspace,
2951
+ user=artifact_version.user,
2952
+ resources=[
2953
+ RunMetadataResource(
2954
+ id=artifact_version_id,
2955
+ type=MetadataResourceTypes.ARTIFACT_VERSION,
2956
+ )
2957
+ ],
2958
+ values=values,
2959
+ types=types,
2927
2960
  )
2928
- session.add(run_metadata_schema)
2961
+ )
2929
2962
 
2930
2963
  session.commit()
2931
2964
  artifact_version_schema = session.exec(
@@ -5529,20 +5562,29 @@ class SqlZenStore(BaseZenStore):
5529
5562
  The created run metadata.
5530
5563
  """
5531
5564
  with Session(self.engine) as session:
5532
- for key, value in run_metadata.values.items():
5533
- type_ = run_metadata.types[key]
5534
- run_metadata_schema = RunMetadataSchema(
5535
- workspace_id=run_metadata.workspace,
5536
- user_id=run_metadata.user,
5537
- resource_id=run_metadata.resource_id,
5538
- resource_type=run_metadata.resource_type.value,
5539
- stack_component_id=run_metadata.stack_component_id,
5540
- key=key,
5541
- value=json.dumps(value),
5542
- type=type_,
5543
- )
5544
- session.add(run_metadata_schema)
5545
- session.commit()
5565
+ if run_metadata.resources:
5566
+ for key, value in run_metadata.values.items():
5567
+ type_ = run_metadata.types[key]
5568
+ run_metadata_schema = RunMetadataSchema(
5569
+ workspace_id=run_metadata.workspace,
5570
+ user_id=run_metadata.user,
5571
+ stack_component_id=run_metadata.stack_component_id,
5572
+ key=key,
5573
+ value=json.dumps(value),
5574
+ type=type_,
5575
+ publisher_step_id=run_metadata.publisher_step_id,
5576
+ )
5577
+ session.add(run_metadata_schema)
5578
+ session.commit()
5579
+
5580
+ for resource in run_metadata.resources:
5581
+ rm_resource_link = RunMetadataResourceSchema(
5582
+ resource_id=resource.id,
5583
+ resource_type=resource.type.value,
5584
+ run_metadata_id=run_metadata_schema.id,
5585
+ )
5586
+ session.add(rm_resource_link)
5587
+ session.commit()
5546
5588
  return None
5547
5589
 
5548
5590
  # ----------------------------- Schedules -----------------------------
@@ -8153,6 +8195,46 @@ class SqlZenStore(BaseZenStore):
8153
8195
  )
8154
8196
  session.add(log_entry)
8155
8197
 
8198
+ # If cached, attach metadata of the original step
8199
+ if (
8200
+ step_run.status == ExecutionStatus.CACHED
8201
+ and step_run.original_step_run_id is not None
8202
+ ):
8203
+ original_metadata_links = session.exec(
8204
+ select(RunMetadataResourceSchema)
8205
+ .where(
8206
+ RunMetadataResourceSchema.run_metadata_id
8207
+ == RunMetadataSchema.id
8208
+ )
8209
+ .where(
8210
+ RunMetadataResourceSchema.resource_id
8211
+ == step_run.original_step_run_id
8212
+ )
8213
+ .where(
8214
+ RunMetadataResourceSchema.resource_type
8215
+ == MetadataResourceTypes.STEP_RUN
8216
+ )
8217
+ .where(
8218
+ RunMetadataSchema.publisher_step_id
8219
+ == step_run.original_step_run_id
8220
+ )
8221
+ ).all()
8222
+
8223
+ # Create new links in a batch
8224
+ new_links = [
8225
+ RunMetadataResourceSchema(
8226
+ resource_id=step_schema.id,
8227
+ resource_type=link.resource_type,
8228
+ run_metadata_id=link.run_metadata_id,
8229
+ )
8230
+ for link in original_metadata_links
8231
+ ]
8232
+ # Add all new links in a single operation
8233
+ session.add_all(new_links)
8234
+ # Commit the changes
8235
+ session.commit()
8236
+ session.refresh(step_schema)
8237
+
8156
8238
  # Save parent step IDs into the database.
8157
8239
  for parent_step_id in step_run.parent_step_ids:
8158
8240
  self._set_run_step_parent_step(
@@ -8182,12 +8264,12 @@ class SqlZenStore(BaseZenStore):
8182
8264
  )
8183
8265
 
8184
8266
  # Save output artifact IDs into the database.
8185
- for output_name, artifact_version_ids in step_run.outputs.items():
8267
+ for name, artifact_version_ids in step_run.outputs.items():
8186
8268
  for artifact_version_id in artifact_version_ids:
8187
8269
  self._set_run_step_output_artifact(
8188
8270
  step_run_id=step_schema.id,
8189
8271
  artifact_version_id=artifact_version_id,
8190
- name=output_name,
8272
+ name=name,
8191
8273
  session=session,
8192
8274
  )
8193
8275
 
@@ -8291,13 +8373,14 @@ class SqlZenStore(BaseZenStore):
8291
8373
  session.add(existing_step_run)
8292
8374
 
8293
8375
  # Update the artifacts.
8294
- for name, artifact_version_id in step_run_update.outputs.items():
8295
- self._set_run_step_output_artifact(
8296
- step_run_id=step_run_id,
8297
- artifact_version_id=artifact_version_id,
8298
- name=name,
8299
- session=session,
8300
- )
8376
+ for name, artifact_version_ids in step_run_update.outputs.items():
8377
+ for artifact_version_id in artifact_version_ids:
8378
+ self._set_run_step_output_artifact(
8379
+ step_run_id=step_run_id,
8380
+ artifact_version_id=artifact_version_id,
8381
+ name=name,
8382
+ session=session,
8383
+ )
8301
8384
 
8302
8385
  # Update loaded artifacts.
8303
8386
  for (
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: zenml-nightly
3
- Version: 0.70.0.dev20241128
3
+ Version: 0.70.0.dev20241130
4
4
  Summary: ZenML: Write production-ready ML code.
5
5
  Home-page: https://zenml.io
6
6
  License: Apache-2.0