starflow-py 0.49.0__tar.gz → 0.57.0__tar.gz

This diff shows the changes between two publicly released versions of the package, as they appear in the public registry to which they were published. It is provided for informational purposes only.
Files changed (78)
  1. {starflow_py-0.49.0 → starflow_py-0.57.0}/PKG-INFO +1 -1
  2. starflow_py-0.57.0/src/starflow/VERSION +1 -0
  3. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/custom_operators/k8s_branch_operator.py +88 -6
  4. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/custom_operators/k8s_operator.py +210 -55
  5. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/task.py +18 -2
  6. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/task_branch.py +2 -2
  7. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow_py.egg-info/PKG-INFO +1 -1
  8. starflow_py-0.49.0/src/starflow/VERSION +0 -1
  9. {starflow_py-0.49.0 → starflow_py-0.57.0}/LICENSE +0 -0
  10. {starflow_py-0.49.0 → starflow_py-0.57.0}/README.md +0 -0
  11. {starflow_py-0.49.0 → starflow_py-0.57.0}/pyproject.toml +0 -0
  12. {starflow_py-0.49.0 → starflow_py-0.57.0}/setup.cfg +0 -0
  13. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/__init__.py +0 -0
  14. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/actions/github_actions.py +0 -0
  15. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/base_piece.py +0 -0
  16. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/cli/__init__.py +0 -0
  17. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/cli/cli.py +0 -0
  18. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/cli/utils/__init__.py +0 -0
  19. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/cli/utils/config-starflow-local.toml +0 -0
  20. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/cli/utils/constants.py +0 -0
  21. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/cli/utils/docker-compose.yaml +0 -0
  22. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/cli/utils/pieces_repository.py +0 -0
  23. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/cli/utils/platform.py +0 -0
  24. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/client/__init__.py +0 -0
  25. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/client/github_rest_client.py +0 -0
  26. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/client/legacy/fs_client.py +0 -0
  27. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/client/legacy/s3_client.py +0 -0
  28. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/client/local_files_client.py +0 -0
  29. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/client/starflow_backend_client.py +0 -0
  30. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/custom_operators/__init__.py +0 -0
  31. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/custom_operators/deprecated/base_operator.py +0 -0
  32. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/custom_operators/deprecated/external_python_operator.py +0 -0
  33. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/custom_operators/docker_operator.py +0 -0
  34. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/custom_operators/python_operator.py +0 -0
  35. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/custom_operators/sidecar/__init__.py +0 -0
  36. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/custom_operators/sidecar/fuse.conf +0 -0
  37. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/custom_operators/sidecar/logger.py +0 -0
  38. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/custom_operators/sidecar/mount.py +0 -0
  39. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/custom_operators/sidecar/rclone.conf +0 -0
  40. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/custom_operators/sidecar/sidecar_lifecycle.sh +0 -0
  41. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/custom_operators/worker_operator.py +0 -0
  42. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/exceptions/__init__.py +0 -0
  43. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/exceptions/exceptions.py +0 -0
  44. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/logger.py +0 -0
  45. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/models/__init__.py +0 -0
  46. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/models/output_modifier.py +0 -0
  47. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/schemas/__init__.py +0 -0
  48. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/schemas/container_resources.py +0 -0
  49. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/schemas/deploy_mode.py +0 -0
  50. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/schemas/display_result.py +0 -0
  51. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/schemas/piece_metadata.py +0 -0
  52. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/schemas/shared_storage.py +0 -0
  53. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/scripts/__init__.py +0 -0
  54. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/scripts/build_docker_images_pieces.py +0 -0
  55. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/scripts/deprecated/create_docker_compose_file.py +0 -0
  56. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/scripts/deprecated/deprecated_piece_dry_run.py +0 -0
  57. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/scripts/deprecated/docker_compose_constants.py +0 -0
  58. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/scripts/deprecated/docker_compose_scripts.py +0 -0
  59. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/scripts/deprecated/run_piece_bash.py +0 -0
  60. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/scripts/load_piece.py +0 -0
  61. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/scripts/run_piece_branch_docker.py +0 -0
  62. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/scripts/run_piece_docker.py +0 -0
  63. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/storage/__init__.py +0 -0
  64. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/storage/s3.py +0 -0
  65. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/testing/__init__.py +0 -0
  66. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/testing/dry_run.py +0 -0
  67. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/testing/http_client.py +0 -0
  68. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/testing/http_server.py +0 -0
  69. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/testing/utils.py +0 -0
  70. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/utils/__init__.py +0 -0
  71. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/utils/metadata_default.py +0 -0
  72. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/utils/piece_generator.py +0 -0
  73. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/utils/workflow_shared_storage.py +0 -0
  74. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow_py.egg-info/SOURCES.txt +0 -0
  75. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow_py.egg-info/dependency_links.txt +0 -0
  76. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow_py.egg-info/entry_points.txt +0 -0
  77. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow_py.egg-info/requires.txt +0 -0
  78. {starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow_py.egg-info/top_level.txt +0 -0

{starflow_py-0.49.0 → starflow_py-0.57.0}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: starflow-py
-Version: 0.49.0
+Version: 0.57.0
 Summary: Python package for starflow.
 Author-email: Alexandre Lazar <alexandre.lazar@gmail.com>
 License:

starflow_py-0.57.0/src/starflow/VERSION (new file)
@@ -0,0 +1 @@
+0.57.0

{starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/custom_operators/k8s_branch_operator.py
@@ -38,6 +38,18 @@ class starflowKubernetesBranchPodOperator(KubernetesPodOperator, BaseBranchOpera
         container_resources: Optional[Dict] = None,
         **k8s_operator_kwargs
     ):
+        import logging
+        logger = logging.getLogger(__name__)
+
+        logger.info("=" * 80)
+        logger.info(f"[StarflowK8sBranchOperator] INIT METHOD CALLED - task_id: {task_id}")
+        logger.info(f"[StarflowK8sBranchOperator] dag_id: {dag_id}")
+        logger.info(f"[StarflowK8sBranchOperator] piece_name: {piece_name}")
+        logger.info(f"[StarflowK8sBranchOperator] repository_url: {repository_url}")
+        logger.info(f"[StarflowK8sBranchOperator] repository_version: {repository_version}")
+        logger.info(f"[StarflowK8sBranchOperator] workspace_id: {workspace_id}")
+        logger.info("=" * 80)
+
         self.logger = get_configured_logger("starflowKubernetesBranchPodOperator")
         self.task_id = task_id
         self.piece_name = piece_name

@@ -214,9 +226,27 @@ class starflowKubernetesBranchPodOperator(KubernetesPodOperator, BaseBranchOpera
         self.task_id_replaced = self.task_id.lower().replace("_", "-")  # doing this because airflow doesn't allow underscores and upper case in mount names and max len is 63
         self.shared_storage_base_mount_path = '/home/shared_storage'
 
+        # Fix pod name to comply with Kubernetes naming rules (no underscores)
+        if pod.metadata.name:
+            # Replace underscores with hyphens and ensure it's lowercase
+            pod.metadata.name = pod.metadata.name.lower().replace("_", "-")
+            # Ensure it starts and ends with alphanumeric characters
+            pod.metadata.name = pod.metadata.name.strip("-")
+            if not pod.metadata.name[0].isalnum():
+                pod.metadata.name = "pod-" + pod.metadata.name
+            if not pod.metadata.name[-1].isalnum():
+                pod.metadata.name = pod.metadata.name + "-pod"
+
         pod.metadata.namespace = "airflow"
         pod.spec.image_pull_secrets = [k8s.V1LocalObjectReference('ghcr-login-secret')]
 
+        # Override xcom sidecar image to use Harbor registry instead of quay.io
+        if pod.spec.containers:
+            for container in pod.spec.containers:
+                if container.name == "airflow-xcom-sidecar":
+                    container.image = "harbor.prochain.local/public/alpine:latest"
+                    self.logger.info(f"[StarflowK8sBranchOperator] Updated xcom sidecar image to: {container.image}")
+
         # if not self.workflow_shared_storage or self.workflow_shared_storage.mode.name == 'none':
         #     return pod
         # if self.workflow_shared_storage.source.name in ["aws_s3", "gcs"]:
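
Note: the sanitization added above follows Kubernetes' DNS-1123 label rules (lowercase alphanumerics and hyphens, alphanumeric at both ends); the 63-character limit mentioned in the mount-name comment is not enforced here. A minimal standalone sketch of the same transformation (hypothetical helper, not part of the package), assuming a non-empty input name:

    def sanitize_pod_name(name: str) -> str:
        # Lowercase and replace underscores, as in the operator code above
        name = name.lower().replace("_", "-")
        # Trim stray hyphens, then pad ends that are still non-alphanumeric
        name = name.strip("-")
        if not name[0].isalnum():
            name = "pod-" + name
        if not name[-1].isalnum():
            name = name + "-pod"
        return name

    assert sanitize_pod_name("Airflow_Worker_Pod_My_Task") == "airflow-worker-pod-my-task"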

@@ -226,7 +256,6 @@ class starflowKubernetesBranchPodOperator(KubernetesPodOperator, BaseBranchOpera
 
         return pod
 
-
     def add_local_shared_storage_volumes(self, pod: k8s.V1Pod) -> k8s.V1Pod:
         """Adds local shared storage volumes to the pod."""
         pod_cp = copy.deepcopy(pod)

@@ -397,6 +426,11 @@ class starflowKubernetesBranchPodOperator(KubernetesPodOperator, BaseBranchOpera
         piece_repository_data = self.starflow_client.get_piece_repositories_from_workspace_id(
             params=params
         ).json()
+
+        # Check if any piece repositories were found
+        if not piece_repository_data.get("data") or len(piece_repository_data["data"]) == 0:
+            return {}
+
         secrets_response = self.starflow_client.get_piece_secrets(
             piece_repository_id=piece_repository_data["data"][0]["id"],
             piece_name=piece_name

@@ -467,8 +501,26 @@ class starflowKubernetesBranchPodOperator(KubernetesPodOperator, BaseBranchOpera
         Runs at the beginning of the execute method.
         Pass extra arguments and configuration as environment variables to the pod
         """
+        import logging
+        logger = logging.getLogger(__name__)
+
         # Fetch upstream tasks ids and save them in an ENV var
-        upstream_task_ids = [t.task_id for t in self.get_direct_relatives(upstream=True)]
+        # Check if operator is properly assigned to a DAG
+        try:
+            # Try to access the dag property - this will raise RuntimeError if not assigned
+            dag = self.dag
+            if dag is None:
+                logger.warning("[StarflowK8sBranchOperator] Operator DAG is None, skipping upstream task IDs")
+                upstream_task_ids = []
+            else:
+                upstream_task_ids = [t.task_id for t in self.get_direct_relatives(upstream=True)]
+                logger.info(f"[StarflowK8sBranchOperator] Upstream task IDs: {upstream_task_ids}")
+        except RuntimeError as e:
+            if "has not been assigned to a DAG yet" in str(e):
+                logger.warning("[StarflowK8sBranchOperator] Operator not assigned to a DAG, skipping upstream task IDs")
+                upstream_task_ids = []
+            else:
+                raise
         self.env_vars.append({
             'name': 'AIRFLOW_UPSTREAM_TASKS_IDS',
             'value': str(upstream_task_ids),
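
The DAG-assignment guard above is duplicated verbatim in k8s_operator.py further down; as a sketch under that observation, it could be factored into a shared helper (hypothetical, not part of the package):

    import logging
    from typing import List

    logger = logging.getLogger(__name__)

    def safe_upstream_task_ids(operator) -> List[str]:
        # Return upstream task ids, or [] when the operator has no DAG yet.
        try:
            if operator.dag is None:
                return []
            return [t.task_id for t in operator.get_direct_relatives(upstream=True)]
        except RuntimeError as e:
            # Airflow raises RuntimeError when .dag is accessed before assignment
            if "has not been assigned to a DAG yet" in str(e):
                logger.warning("Operator not assigned to a DAG; skipping upstream task IDs")
                return []
            raise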

@@ -518,19 +570,49 @@ class starflowKubernetesBranchPodOperator(KubernetesPodOperator, BaseBranchOpera
         """
         Code from here onward is executed by the Worker and not by the Scheduler.
         """
-        # TODO change url based on platform configuration
-        self.starflow_client = starflowBackendRestClient(base_url="http://starflow-sa-rest-service.prochain-starflow:8000/")
-        self._prepare_execute_environment(context=context)
-        remote_pod = None
+        import logging
+        logger = logging.getLogger(__name__)
+
+        # Log at the very beginning to confirm execute method is called
+        logger.info("=" * 80)
+        logger.info(f"[StarflowK8sBranchOperator] EXECUTE METHOD CALLED - task_id: {self.task_id}")
+        logger.info(f"[StarflowK8sBranchOperator] Context type: {type(context)}")
+        logger.info(f"[StarflowK8sBranchOperator] Context keys: {list(context.keys()) if hasattr(context, 'keys') else 'No keys method'}")
+        logger.info("=" * 80)
+
         try:
+            logger.info(f"[StarflowK8sBranchOperator] Starting execution for task_id: {self.task_id}")
+
+            # Initialize remote_pod early to avoid UnboundLocalError in finally block
+            remote_pod = None
+
+            # TODO change url based on platform configuration
+            logger.info("[StarflowK8sBranchOperator] Initializing Starflow backend client...")
+            self.starflow_client = starflowBackendRestClient(base_url="http://starflow-rest-service.starflow:8000/")
+            logger.info("[StarflowK8sBranchOperator] Starflow backend client initialized successfully")
+
+            logger.info("[StarflowK8sBranchOperator] Preparing execute environment...")
+            self._prepare_execute_environment(context=context)
+            logger.info("[StarflowK8sBranchOperator] Execute environment prepared successfully")
+            logger.info("[StarflowK8sBranchOperator] Building pod request object...")
             self.pod_request_obj = self.build_pod_request_obj(context)
+            logger.info("[StarflowK8sBranchOperator] Pod request object built successfully")
+
+            logger.info("[StarflowK8sBranchOperator] Creating/getting pod...")
             self.pod = self.get_or_create_pod(  # must set `self.pod` for `on_kill`
                 pod_request_obj=self.pod_request_obj,
                 context=context,
             )
+            logger.info(f"[StarflowK8sBranchOperator] Pod created successfully: {self.pod.metadata.name}")
+
             # get remote pod for use in cleanup methods
+            logger.info("[StarflowK8sBranchOperator] Finding remote pod...")
             remote_pod = self.find_pod(self.pod.metadata.namespace, context=context)
+            logger.info("[StarflowK8sBranchOperator] Remote pod found successfully")
+
+            logger.info("[StarflowK8sBranchOperator] Awaiting pod start...")
             self.await_pod_start(pod=self.pod)
+            logger.info("[StarflowK8sBranchOperator] Pod started successfully")
 
             if self.get_logs:
                 self.pod_manager.fetch_container_logs(

{starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/custom_operators/k8s_operator.py
@@ -34,6 +34,18 @@ class starflowKubernetesPodOperator(KubernetesPodOperator):
         container_resources: Optional[Dict] = None,
         **k8s_operator_kwargs
     ):
+        import logging
+        logger = logging.getLogger(__name__)
+
+        logger.info("=" * 80)
+        logger.info(f"[StarflowK8sOperator] INIT METHOD CALLED - task_id: {task_id}")
+        logger.info(f"[StarflowK8sOperator] dag_id: {dag_id}")
+        logger.info(f"[StarflowK8sOperator] piece_name: {piece_name}")
+        logger.info(f"[StarflowK8sOperator] repository_url: {repository_url}")
+        logger.info(f"[StarflowK8sOperator] repository_version: {repository_version}")
+        logger.info(f"[StarflowK8sOperator] workspace_id: {workspace_id}")
+        logger.info("=" * 80)
+
         self.logger = get_configured_logger("starflowKubernetesPodOperator")
         self.task_id = task_id
         self.piece_name = piece_name

@@ -45,6 +57,10 @@ class starflowKubernetesPodOperator(KubernetesPodOperator):
         self.workflow_shared_storage = workflow_shared_storage
         self.piece_source_image = k8s_operator_kwargs["image"]
         self.username_var = username
+
+        # Initialize attributes needed by build_pod_request_obj
+        self.task_id_replaced = self.task_id.lower().replace("_", "-")  # doing this because airflow doesn't allow underscores and upper case in mount names and max len is 63
+        self.shared_storage_base_mount_path = '/home/shared_storage'
 
         # Environment variables
         pod_env_vars = {
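
Hoisting these assignments into __init__ (they previously lived inside build_pod_request_obj, removed in the next hunk) presumably means every method can rely on them regardless of call order; a minimal sketch of the failure mode this avoids, with hypothetical class names:

    class Before:
        def build(self):
            self.mount_name = "my-task"   # attribute born inside one method
        def volumes(self):
            return self.mount_name        # AttributeError if build() has not run yet

    class After:
        def __init__(self):
            self.mount_name = "my-task"   # attribute exists from construction
        def volumes(self):
            return self.mount_name        # safe in any call order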

@@ -195,15 +211,31 @@ class starflowKubernetesPodOperator(KubernetesPodOperator):
     def build_pod_request_obj(self, context: Optional['Context'] = None) -> k8s.V1Pod:
         """
         We override this method to add the shared storage to the pod.
-        This function runs after our own self.execute, by super().execute()
+        This function is called by the parent class's execute method.
         """
         pod = super().build_pod_request_obj(context)
-        self.task_id_replaced = self.task_id.lower().replace("_", "-")  # doing this because airflow doesn't allow underscores and upper case in mount names and max len is 63
-        self.shared_storage_base_mount_path = '/home/shared_storage'
+
+        # Fix pod name to comply with Kubernetes naming rules (no underscores)
+        if pod.metadata.name:
+            # Replace underscores with hyphens and ensure it's lowercase
+            pod.metadata.name = pod.metadata.name.lower().replace("_", "-")
+            # Ensure it starts and ends with alphanumeric characters
+            pod.metadata.name = pod.metadata.name.strip("-")
+            if not pod.metadata.name[0].isalnum():
+                pod.metadata.name = "pod-" + pod.metadata.name
+            if not pod.metadata.name[-1].isalnum():
+                pod.metadata.name = pod.metadata.name + "-pod"
 
         pod.metadata.namespace = "airflow"
         pod.spec.image_pull_secrets = [k8s.V1LocalObjectReference('ghcr-login-secret')]
 
+        # Override xcom sidecar image to use Harbor public registry instead of quay.io
+        if pod.spec.containers:
+            for container in pod.spec.containers:
+                if container.name == "airflow-xcom-sidecar":
+                    container.image = "harbor.prochain.local/public/alpine:latest"
+                    self.logger.info(f"[StarflowK8sOperator] Updated xcom sidecar image to: {container.image}")
+
         # if not self.workflow_shared_storage or self.workflow_shared_storage.mode.name == 'none':
         #     return pod
         # if self.workflow_shared_storage.source.name in ["aws_s3", "gcs"]:

@@ -213,7 +245,6 @@ class starflowKubernetesPodOperator(KubernetesPodOperator):
 
         return pod
 
-
     def add_local_shared_storage_volumes(self, pod: k8s.V1Pod) -> k8s.V1Pod:
         """Adds local shared storage volumes to the pod."""
         pod_cp = copy.deepcopy(pod)

@@ -373,30 +404,74 @@ class starflowKubernetesPodOperator(KubernetesPodOperator):
         source: str = 'github'
     ) -> Dict[str, Any]:
         """Get piece secrets values from starflow API"""
-        params = {
-            "workspace_id": self.workspace_id,
-            "url": repository_url,
-            "version": repository_version,
-            'source': source,
-            "page": 0,
-            "page_size": 1,
-        }
-        piece_repository_data = self.starflow_client.get_piece_repositories_from_workspace_id(
-            params=params
-        ).json()
-        secrets_response = self.starflow_client.get_piece_secrets(
-            piece_repository_id=piece_repository_data["data"][0]["id"],
-            piece_name=piece_name
-        )
-        if secrets_response.status_code != 200:
-            raise Exception(f"Error getting piece secrets: {secrets_response.json()}")
-
-        piece_secrets = {}
-        for e in secrets_response.json():
-            if not e.get('value') and not e.get('required'):
-                continue
-            piece_secrets[e.get('name')] = e.get('value')
-        return piece_secrets
+        import logging
+        logger = logging.getLogger(__name__)
+
+        try:
+            logger.info(f"[StarflowK8sOperator] Getting piece secrets for piece: {piece_name}")
+            logger.info(f"[StarflowK8sOperator] Repository URL: {repository_url}")
+            logger.info(f"[StarflowK8sOperator] Repository version: {repository_version}")
+            logger.info(f"[StarflowK8sOperator] Workspace ID: {self.workspace_id}")
+
+            params = {
+                "workspace_id": self.workspace_id,
+                "url": repository_url,
+                "version": repository_version,
+                'source': source,
+                "page": 0,
+                "page_size": 1,
+            }
+
+            logger.info("[StarflowK8sOperator] Calling get_piece_repositories_from_workspace_id...")
+            piece_repository_response = self.starflow_client.get_piece_repositories_from_workspace_id(
+                params=params
+            )
+            logger.info(f"[StarflowK8sOperator] get_piece_repositories_from_workspace_id response status: {piece_repository_response.status_code}")
+
+            piece_repository_data = piece_repository_response.json()
+            logger.info(f"[StarflowK8sOperator] Piece repository data: {piece_repository_data}")
+
+            # Check if any piece repositories were found
+            if not piece_repository_data.get("data") or len(piece_repository_data["data"]) == 0:
+                logger.warning(f"[StarflowK8sOperator] No piece repositories found for workspace_id={self.workspace_id}, url={repository_url}, version={repository_version}")
+                logger.info("[StarflowK8sOperator] Returning empty secrets dict")
+                return {}
+
+            piece_repository_id = piece_repository_data["data"][0]["id"]
+            logger.info(f"[StarflowK8sOperator] Piece repository ID: {piece_repository_id}")
+
+            logger.info("[StarflowK8sOperator] Calling get_piece_secrets...")
+            secrets_response = self.starflow_client.get_piece_secrets(
+                piece_repository_id=piece_repository_id,
+                piece_name=piece_name
+            )
+            logger.info(f"[StarflowK8sOperator] get_piece_secrets response status: {secrets_response.status_code}")
+
+            if secrets_response.status_code == 404:
+                logger.info("[StarflowK8sOperator] No secrets configured for this piece, returning empty dict")
+                return {}
+            elif secrets_response.status_code != 200:
+                logger.error(f"[StarflowK8sOperator] Error getting piece secrets: {secrets_response.json()}")
+                raise Exception(f"Error getting piece secrets: {secrets_response.json()}")
+
+            secrets_data = secrets_response.json()
+            logger.info(f"[StarflowK8sOperator] Secrets data: {secrets_data}")
+
+            piece_secrets = {}
+            for e in secrets_data:
+                if not e.get('value') and not e.get('required'):
+                    continue
+                piece_secrets[e.get('name')] = e.get('value')
+
+            logger.info(f"[StarflowK8sOperator] Piece secrets processed: {list(piece_secrets.keys())}")
+            return piece_secrets
+
+        except Exception as e:
+            logger.error(f"[StarflowK8sOperator] Error in _get_piece_secrets: {str(e)}")
+            logger.error(f"[StarflowK8sOperator] Exception type: {type(e).__name__}")
+            import traceback
+            logger.error(f"[StarflowK8sOperator] Traceback: {traceback.format_exc()}")
+            raise
 
 
     @staticmethod
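
The keep/skip rule in the secrets loop above is easy to misread: an entry is dropped only when it has neither a value nor the required flag. A short sketch of just that filter, with hypothetical data:

    secrets_data = [
        {"name": "API_KEY", "value": "abc123", "required": True},    # kept
        {"name": "OPTIONAL_TOKEN", "value": "", "required": False},  # skipped
        {"name": "DB_PASSWORD", "value": "", "required": True},      # kept, empty value
    ]
    piece_secrets = {
        e["name"]: e["value"]
        for e in secrets_data
        if e.get("value") or e.get("required")   # same condition, inverted
    }
    print(piece_secrets)  # {'API_KEY': 'abc123', 'DB_PASSWORD': ''}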

@@ -454,30 +529,80 @@ class starflowKubernetesPodOperator(KubernetesPodOperator):
         Runs at the beginning of the execute method.
         Pass extra arguments and configuration as environment variables to the pod
         """
-        # Fetch upstream tasks ids and save them in an ENV var
-        upstream_task_ids = [t.task_id for t in self.get_direct_relatives(upstream=True)]
-        self.env_vars.append({
-            'name': 'AIRFLOW_UPSTREAM_TASKS_IDS',
-            'value': str(upstream_task_ids),
-            'value_from': None
-        })
-        # Pass forward the workflow shared storage source name
-        self.env_vars.append({
-            'name': 'STARFLOW_WORKFLOW_SHARED_STORAGE_SOURCE_NAME',
-            'value': str(self.workflow_shared_storage.source.name) if self.workflow_shared_storage else None,
-            'value_from': None
-        })
-        # Save updated piece input kwargs with upstream data to environment variable
-        self.upstream_xcoms_data = self._get_upstream_xcom_data_from_task_ids(task_ids=upstream_task_ids, context=context)
-        self._update_piece_kwargs_with_upstream_xcom()
+        import logging
+        logger = logging.getLogger(__name__)
+
+        try:
+            logger.info("[StarflowK8sOperator] Fetching upstream task IDs...")
+            # Fetch upstream tasks ids and save them in an ENV var
+            # Check if operator is properly assigned to a DAG
+            try:
+                # Try to access the dag property - this will raise RuntimeError if not assigned
+                dag = self.dag
+                if dag is None:
+                    logger.warning("[StarflowK8sOperator] Operator DAG is None, skipping upstream task IDs")
+                    upstream_task_ids = []
+                else:
+                    upstream_task_ids = [t.task_id for t in self.get_direct_relatives(upstream=True)]
+                    logger.info(f"[StarflowK8sOperator] Upstream task IDs: {upstream_task_ids}")
+            except RuntimeError as e:
+                if "has not been assigned to a DAG yet" in str(e):
+                    logger.warning("[StarflowK8sOperator] Operator not assigned to a DAG, skipping upstream task IDs")
+                    upstream_task_ids = []
+                else:
+                    raise
+
+            self.env_vars.append({
+                'name': 'AIRFLOW_UPSTREAM_TASKS_IDS',
+                'value': str(upstream_task_ids),
+                'value_from': None
+            })
+
+            # Pass forward the workflow shared storage source name
+            logger.info("[StarflowK8sOperator] Setting workflow shared storage environment variable...")
+            self.env_vars.append({
+                'name': 'STARFLOW_WORKFLOW_SHARED_STORAGE_SOURCE_NAME',
+                'value': str(self.workflow_shared_storage.source.name) if self.workflow_shared_storage else None,
+                'value_from': None
+            })
+
+            # Save updated piece input kwargs with upstream data to environment variable
+            logger.info("[StarflowK8sOperator] Getting upstream XCom data...")
+            self.upstream_xcoms_data = self._get_upstream_xcom_data_from_task_ids(task_ids=upstream_task_ids, context=context)
+            logger.info("[StarflowK8sOperator] Upstream XCom data retrieved successfully")
+
+            logger.info("[StarflowK8sOperator] Updating piece kwargs with upstream XCom...")
+            self._update_piece_kwargs_with_upstream_xcom()
+            logger.info("[StarflowK8sOperator] Piece kwargs updated successfully")
+
+            logger.info("[StarflowK8sOperator] Getting piece secrets...")
+            piece_secrets = self._get_piece_secrets(
+                repository_url=self.repository_url,
+                repository_version=self.repository_version,
+                piece_name=self.piece_name,
+                source='github'
+            )
+            logger.info(f"[StarflowK8sOperator] Piece secrets retrieved: {list(piece_secrets.keys())}")
+
+            logger.info("[StarflowK8sOperator] Setting piece secrets environment variables...")
+            for secret_name, secret_value in piece_secrets.items():
+                self.env_vars.append({
+                    'name': secret_name,
+                    'value': str(secret_value),
+                    'value_from': None
+                })
+            logger.info("[StarflowK8sOperator] Piece secrets environment variables set successfully")
+
+        except Exception as e:
+            logger.error(f"[StarflowK8sOperator] Error in _prepare_execute_environment: {str(e)}")
+            logger.error(f"[StarflowK8sOperator] Exception type: {type(e).__name__}")
+            import traceback
+            logger.error(f"[StarflowK8sOperator] Traceback: {traceback.format_exc()}")
+            raise
+
+        logger.info("[StarflowK8sOperator] Updating STARFLOW_RUN_PIECE_KWARGS environment variable...")
         self._update_env_var_value_from_name(name='STARFLOW_RUN_PIECE_KWARGS', value=str(self.piece_input_kwargs))
-
-        # # Add pieces secrets to environment variables
-        # piece_secrets = self._get_piece_secrets(
-        #     repository_url=self.repository_url,
-        #     repository_version=self.repository_version,
-        #     piece_name=self.piece_name,
-        # )
+        logger.info("[StarflowK8sOperator] STARFLOW_RUN_PIECE_KWARGS environment variable updated successfully")
         # self.env_vars.append({
         #     "name": "STARFLOW_PIECE_SECRETS",
         #     "value": str(piece_secrets),

@@ -505,19 +630,49 @@ class starflowKubernetesPodOperator(KubernetesPodOperator):
         """
         Code from here onward is executed by the Worker and not by the Scheduler.
         """
-        # TODO change url based on platform configuration
-        self.starflow_client = starflowBackendRestClient(base_url="http://starflow-sa-rest-service.prochain-starflow:8000/")
-        self._prepare_execute_environment(context=context)
-        remote_pod = None
+        import logging
+        logger = logging.getLogger(__name__)
+
+        # Log at the very beginning to confirm execute method is called
+        logger.info("=" * 80)
+        logger.info(f"[StarflowK8sOperator] EXECUTE METHOD CALLED - task_id: {self.task_id}")
+        logger.info(f"[StarflowK8sOperator] Context type: {type(context)}")
+        logger.info(f"[StarflowK8sOperator] Context keys: {list(context.keys()) if hasattr(context, 'keys') else 'No keys method'}")
+        logger.info("=" * 80)
+
         try:
+            logger.info(f"[StarflowK8sOperator] Starting execution for task_id: {self.task_id}")
+
+            # Initialize remote_pod early to avoid UnboundLocalError in finally block
+            remote_pod = None
+
+            # TODO change url based on platform configuration
+            logger.info("[StarflowK8sOperator] Initializing Starflow backend client...")
+            self.starflow_client = starflowBackendRestClient(base_url="http://starflow-rest-service.starflow:8000/")
+            logger.info("[StarflowK8sOperator] Starflow backend client initialized successfully")
+
+            logger.info("[StarflowK8sOperator] Preparing execute environment...")
+            self._prepare_execute_environment(context=context)
+            logger.info("[StarflowK8sOperator] Execute environment prepared successfully")
+            logger.info("[StarflowK8sOperator] Building pod request object...")
             self.pod_request_obj = self.build_pod_request_obj(context)
+            logger.info("[StarflowK8sOperator] Pod request object built successfully")
+
+            logger.info("[StarflowK8sOperator] Creating/getting pod...")
             self.pod = self.get_or_create_pod(  # must set `self.pod` for `on_kill`
                 pod_request_obj=self.pod_request_obj,
                 context=context,
             )
+            logger.info(f"[StarflowK8sOperator] Pod created successfully: {self.pod.metadata.name}")
+
             # get remote pod for use in cleanup methods
+            logger.info("[StarflowK8sOperator] Finding remote pod...")
             remote_pod = self.find_pod(self.pod.metadata.namespace, context=context)
+            logger.info("[StarflowK8sOperator] Remote pod found successfully")
+
+            logger.info("[StarflowK8sOperator] Awaiting pod start...")
             self.await_pod_start(pod=self.pod)
+            logger.info("[StarflowK8sOperator] Pod started successfully")
 
             if self.get_logs:
                 self.pod_manager.fetch_container_logs(

{starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/task.py
@@ -66,10 +66,26 @@ class Task(object):
         self.container_resources = container_resources
 
         # Get deploy mode
-        self.deploy_mode = os.environ.get('STARFLOW_DEPLOY_MODE')
+        self.deploy_mode = os.environ.get('STARFLOW_DEPLOY_MODE', 'prod')
 
         # Set up task operator
         self._task_operator = self._set_operator()
+
+    def _construct_image_url(self) -> str:
+        """Construct the Docker image URL from repository_url and piece name"""
+        # Extract the base image name from repository_url
+        # repository_url is like: ghcr.io/prochain-star-atlas/prochain-starflow-pieces:0.1.75-group0
+        # We need to construct: ghcr.io/prochain-star-atlas/prochain-starflow-pieces:0.1.75-group0
+
+        # For now, use the repository_url as the base and append the piece name
+        # This matches the pattern we see in the logs
+        base_image = self.repository_url
+        if ':' in base_image:
+            # If it already has a tag, use it as is
+            return base_image
+        else:
+            # If no tag, append the version
+            return f"{base_image}:{self.repository_version}"
 
     def _set_operator(self) -> BaseOperator:
         """

@@ -96,7 +112,7 @@ class Task(object):
             container_resources=self.container_resources,
             # ----------------- Kubernetes -----------------
             namespace='airflow',
-            image=self.piece.get("source_image"),
+            image=self.piece.get("source_image") or self._construct_image_url(),
             image_pull_policy='IfNotPresent',
             name=f"airflow-worker-pod-{self.task_id}",
             startup_timeout_seconds=600,

{starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow/task_branch.py
@@ -70,7 +70,7 @@ class TaskBranch(object):
         self.container_resources = container_resources
 
         # Get deploy mode
-        self.deploy_mode = os.environ.get('STARFLOW_DEPLOY_MODE')
+        self.deploy_mode = os.environ.get('STARFLOW_DEPLOY_MODE', 'prod')
 
         # Set up task operator
         self._task_operator = self._set_operator()

@@ -104,7 +104,7 @@ class TaskBranch(object):
             namespace='airflow',
             image=self.piece.get("source_image"),
             image_pull_policy='IfNotPresent',
-            name=f"airflow-worker-pod-{self.task_id}",
+            name=f"airflow-worker-pod-{self.task_id.lower().replace('_', '-')}",
             startup_timeout_seconds=600,
             annotations={"sidecar.istio.io/inject": "false"},  # TODO - remove this when istio is working with airflow k8s pod
             # cmds=["/bin/bash"],

{starflow_py-0.49.0 → starflow_py-0.57.0}/src/starflow_py.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: starflow-py
-Version: 0.49.0
+Version: 0.57.0
 Summary: Python package for starflow.
 Author-email: Alexandre Lazar <alexandre.lazar@gmail.com>
 License:

starflow_py-0.49.0/src/starflow/VERSION (removed)
@@ -1 +0,0 @@
-0.49.0