so-campaign-manager 0.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. so_campaign_manager-0.0.4.dist-info/METADATA +179 -0
  2. so_campaign_manager-0.0.4.dist-info/RECORD +44 -0
  3. so_campaign_manager-0.0.4.dist-info/WHEEL +5 -0
  4. so_campaign_manager-0.0.4.dist-info/entry_points.txt +2 -0
  5. so_campaign_manager-0.0.4.dist-info/licenses/LICENSE +24 -0
  6. so_campaign_manager-0.0.4.dist-info/top_level.txt +1 -0
  7. socm/__about__.py +34 -0
  8. socm/__init__.py +0 -0
  9. socm/__main__.py +35 -0
  10. socm/bookkeeper/__init__.py +1 -0
  11. socm/bookkeeper/bookkeeper.py +488 -0
  12. socm/configs/slurmise.toml +2 -0
  13. socm/core/__init__.py +1 -0
  14. socm/core/models.py +235 -0
  15. socm/enactor/__init__.py +3 -0
  16. socm/enactor/base.py +123 -0
  17. socm/enactor/dryrun_enactor.py +216 -0
  18. socm/enactor/rp_enactor.py +273 -0
  19. socm/execs/__init__.py +3 -0
  20. socm/execs/mapmaking.py +73 -0
  21. socm/planner/__init__.py +2 -0
  22. socm/planner/base.py +87 -0
  23. socm/planner/heft_planner.py +442 -0
  24. socm/resources/__init__.py +5 -0
  25. socm/resources/perlmutter.py +22 -0
  26. socm/resources/tiger.py +24 -0
  27. socm/resources/universe.py +18 -0
  28. socm/utils/__init__.py +0 -0
  29. socm/utils/misc.py +90 -0
  30. socm/utils/states.py +17 -0
  31. socm/workflows/__init__.py +41 -0
  32. socm/workflows/ml_mapmaking.py +111 -0
  33. socm/workflows/ml_null_tests/__init__.py +10 -0
  34. socm/workflows/ml_null_tests/base.py +117 -0
  35. socm/workflows/ml_null_tests/day_night_null_test.py +132 -0
  36. socm/workflows/ml_null_tests/direction_null_test.py +133 -0
  37. socm/workflows/ml_null_tests/elevation_null_test.py +118 -0
  38. socm/workflows/ml_null_tests/moon_close_null_test.py +165 -0
  39. socm/workflows/ml_null_tests/moonrise_set_null_test.py +151 -0
  40. socm/workflows/ml_null_tests/pwv_null_test.py +118 -0
  41. socm/workflows/ml_null_tests/sun_close_null_test.py +173 -0
  42. socm/workflows/ml_null_tests/time_null_test.py +76 -0
  43. socm/workflows/ml_null_tests/wafer_null_test.py +175 -0
  44. socm/workflows/sat_simulation.py +76 -0
socm/core/models.py ADDED
@@ -0,0 +1,235 @@
1
+ from collections.abc import Iterable
2
+ from numbers import Number
3
+ from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union, get_args, get_origin
4
+
5
+ from pydantic import BaseModel, Field, PrivateAttr
6
+
7
+ if TYPE_CHECKING:
8
+ from radical.pilot import TaskDescription
9
+
10
+
11
class QosPolicy(BaseModel):
    """A single scheduler quality-of-service (QoS) policy and its limits.

    A ``None`` limit means the policy places no restriction on that axis.
    """

    name: str
    # Maximum walltime per job (None = unlimited).
    max_walltime: Optional[int] = None  # in minutes
    # Maximum number of concurrently registered jobs (None = unlimited).
    max_jobs: Optional[int] = None
    # Maximum total cores across registered jobs (None = unlimited).
    max_cores: Optional[int] = None
16
+
17
+
18
class Resource(BaseModel):
    """An HPC resource (cluster) and the QoS policies it offers.

    Tracks the jobs registered against each QoS policy so that per-policy
    limits (walltime, running jobs, total cores) can be enforced.
    """

    name: str
    nodes: int
    cores_per_node: int
    memory_per_node: int
    qos: List[QosPolicy] = Field(default_factory=list)
    # Maps QoS name -> list of (job_id, walltime, cores) for registered jobs.
    _existing_jobs: Dict[str, List[Tuple[str, int, int]]] = PrivateAttr(default_factory=dict)

    def fits_in_qos(self, walltime: int, cores: int) -> QosPolicy | None:
        """
        Check if the given walltime and cores fit within the specified QoS policy.

        Policies are checked in the order they appear in ``self.qos``; the
        first one that accommodates the request wins.

        Args:
            walltime (int): The requested walltime in minutes.
            cores (int): The requested number of cores.

        Returns:
            QosPolicy | None: The matching QoS policy object or None if no match is found.
        """

        # What happens when the job does not fit in the best possible QoS?
        for policy in self.qos:
            existing_jobs = self._existing_jobs.get(policy.name, [])

            # Check walltime constraint (None means unlimited)
            if policy.max_walltime is not None and policy.max_walltime < walltime:
                continue

            # Check cores constraint, accounting for cores already consumed
            # by jobs registered under this policy (None means unlimited)
            if policy.max_cores is not None:
                used_cores = sum(job[2] for job in existing_jobs)
                if policy.max_cores - used_cores < cores:
                    continue

            # Check max jobs constraint (None means unlimited)
            if policy.max_jobs is not None and len(existing_jobs) >= policy.max_jobs:
                continue

            return policy
        return None

    def register_job(self, job_id: str, walltime: int, cores: int) -> bool:
        """
        Register a job with the resource if it fits within the QoS policies.

        Args:
            job_id (str): The unique identifier for the job.
            walltime (int): The requested walltime in minutes.
            cores (int): The requested number of cores.

        Returns:
            bool: True if the job was registered successfully, False otherwise.
        """
        qos_policy = self.fits_in_qos(walltime, cores)
        if qos_policy is None:
            return False
        # setdefault replaces the original get/append/re-assign sequence.
        self._existing_jobs.setdefault(qos_policy.name, []).append((job_id, walltime, cores))
        return True
79
+
80
class Workflow(BaseModel):
    """
    Base description of a workflow the campaign manager can execute.

    Subclasses implement :meth:`get_command`, :meth:`get_arguments` and
    :meth:`get_tasks`. ``extra="allow"`` lets subclasses carry arbitrary
    additional fields; the field-introspection helpers below inspect those
    through the instance ``__dict__`` as well as the declared annotations.
    """

    name: str
    executable: str
    context: str
    subcommand: str = ""
    id: Optional[int] = None
    environment: Optional[Dict[str, str]] = None
    resources: Optional[Dict[str, int | float]] = None

    model_config = {
        "extra": "allow",
    }

    def get_command(self, **kargs) -> str:
        """Return the full command line for this workflow (subclass hook)."""
        raise NotImplementedError("This method should be implemented in subclasses")

    def get_arguments(self, **kargs) -> str:
        """Return the argument string for this workflow (subclass hook)."""
        raise NotImplementedError("This method should be implemented in subclasses")

    def _fields_of_type(self, target: type, avoid_attributes: List[str]) -> List[str]:
        """
        Shared introspection behind get_numeric_fields / get_categorical_fields.

        A field is reported when (a) its Pydantic annotation is *target*, a
        Union/Optional containing it, or an iterable parameterized over it,
        or (b) its current value is an instance of *target*, or a
        non-str/bytes/dict iterable whose elements all are.

        Args:
            target: The type to look for (e.g. numbers.Number or str).
            avoid_attributes: Field names to skip.

        Returns:
            List[str]: Names of matching fields.
        """
        matched: List[str] = []

        # Pass 1: declared annotations via Pydantic v2 model_fields.
        for field_name, field_info in self.__class__.model_fields.items():
            # Skip excluded fields and fields whose current value is None.
            if field_name in avoid_attributes or getattr(self, field_name) is None:
                continue
            field_type = field_info.annotation

            # Direct match, e.g. `x: int` for Number or `x: str` for str.
            if isinstance(field_type, type) and issubclass(field_type, target):
                matched.append(field_name)
                continue

            # Parameterized generics (Optional[...], List[...], etc).
            origin = get_origin(field_type)
            if origin is None:
                continue
            args = get_args(field_type)
            if origin is Union:
                # Optional[target] or any Union arm of the target type.
                for arg in args:
                    if isinstance(arg, type) and issubclass(arg, target):
                        matched.append(field_name)
                        break
            elif issubclass(origin, Iterable):
                # Parameterized iterable such as List[int]: check element type.
                if args and isinstance(args[0], type) and issubclass(args[0], target):
                    matched.append(field_name)

        # Pass 2: actual instance values, covering extra="allow" fields not
        # described by the model annotations.
        for field_name, value in self.__dict__.items():
            if field_name in matched or field_name in avoid_attributes:
                continue
            if isinstance(value, target):
                matched.append(field_name)
            elif isinstance(value, Iterable) and not isinstance(value, (str, bytes, dict)):
                # Accept the field only if every element matches the target.
                try:
                    if all(isinstance(item, target) for item in value):
                        matched.append(field_name)
                except (TypeError, ValueError):
                    pass

        return matched

    def get_numeric_fields(self, avoid_attributes: List[str] | None = None) -> List[str]:
        """
        Returns a list of field names that are either numeric types
        or iterable collections of numeric types.

        Uses Pydantic v2 model_fields for type introspection.

        Returns:
            List[str]: Field names with numeric values
        """
        return self._fields_of_type(Number, avoid_attributes or [])

    def get_categorical_fields(self, avoid_attributes: List[str] | None = None) -> List[str]:
        """
        Returns a list of field names that are either string types
        or iterable collections of string types.

        Uses Pydantic v2 model_fields for type introspection.

        Returns:
            List[str]: Field names with categorical (string) values
        """
        return self._fields_of_type(str, avoid_attributes or [])

    def get_tasks(self) -> List["TaskDescription"]:
        """
        Returns a list of TaskDescription objects for the workflow.
        This is a placeholder method and should be implemented in subclasses.
        """
        raise NotImplementedError("This method should be implemented in subclasses")
226
+
227
+
228
class Campaign(BaseModel):
    """A campaign: a set of workflows to execute against a target resource."""

    id: int
    workflows: List[Workflow]
    # Deadline as a string; format is decided by the consumer — TODO confirm.
    deadline: str
    # Name of the resource the campaign targets (default: tiger3).
    target_resource: str = "tiger3"
    # Planning policy identifier used by the planner.
    campaign_policy: str = "time"
    # Execution mode passed to the enactor.
    execution_schema: str = "batch"
    # Number of explicitly requested resources; 0 means "let the planner decide"
    # — presumably, verify against the planner.
    requested_resources: int = 0
@@ -0,0 +1,3 @@
1
+ from .base import Enactor # noqa: F401
2
+ from .dryrun_enactor import DryrunEnactor # noqa: F401
3
+ from .rp_enactor import RPEnactor # noqa: F401
socm/enactor/base.py ADDED
@@ -0,0 +1,123 @@
1
+ import os
2
+ from typing import Dict, List
3
+
4
+ import radical.utils as ru
5
+
6
+ from socm.core import Resource
7
+ from socm.utils.states import States
8
+
9
+
10
class Enactor(object):
    """
    The Enactor is responsible to execute workflows on resources. The Enactor
    takes as input a list of tuples <workflow,resource> and executes the
    workflows on their selected resources.

    The Enactor offers a set of methods to execute and monitor workflows.

    *Parameters:*

    *workflows*: A list with the workflow IDs that are executing.

    *execution_status*: a hash table that holds the state, and
    execution status.

    *logger*: a logging object.
    """

    def __init__(self, sid=None):
        # NOTE(review): attribute name keeps the original's typo ("worflows")
        # in case subclasses elsewhere reference it.
        self._worflows = list()  # A list of workflow IDs

        # Hash table of workflows, keyed by workflow ID:
        # 'workflowID': {'state': state of the workflow based on the WMF,
        #                'endpoint': process ID or object of the WMF for the
        #                            specific workflow,
        #                'start_time': epoch when the workflow was submitted
        #                              to the WMF,
        #                'end_time': epoch when the workflow finished}
        self._execution_status = dict()

        self._uid = ru.generate_id("enactor.%(counter)04d", mode=ru.ID_CUSTOM, ns=sid)

        # Session directory (under the CWD) for logs and profiles.
        path = os.path.join(os.getcwd(), sid)
        name = self._uid

        self._logger = ru.Logger(name=self._uid, path=path, level="DEBUG")
        self._prof = ru.Profiler(name=name, path=path)

    def setup(self, resource: Resource, walltime: int, cores: int, execution_schema: str | None = None) -> None:
        """
        Sets up the enactor to execute workflows.
        """
        raise NotImplementedError("setup is not implemented")

    def enact(self, workflows, resources):
        """
        Method enact receives a set workflows and resources. It is responsible to
        start the execution of the workflow and set a endpoint to the WMF that
        executes the workflow

        *workflows:* A workflows that will execute on a resource
        *resources:* The resource that will be used.
        """
        raise NotImplementedError("enact is not implemented")

    def _monitor(self):
        """
        This method monitors the execution of workflows
        """
        raise NotImplementedError("_monitor is not implemented")

    def get_status(self, workflows: str | List[str] | None = None) -> Dict[str, States]:
        """
        Get the state of a workflow or workflows.

        *Parameter*
        *workflows:* A workflow ID or a list of workflow IDs

        *Returns*
        *status*: A dictionary with the state of each workflow.
        """

        status = dict()
        if workflows is None:
            # No filter: report every tracked workflow.
            for workflow in self._execution_status:
                status[workflow] = self._execution_status[workflow]["state"]
        elif isinstance(workflows, list):
            for workflow in workflows:
                status[workflow] = self._execution_status[workflow]["state"]
        else:
            # Single workflow ID.
            status[workflows] = self._execution_status[workflows]["state"]

        return status

    def update_status_cb(self, workflow, new_state):
        """
        Update the state of a workflow that is executing.

        Unknown workflows only trigger a warning; they are not added.
        """

        if workflow not in self._execution_status:
            # BUGFIX: the original also passed self._get_workflow_state(workflow)
            # as an extra logging argument, which raises KeyError for exactly
            # the workflows this branch handles (never enacted on), and the
            # format string has only one placeholder anyway.
            self._logger.warning(
                "Has not enacted on workflow %s yet.",
                workflow,
            )
        else:
            self._execution_status[workflow]["state"] = new_state

    def _get_workflow_state(self, workflow):
        """
        Get a workflow's current state. Raises KeyError for unknown IDs.
        """

        return self._execution_status[workflow]["state"]

    def terminate(self):
        """
        Public method to terminate the Enactor
        """
        raise NotImplementedError("terminate is not implemented")
@@ -0,0 +1,216 @@
1
+ # Imports from general packages
2
+ import os
3
+ import threading as mt
4
+ from copy import deepcopy
5
+ from datetime import datetime
6
+ from time import sleep
7
+ from typing import Dict, List
8
+
9
+ # Imports from dependent packages
10
+ import radical.utils as ru
11
+
12
+ from socm.core import Resource, Workflow
13
+ from socm.enactor.base import Enactor
14
+ from socm.utils.states import States
15
+
16
+
17
class DryrunEnactor(Enactor):
    """
    The DryrunEnactor simulates the execution of workflows on resources
    without actually running them. Workflows handed to :meth:`enact` are
    marked EXECUTING and, on the next monitor pass, immediately transitioned
    to DONE — useful for testing and validation purposes.
    """

    def __init__(self, sid: str):
        super(DryrunEnactor, self).__init__(sid=sid)
        # Workflow IDs that are executing and require monitoring. Shared with
        # the monitor thread, so mutations happen under the monitoring lock.
        self._to_monitor = list()

        # Point RADICAL at the package-local configs directory.
        # BUGFIX: the original passed one pre-concatenated string
        # (dirname + "/../configs/") to os.path.join, making the join a
        # no-op; build the path from components instead.
        os.environ["RADICAL_CONFIG_USER_DIR"] = os.path.join(
            os.path.dirname(__file__), "..", "configs"
        )
        self._prof.prof("enactor_setup", uid=self._uid)
        # Locks providing atomicity for the monitoring list and callbacks.
        self._monitoring_lock = ru.RLock("cm.monitor_lock")
        self._cb_lock = ru.RLock("enactor.cb_lock")
        self._callbacks = dict()

        self._monitoring_thread = None  # Lazily started monitor thread.
        self._terminate_monitor = mt.Event()  # Signals the monitor to stop.

        self._run = False
        self._resource = None
        self._prof.prof("enactor_started", uid=self._uid)
        self._logger.info("Enactor is ready")

    def setup(self, resource: Resource, walltime: int, cores: int, execution_schema: str | None = None) -> None:
        """
        Sets up the enactor to execute workflows.

        Only the target resource is recorded; walltime, cores and the
        execution schema are irrelevant for a dry run.
        """
        self._resource = resource

    def enact(self, workflows: List[Workflow]) -> None:
        """
        Method enact receives a set workflows and resources. It is responsible to
        start the execution of the workflow and set a endpoint to the WMF that
        executes the workflow

        *workflows:* A workflows that will execute on a resource
        *resources:* The resource that will be used.
        """

        self._prof.prof("enacting_start", uid=self._uid)
        for workflow in workflows:
            # If the enactor has already received a workflow issue a warning
            # and proceed.
            if workflow.id in self._execution_status:
                self._logger.info(
                    "Workflow %s is in state %s",
                    workflow,
                    self._get_workflow_state(workflow.id).name,
                )
                continue

            try:
                # In a dry run there is nothing to launch: mark the workflow
                # as executing and let the monitor thread "finish" it.
                with self._monitoring_lock:
                    self._to_monitor.append(workflow.id)
                    self._execution_status[workflow.id] = {
                        "state": States.EXECUTING,
                        "exec_thread": None,
                        "start_time": datetime.now(),
                        "end_time": None,
                    }

                # Notify registered callbacks of the state transition.
                for cb in self._callbacks:
                    self._callbacks[cb](
                        workflow_ids=[workflow.id],
                        new_state=States.EXECUTING,
                        step_ids=[None],
                    )
            except Exception as ex:
                self._logger.error(f"Workflow {workflow} could not be executed")
                self._logger.error(f"Exception raised {ex}", exc_info=True)

        self._prof.prof("enacting_stop", uid=self._uid)
        # If there is no monitoring thread yet, start one.
        if self._monitoring_thread is None and self._to_monitor:
            self._logger.info("Starting monitor thread")
            self._monitoring_thread = mt.Thread(
                target=self._monitor, name="monitor-thread"
            )
            self._monitoring_thread.start()
            sleep(1)

    def _monitor(self):
        """
        Monitor thread body: keeps the campaign execution data structure up
        to date. In a dry run every monitored workflow is considered finished
        as soon as it is observed.
        """

        while not self._terminate_monitor.is_set():
            if not self._to_monitor:
                # BUGFIX: the original spun in a hot busy-wait loop when
                # there was nothing to monitor; yield the CPU instead.
                sleep(0.1)
                continue

            workflows_executing = [f"workflow.{workflow_id}" for workflow_id in self._to_monitor]
            self._prof.prof("workflow_monitor_start", uid=self._uid)
            # Iterate over a snapshot so the shared list can be mutated safely.
            monitoring_list = deepcopy(self._to_monitor)
            to_remove_wfs = list()
            to_remove_sids = list()
            self._logger.debug(f"Executing workflows: {workflows_executing}, monitoring list: {monitoring_list}")
            for workflow_id in monitoring_list:
                # In a dry run every snapshot entry is in the executing list,
                # so every workflow completes immediately.
                if f"workflow.{workflow_id}" in workflows_executing:
                    with self._monitoring_lock:
                        self._logger.debug(f"workflow.{workflow_id} Done")
                        self._execution_status[workflow_id]["state"] = States.DONE
                        self._execution_status[workflow_id]["end_time"] = datetime.now()
                        self._logger.debug(
                            "Workflow %s finished: %s, step_id: %s",
                            workflow_id,
                            self._execution_status[workflow_id]["end_time"],
                            0,
                        )
                        to_remove_wfs.append(workflow_id)
                        to_remove_sids.append(0)
                        self._prof.prof("workflow_success", uid=self._uid)
            if to_remove_wfs:
                # Broadcast the DONE transition, then stop monitoring them.
                for cb in self._callbacks:
                    self._callbacks[cb](
                        workflow_ids=to_remove_wfs,
                        new_state=States.DONE,
                        step_ids=to_remove_sids,
                    )
                with self._monitoring_lock:
                    for wid in to_remove_wfs:
                        self._to_monitor.remove(wid)
            self._prof.prof("workflow_monitor_end", uid=self._uid)

    def get_status(self, workflows: str | List[str] | None = None) -> Dict[str, States]:
        """
        Get the state of a workflow or workflows.

        *Parameter*
        *workflows:* A workflow ID or a list of workflow IDs

        *Returns*
        *status*: A dictionary with the state of each workflow.
        """

        status = dict()
        if workflows is None:
            for workflow in self._execution_status:
                status[workflow] = self._execution_status[workflow]["state"]
        elif isinstance(workflows, list):
            for workflow in workflows:
                status[workflow] = self._execution_status[workflow]["state"]
        else:
            status[workflows] = self._execution_status[workflows]["state"]

        return status

    def update_status(self, workflow, new_state):
        """
        Update the state of a workflow that is executing
        """

        if workflow not in self._execution_status:
            self._logger.warning(
                "Has not enacted on workflow %s yet.",
                workflow,
            )
        else:
            self._execution_status[workflow]["state"] = new_state

    def terminate(self):
        """
        Public method to terminate the Enactor
        """
        self._logger.info("Start terminating procedure")
        self._prof.prof("str_terminating", uid=self._uid)
        if self._monitoring_thread:
            self._prof.prof("monitor_terminate", uid=self._uid)
            self._terminate_monitor.set()
            self._monitoring_thread.join()
            self._prof.prof("monitor_terminated", uid=self._uid)
            self._logger.debug("Monitor thread terminated")
        self._logger.debug("Enactor thread terminated")

    def register_state_cb(self, cb):
        """
        Registers a new state update callback function with the Enactor.
        """

        with self._cb_lock:
            cb_name = cb.__name__
            self._callbacks[cb_name] = cb