PyPI - tasktree - Versions diffs - 0.0.7__py3-none-any.whl → 0.0.9__py3-none-any.whl - Mend

tasktree 0.0.7__py3-none-any.whl → 0.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

tasktree/cli.py +78 -22
tasktree/docker.py +25 -0
tasktree/executor.py +346 -34
tasktree/graph.py +124 -26
tasktree/hasher.py +73 -2
tasktree/parser.py +1288 -35
tasktree/substitution.py +198 -0
tasktree/types.py +11 -2
tasktree-0.0.9.dist-info/METADATA +1240 -0
tasktree-0.0.9.dist-info/RECORD +15 -0
tasktree-0.0.7.dist-info/METADATA +0 -654
tasktree-0.0.7.dist-info/RECORD +0 -14
{tasktree-0.0.7.dist-info → tasktree-0.0.9.dist-info}/WHEEL +0 -0
{tasktree-0.0.7.dist-info → tasktree-0.0.9.dist-info}/entry_points.txt +0 -0

tasktree/executor.py CHANGED Viewed

@@ -9,14 +9,14 @@ import subprocess
 import tempfile
 import time
 from dataclasses import dataclass, field
-from datetime import datetime
+from datetime import datetime, timezone
 from pathlib import Path
 from typing import Any
 from tasktree import docker as docker_module
 from tasktree.graph import get_implicit_inputs, resolve_execution_order
 from tasktree.hasher import hash_args, hash_task, make_cache_key
-from tasktree.parser import Recipe, Task
+from tasktree.parser import Recipe, Task, Environment
 from tasktree.state import StateManager, TaskState
@@ -41,6 +41,18 @@ class ExecutionError(Exception):
 class Executor:
     """Executes tasks with incremental execution logic."""
+    # Protected environment variables that cannot be overridden by exported args
+    PROTECTED_ENV_VARS = {
+        'PATH',
+        'LD_LIBRARY_PATH',
+        'LD_PRELOAD',
+        'PYTHONPATH',
+        'HOME',
+        'SHELL',
+        'USER',
+        'LOGNAME',
+    }
     def __init__(self, recipe: Recipe, state_manager: StateManager):
         """Initialize executor.
@@ -52,6 +64,162 @@ class Executor:
         self.state = state_manager
         self.docker_manager = docker_module.DockerManager(recipe.project_root)
+    def _has_regular_args(self, task: Task) -> bool:
+        """Check if a task has any regular (non-exported) arguments.
+        Args:
+            task: Task to check
+        Returns:
+            True if task has at least one regular (non-exported) argument, False otherwise
+        """
+        if not task.args:
+            return False
+        # Check if any arg is not exported (doesn't start with $)
+        for arg_spec in task.args:
+            # Handle both string and dict arg specs
+            if isinstance(arg_spec, str):
+                # Remove default value part if present
+                arg_name = arg_spec.split('=')[0].split(':')[0].strip()
+                if not arg_name.startswith('$'):
+                    return True
+            elif isinstance(arg_spec, dict):
+                # Dict format: { argname: { ... } } or { $argname: { ... } }
+                for key in arg_spec.keys():
+                    if not key.startswith('$'):
+                        return True
+        return False
+    def _filter_regular_args(self, task: Task, task_args: dict[str, Any]) -> dict[str, Any]:
+        """Filter task_args to only include regular (non-exported) arguments.
+        Args:
+            task: Task definition
+            task_args: Dictionary of all task arguments
+        Returns:
+            Dictionary containing only regular (non-exported) arguments
+        """
+        if not task.args or not task_args:
+            return {}
+        # Build set of exported arg names (without the $ prefix)
+        exported_names = set()
+        for arg_spec in task.args:
+            if isinstance(arg_spec, str):
+                arg_name = arg_spec.split('=')[0].split(':')[0].strip()
+                if arg_name.startswith('$'):
+                    exported_names.add(arg_name[1:])  # Remove $ prefix
+            elif isinstance(arg_spec, dict):
+                for key in arg_spec.keys():
+                    if key.startswith('$'):
+                        exported_names.add(key[1:])  # Remove $ prefix
+        # Filter out exported args
+        return {k: v for k, v in task_args.items() if k not in exported_names}
+    def _collect_early_builtin_variables(self, task: Task, timestamp: datetime) -> dict[str, str]:
+        """Collect built-in variables that don't depend on working_dir.
+        These variables can be used in the working_dir field itself.
+        Args:
+            task: Task being executed
+            timestamp: Timestamp when task started execution
+        Returns:
+            Dictionary mapping built-in variable names to their string values
+        Raises:
+            ExecutionError: If any built-in variable fails to resolve
+        """
+        import os
+        builtin_vars = {}
+        # {{ tt.project_root }} - Absolute path to project root
+        builtin_vars['project_root'] = str(self.recipe.project_root.resolve())
+        # {{ tt.recipe_dir }} - Absolute path to directory containing the recipe file
+        builtin_vars['recipe_dir'] = str(self.recipe.recipe_path.parent.resolve())
+        # {{ tt.task_name }} - Name of currently executing task
+        builtin_vars['task_name'] = task.name
+        # {{ tt.timestamp }} - ISO8601 timestamp when task started execution
+        builtin_vars['timestamp'] = timestamp.strftime('%Y-%m-%dT%H:%M:%SZ')
+        # {{ tt.timestamp_unix }} - Unix epoch timestamp when task started
+        builtin_vars['timestamp_unix'] = str(int(timestamp.timestamp()))
+        # {{ tt.user_home }} - Current user's home directory (cross-platform)
+        try:
+            user_home = Path.home()
+            builtin_vars['user_home'] = str(user_home)
+        except Exception as e:
+            raise ExecutionError(
+                f"Failed to get user home directory for {{ tt.user_home }}: {e}"
+            )
+        # {{ tt.user_name }} - Current username (with fallback)
+        try:
+            user_name = os.getlogin()
+        except OSError:
+            # Fallback to environment variables if os.getlogin() fails
+            user_name = os.environ.get('USER') or os.environ.get('USERNAME') or 'unknown'
+        builtin_vars['user_name'] = user_name
+        return builtin_vars
+    def _collect_builtin_variables(self, task: Task, working_dir: Path, timestamp: datetime) -> dict[str, str]:
+        """Collect built-in variables for task execution.
+        Args:
+            task: Task being executed
+            working_dir: Resolved working directory for the task
+            timestamp: Timestamp when task started execution
+        Returns:
+            Dictionary mapping built-in variable names to their string values
+        Raises:
+            ExecutionError: If any built-in variable fails to resolve
+        """
+        # Get early builtin vars (those that don't depend on working_dir)
+        builtin_vars = self._collect_early_builtin_variables(task, timestamp)
+        # {{ tt.working_dir }} - Absolute path to task's effective working directory
+        # This is added after working_dir is resolved to avoid circular dependency
+        builtin_vars['working_dir'] = str(working_dir.resolve())
+        return builtin_vars
+    def _prepare_env_with_exports(self, exported_env_vars: dict[str, str] | None = None) -> dict[str, str]:
+        """Prepare environment with exported arguments.
+        Args:
+            exported_env_vars: Exported arguments to set as environment variables
+        Returns:
+            Environment dict with exported args merged
+        Raises:
+            ValueError: If an exported arg attempts to override a protected environment variable
+        """
+        env = os.environ.copy()
+        if exported_env_vars:
+            # Check for protected environment variable overrides
+            for key in exported_env_vars:
+                if key in self.PROTECTED_ENV_VARS:
+                    raise ValueError(
+                        f"Cannot override protected environment variable: {key}\n"
+                        f"Protected variables are: {', '.join(sorted(self.PROTECTED_ENV_VARS))}"
+                    )
+            env.update(exported_env_vars)
+        return env
     def _get_platform_default_environment(self) -> tuple[str, list[str]]:
         """Get default shell and args for current platform.
@@ -165,9 +333,9 @@ class Executor:
                 reason="forced",
             )
-        # Compute hashes (include effective environment)
+        # Compute hashes (include effective environment and dependencies)
         effective_env = self._get_effective_env_name(task)
-        task_hash = hash_task(task.cmd, task.outputs, task.working_dir, task.args, effective_env)
+        task_hash = hash_task(task.cmd, task.outputs, task.working_dir, task.args, effective_env, task.deps)
         args_hash = hash_args(args_dict) if args_dict else None
         cache_key = make_cache_key(task_hash, args_hash)
@@ -260,22 +428,39 @@ class Executor:
         # Resolve execution order
         if only:
             # Only execute the target task, skip dependencies
-            execution_order = [task_name]
+            execution_order = [(task_name, args_dict)]
         else:
             # Execute task and all dependencies
-            execution_order = resolve_execution_order(self.recipe, task_name)
+            execution_order = resolve_execution_order(self.recipe, task_name, args_dict)
         # Single phase: Check and execute incrementally
         statuses: dict[str, TaskStatus] = {}
-        for name in execution_order:
+        for name, task_args in execution_order:
             task = self.recipe.tasks[name]
-            # Determine task-specific args (only for target task)
-            task_args = args_dict if name == task_name else {}
+            # Convert None to {} for internal use (None is used to distinguish simple deps in graph)
+            args_dict_for_execution = task_args if task_args is not None else {}
             # Check if task needs to run (based on CURRENT filesystem state)
-            status = self.check_task_status(task, task_args, force=force)
-            statuses[name] = status
+            status = self.check_task_status(task, args_dict_for_execution, force=force)
+            # Use a key that includes args for status tracking
+            # Only include regular (non-exported) args in status key for parameterized dependencies
+            # For the root task (invoked from CLI), status key is always just the task name
+            # For dependencies with parameterized invocations, include the regular args
+            is_root_task = (name == task_name)
+            if not is_root_task and args_dict_for_execution and self._has_regular_args(task):
+                import json
+                # Filter to only include regular (non-exported) args
+                regular_args = self._filter_regular_args(task, args_dict_for_execution)
+                if regular_args:
+                    args_str = json.dumps(regular_args, sort_keys=True, separators=(",", ":"))
+                    status_key = f"{name}({args_str})"
+                else:
+                    status_key = name
+            else:
+                status_key = name
+            statuses[status_key] = status
             # Execute immediately if needed
             if status.will_run:
@@ -287,7 +472,7 @@ class Executor:
                         file=sys.stderr,
                     )
-                self._run_task(task, task_args)
+                self._run_task(task, args_dict_for_execution)
         return statuses
@@ -301,11 +486,47 @@ class Executor:
         Raises:
             ExecutionError: If task execution fails
         """
-        # Substitute arguments in command
-        cmd = self._substitute_args(task.cmd, args_dict)
+        # Capture timestamp at task start for consistency (in UTC)
+        task_start_time = datetime.now(timezone.utc)
+        # Parse task arguments to identify exported args
+        # Note: args_dict already has defaults applied by CLI (cli.py:413-424)
+        from tasktree.parser import parse_arg_spec
+        exported_args = set()
+        regular_args = {}
+        exported_env_vars = {}
+        for arg_spec in task.args:
+            parsed = parse_arg_spec(arg_spec)
+            if parsed.is_exported:
+                exported_args.add(parsed.name)
+                # Get value and convert to string for environment variable
+                # Value should always be in args_dict (CLI applies defaults)
+                if parsed.name in args_dict:
+                    exported_env_vars[parsed.name] = str(args_dict[parsed.name])
+            else:
+                if parsed.name in args_dict:
+                    regular_args[parsed.name] = args_dict[parsed.name]
+        # Collect early built-in variables (those that don't depend on working_dir)
+        # These can be used in the working_dir field itself
+        early_builtin_vars = self._collect_early_builtin_variables(task, task_start_time)
+        # Resolve working directory
+        # Validate that working_dir doesn't contain {{ tt.working_dir }} (circular dependency)
+        self._validate_no_working_dir_circular_ref(task.working_dir)
+        working_dir_str = self._substitute_builtin(task.working_dir, early_builtin_vars)
+        working_dir_str = self._substitute_args(working_dir_str, regular_args, exported_args)
+        working_dir_str = self._substitute_env(working_dir_str)
+        working_dir = self.recipe.project_root / working_dir_str
-        # Determine working directory
-        working_dir = self.recipe.project_root / task.working_dir
+        # Collect all built-in variables (including tt.working_dir now that it's resolved)
+        builtin_vars = self._collect_builtin_variables(task, working_dir, task_start_time)
+        # Substitute built-in variables, arguments, and environment variables in command
+        cmd = self._substitute_builtin(task.cmd, builtin_vars)
+        cmd = self._substitute_args(cmd, regular_args, exported_args)
+        cmd = self._substitute_env(cmd)
         # Check if task uses Docker environment
         env_name = self._get_effective_env_name(task)
@@ -319,22 +540,23 @@ class Executor:
         # Route to Docker execution or regular execution
         if env and env.dockerfile:
             # Docker execution path
-            self._run_task_in_docker(task, env, cmd, working_dir)
+            self._run_task_in_docker(task, env, cmd, working_dir, exported_env_vars)
         else:
             # Regular execution path
             shell, shell_args, preamble = self._resolve_environment(task)
             # Detect multi-line commands (ignore trailing newlines from YAML folded blocks)
             if "\n" in cmd.rstrip():
-                self._run_multiline_command(cmd, working_dir, task.name, shell, preamble)
+                self._run_multiline_command(cmd, working_dir, task.name, shell, preamble, exported_env_vars)
             else:
-                self._run_single_line_command(cmd, working_dir, task.name, shell, shell_args)
+                self._run_single_line_command(cmd, working_dir, task.name, shell, shell_args, exported_env_vars)
         # Update state
         self._update_state(task, args_dict)
     def _run_single_line_command(
-        self, cmd: str, working_dir: Path, task_name: str, shell: str, shell_args: list[str]
+        self, cmd: str, working_dir: Path, task_name: str, shell: str, shell_args: list[str],
+        exported_env_vars: dict[str, str] | None = None
     ) -> None:
         """Execute a single-line command via shell.
@@ -344,10 +566,14 @@ class Executor:
             task_name: Task name (for error messages)
             shell: Shell executable to use
             shell_args: Arguments to pass to shell
+            exported_env_vars: Exported arguments to set as environment variables
         Raises:
             ExecutionError: If command execution fails
         """
+        # Prepare environment with exported args
+        env = self._prepare_env_with_exports(exported_env_vars)
         try:
             # Build command: shell + args + cmd
             full_cmd = [shell] + shell_args + [cmd]
@@ -356,6 +582,7 @@ class Executor:
                 cwd=working_dir,
                 check=True,
                 capture_output=False,
+                env=env,
             )
         except subprocess.CalledProcessError as e:
             raise ExecutionError(
@@ -363,7 +590,8 @@ class Executor:
             )
     def _run_multiline_command(
-        self, cmd: str, working_dir: Path, task_name: str, shell: str, preamble: str
+        self, cmd: str, working_dir: Path, task_name: str, shell: str, preamble: str,
+        exported_env_vars: dict[str, str] | None = None
     ) -> None:
         """Execute a multi-line command via temporary script file.
@@ -373,10 +601,14 @@ class Executor:
             task_name: Task name (for error messages)
             shell: Shell to use for script execution
             preamble: Preamble text to prepend to script
+            exported_env_vars: Exported arguments to set as environment variables
         Raises:
             ExecutionError: If command execution fails
         """
+        # Prepare environment with exported args
+        env = self._prepare_env_with_exports(exported_env_vars)
         # Determine file extension based on platform
         is_windows = platform.system() == "Windows"
         script_ext = ".bat" if is_windows else ".sh"
@@ -417,6 +649,7 @@ class Executor:
                     cwd=working_dir,
                     check=True,
                     capture_output=False,
+                    env=env,
                 )
             except subprocess.CalledProcessError as e:
                 raise ExecutionError(
@@ -430,7 +663,8 @@ class Executor:
                 pass  # Ignore cleanup errors
     def _run_task_in_docker(
-        self, task: Task, env: Any, cmd: str, working_dir: Path
+        self, task: Task, env: Any, cmd: str, working_dir: Path,
+        exported_env_vars: dict[str, str] | None = None
     ) -> None:
         """Execute task inside Docker container.
@@ -439,6 +673,7 @@ class Executor:
             env: Docker environment configuration
             cmd: Command to execute
             working_dir: Host working directory
+            exported_env_vars: Exported arguments to set as environment variables
         Raises:
             ExecutionError: If Docker execution fails
@@ -448,10 +683,26 @@ class Executor:
             env.working_dir, task.working_dir
         )
+        # Validate and merge exported args with env vars (exported args take precedence)
+        docker_env_vars = env.env_vars.copy() if env.env_vars else {}
+        if exported_env_vars:
+            # Check for protected environment variable overrides
+            for key in exported_env_vars:
+                if key in self.PROTECTED_ENV_VARS:
+                    raise ValueError(
+                        f"Cannot override protected environment variable: {key}\n"
+                        f"Protected variables are: {', '.join(sorted(self.PROTECTED_ENV_VARS))}"
+                    )
+            docker_env_vars.update(exported_env_vars)
+        # Create modified environment with merged env vars using dataclass replace
+        from dataclasses import replace
+        modified_env = replace(env, env_vars=docker_env_vars)
         # Execute in container
         try:
             self.docker_manager.run_in_container(
-                env=env,
+                env=modified_env,
                 cmd=cmd,
                 working_dir=working_dir,
                 container_working_dir=container_working_dir,
@@ -459,21 +710,82 @@ class Executor:
         except docker_module.DockerError as e:
             raise ExecutionError(str(e)) from e
-    def _substitute_args(self, cmd: str, args_dict: dict[str, Any]) -> str:
-        """Substitute arguments in command string.
+    def _validate_no_working_dir_circular_ref(self, text: str) -> None:
+        """Validate that working_dir field does not contain {{ tt.working_dir }}.
+        Using {{ tt.working_dir }} in the working_dir field creates a circular dependency.
+        Args:
+            text: The working_dir field value to validate
+        Raises:
+            ExecutionError: If {{ tt.working_dir }} placeholder is found
+        """
+        import re
+        # Pattern to match {{ tt.working_dir }} specifically
+        pattern = re.compile(r'\{\{\s*tt\s*\.\s*working_dir\s*\}\}')
+        if pattern.search(text):
+            raise ExecutionError(
+                f"Cannot use {{{{ tt.working_dir }}}} in the 'working_dir' field.\n\n"
+                f"This creates a circular dependency (working_dir cannot reference itself).\n"
+                f"Other built-in variables like {{{{ tt.task_name }}}} or {{{{ tt.timestamp }}}} are allowed."
+            )
+    def _substitute_builtin(self, text: str, builtin_vars: dict[str, str]) -> str:
+        """Substitute {{ tt.name }} placeholders in text.
+        Built-in variables are resolved at execution time.
+        Args:
+            text: Text with {{ tt.name }} placeholders
+            builtin_vars: Built-in variable values
+        Returns:
+            Text with built-in variables substituted
+        Raises:
+            ValueError: If built-in variable is not defined
+        """
+        from tasktree.substitution import substitute_builtin_variables
+        return substitute_builtin_variables(text, builtin_vars)
+    def _substitute_args(self, cmd: str, args_dict: dict[str, Any], exported_args: set[str] | None = None) -> str:
+        """Substitute {{ arg.name }} placeholders in command string.
+        Variables are already substituted at parse time by the parser.
+        This only handles runtime argument substitution.
         Args:
-            cmd: Command template with {{arg}} placeholders
-            args_dict: Arguments to substitute
+            cmd: Command with {{ arg.name }} placeholders
+            args_dict: Argument values to substitute (only regular args)
+            exported_args: Set of argument names that are exported (not available for substitution)
         Returns:
             Command with arguments substituted
+        Raises:
+            ValueError: If an exported argument is used in template substitution
+        """
+        from tasktree.substitution import substitute_arguments
+        return substitute_arguments(cmd, args_dict, exported_args)
+    def _substitute_env(self, text: str) -> str:
+        """Substitute {{ env.NAME }} placeholders in text.
+        Environment variables are resolved at execution time from os.environ.
+        Args:
+            text: Text with {{ env.NAME }} placeholders
+        Returns:
+            Text with environment variables substituted
+        Raises:
+            ValueError: If environment variable is not set
         """
-        result = cmd
-        for key, value in args_dict.items():
-            placeholder = f"{{{{{key}}}}}"
-            result = result.replace(placeholder, str(value))
-        return result
+        from tasktree.substitution import substitute_environment
+        return substitute_environment(text)
     def _get_all_inputs(self, task: Task) -> list[str]:
         """Get all inputs for a task (explicit + implicit from dependencies).
@@ -723,9 +1035,9 @@ class Executor:
             task: Task that was executed
             args_dict: Arguments used for execution
         """
-        # Compute hashes (include effective environment)
+        # Compute hashes (include effective environment and dependencies)
         effective_env = self._get_effective_env_name(task)
-        task_hash = hash_task(task.cmd, task.outputs, task.working_dir, task.args, effective_env)
+        task_hash = hash_task(task.cmd, task.outputs, task.working_dir, task.args, effective_env, task.deps)
         args_hash = hash_args(args_dict) if args_dict else None
         cache_key = make_cache_key(task_hash, args_hash)

tasktree 0.0.7__py3-none-any.whl → 0.0.9__py3-none-any.whl

tasktree 0.0.7__py3-none-any.whl → 0.0.9__py3-none-any.whl