tasktree 0.0.7__py3-none-any.whl → 0.0.9__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
tasktree/executor.py CHANGED
@@ -9,14 +9,14 @@ import subprocess
9
9
  import tempfile
10
10
  import time
11
11
  from dataclasses import dataclass, field
12
- from datetime import datetime
12
+ from datetime import datetime, timezone
13
13
  from pathlib import Path
14
14
  from typing import Any
15
15
 
16
16
  from tasktree import docker as docker_module
17
17
  from tasktree.graph import get_implicit_inputs, resolve_execution_order
18
18
  from tasktree.hasher import hash_args, hash_task, make_cache_key
19
- from tasktree.parser import Recipe, Task
19
+ from tasktree.parser import Recipe, Task, Environment
20
20
  from tasktree.state import StateManager, TaskState
21
21
 
22
22
 
@@ -41,6 +41,18 @@ class ExecutionError(Exception):
41
41
  class Executor:
42
42
  """Executes tasks with incremental execution logic."""
43
43
 
44
+ # Protected environment variables that cannot be overridden by exported args
45
+ PROTECTED_ENV_VARS = {
46
+ 'PATH',
47
+ 'LD_LIBRARY_PATH',
48
+ 'LD_PRELOAD',
49
+ 'PYTHONPATH',
50
+ 'HOME',
51
+ 'SHELL',
52
+ 'USER',
53
+ 'LOGNAME',
54
+ }
55
+
44
56
  def __init__(self, recipe: Recipe, state_manager: StateManager):
45
57
  """Initialize executor.
46
58
 
@@ -52,6 +64,162 @@ class Executor:
52
64
  self.state = state_manager
53
65
  self.docker_manager = docker_module.DockerManager(recipe.project_root)
54
66
 
67
+ def _has_regular_args(self, task: Task) -> bool:
68
+ """Check if a task has any regular (non-exported) arguments.
69
+
70
+ Args:
71
+ task: Task to check
72
+
73
+ Returns:
74
+ True if task has at least one regular (non-exported) argument, False otherwise
75
+ """
76
+ if not task.args:
77
+ return False
78
+
79
+ # Check if any arg is not exported (doesn't start with $)
80
+ for arg_spec in task.args:
81
+ # Handle both string and dict arg specs
82
+ if isinstance(arg_spec, str):
83
+ # Remove default value part if present
84
+ arg_name = arg_spec.split('=')[0].split(':')[0].strip()
85
+ if not arg_name.startswith('$'):
86
+ return True
87
+ elif isinstance(arg_spec, dict):
88
+ # Dict format: { argname: { ... } } or { $argname: { ... } }
89
+ for key in arg_spec.keys():
90
+ if not key.startswith('$'):
91
+ return True
92
+
93
+ return False
94
+
95
+ def _filter_regular_args(self, task: Task, task_args: dict[str, Any]) -> dict[str, Any]:
96
+ """Filter task_args to only include regular (non-exported) arguments.
97
+
98
+ Args:
99
+ task: Task definition
100
+ task_args: Dictionary of all task arguments
101
+
102
+ Returns:
103
+ Dictionary containing only regular (non-exported) arguments
104
+ """
105
+ if not task.args or not task_args:
106
+ return {}
107
+
108
+ # Build set of exported arg names (without the $ prefix)
109
+ exported_names = set()
110
+ for arg_spec in task.args:
111
+ if isinstance(arg_spec, str):
112
+ arg_name = arg_spec.split('=')[0].split(':')[0].strip()
113
+ if arg_name.startswith('$'):
114
+ exported_names.add(arg_name[1:]) # Remove $ prefix
115
+ elif isinstance(arg_spec, dict):
116
+ for key in arg_spec.keys():
117
+ if key.startswith('$'):
118
+ exported_names.add(key[1:]) # Remove $ prefix
119
+
120
+ # Filter out exported args
121
+ return {k: v for k, v in task_args.items() if k not in exported_names}
122
+
123
+ def _collect_early_builtin_variables(self, task: Task, timestamp: datetime) -> dict[str, str]:
124
+ """Collect built-in variables that don't depend on working_dir.
125
+
126
+ These variables can be used in the working_dir field itself.
127
+
128
+ Args:
129
+ task: Task being executed
130
+ timestamp: Timestamp when task started execution
131
+
132
+ Returns:
133
+ Dictionary mapping built-in variable names to their string values
134
+
135
+ Raises:
136
+ ExecutionError: If any built-in variable fails to resolve
137
+ """
138
+ import os
139
+
140
+ builtin_vars = {}
141
+
142
+ # {{ tt.project_root }} - Absolute path to project root
143
+ builtin_vars['project_root'] = str(self.recipe.project_root.resolve())
144
+
145
+ # {{ tt.recipe_dir }} - Absolute path to directory containing the recipe file
146
+ builtin_vars['recipe_dir'] = str(self.recipe.recipe_path.parent.resolve())
147
+
148
+ # {{ tt.task_name }} - Name of currently executing task
149
+ builtin_vars['task_name'] = task.name
150
+
151
+ # {{ tt.timestamp }} - ISO8601 timestamp when task started execution
152
+ builtin_vars['timestamp'] = timestamp.strftime('%Y-%m-%dT%H:%M:%SZ')
153
+
154
+ # {{ tt.timestamp_unix }} - Unix epoch timestamp when task started
155
+ builtin_vars['timestamp_unix'] = str(int(timestamp.timestamp()))
156
+
157
+ # {{ tt.user_home }} - Current user's home directory (cross-platform)
158
+ try:
159
+ user_home = Path.home()
160
+ builtin_vars['user_home'] = str(user_home)
161
+ except Exception as e:
162
+ raise ExecutionError(
163
+ f"Failed to get user home directory for {{ tt.user_home }}: {e}"
164
+ )
165
+
166
+ # {{ tt.user_name }} - Current username (with fallback)
167
+ try:
168
+ user_name = os.getlogin()
169
+ except OSError:
170
+ # Fallback to environment variables if os.getlogin() fails
171
+ user_name = os.environ.get('USER') or os.environ.get('USERNAME') or 'unknown'
172
+ builtin_vars['user_name'] = user_name
173
+
174
+ return builtin_vars
175
+
176
+ def _collect_builtin_variables(self, task: Task, working_dir: Path, timestamp: datetime) -> dict[str, str]:
177
+ """Collect built-in variables for task execution.
178
+
179
+ Args:
180
+ task: Task being executed
181
+ working_dir: Resolved working directory for the task
182
+ timestamp: Timestamp when task started execution
183
+
184
+ Returns:
185
+ Dictionary mapping built-in variable names to their string values
186
+
187
+ Raises:
188
+ ExecutionError: If any built-in variable fails to resolve
189
+ """
190
+ # Get early builtin vars (those that don't depend on working_dir)
191
+ builtin_vars = self._collect_early_builtin_variables(task, timestamp)
192
+
193
+ # {{ tt.working_dir }} - Absolute path to task's effective working directory
194
+ # This is added after working_dir is resolved to avoid circular dependency
195
+ builtin_vars['working_dir'] = str(working_dir.resolve())
196
+
197
+ return builtin_vars
198
+
199
+ def _prepare_env_with_exports(self, exported_env_vars: dict[str, str] | None = None) -> dict[str, str]:
200
+ """Prepare environment with exported arguments.
201
+
202
+ Args:
203
+ exported_env_vars: Exported arguments to set as environment variables
204
+
205
+ Returns:
206
+ Environment dict with exported args merged
207
+
208
+ Raises:
209
+ ValueError: If an exported arg attempts to override a protected environment variable
210
+ """
211
+ env = os.environ.copy()
212
+ if exported_env_vars:
213
+ # Check for protected environment variable overrides
214
+ for key in exported_env_vars:
215
+ if key in self.PROTECTED_ENV_VARS:
216
+ raise ValueError(
217
+ f"Cannot override protected environment variable: {key}\n"
218
+ f"Protected variables are: {', '.join(sorted(self.PROTECTED_ENV_VARS))}"
219
+ )
220
+ env.update(exported_env_vars)
221
+ return env
222
+
55
223
  def _get_platform_default_environment(self) -> tuple[str, list[str]]:
56
224
  """Get default shell and args for current platform.
57
225
 
@@ -165,9 +333,9 @@ class Executor:
165
333
  reason="forced",
166
334
  )
167
335
 
168
- # Compute hashes (include effective environment)
336
+ # Compute hashes (include effective environment and dependencies)
169
337
  effective_env = self._get_effective_env_name(task)
170
- task_hash = hash_task(task.cmd, task.outputs, task.working_dir, task.args, effective_env)
338
+ task_hash = hash_task(task.cmd, task.outputs, task.working_dir, task.args, effective_env, task.deps)
171
339
  args_hash = hash_args(args_dict) if args_dict else None
172
340
  cache_key = make_cache_key(task_hash, args_hash)
173
341
 
@@ -260,22 +428,39 @@ class Executor:
260
428
  # Resolve execution order
261
429
  if only:
262
430
  # Only execute the target task, skip dependencies
263
- execution_order = [task_name]
431
+ execution_order = [(task_name, args_dict)]
264
432
  else:
265
433
  # Execute task and all dependencies
266
- execution_order = resolve_execution_order(self.recipe, task_name)
434
+ execution_order = resolve_execution_order(self.recipe, task_name, args_dict)
267
435
 
268
436
  # Single phase: Check and execute incrementally
269
437
  statuses: dict[str, TaskStatus] = {}
270
- for name in execution_order:
438
+ for name, task_args in execution_order:
271
439
  task = self.recipe.tasks[name]
272
440
 
273
- # Determine task-specific args (only for target task)
274
- task_args = args_dict if name == task_name else {}
441
+ # Convert None to {} for internal use (None is used to distinguish simple deps in graph)
442
+ args_dict_for_execution = task_args if task_args is not None else {}
275
443
 
276
444
  # Check if task needs to run (based on CURRENT filesystem state)
277
- status = self.check_task_status(task, task_args, force=force)
278
- statuses[name] = status
445
+ status = self.check_task_status(task, args_dict_for_execution, force=force)
446
+
447
+ # Use a key that includes args for status tracking
448
+ # Only include regular (non-exported) args in status key for parameterized dependencies
449
+ # For the root task (invoked from CLI), status key is always just the task name
450
+ # For dependencies with parameterized invocations, include the regular args
451
+ is_root_task = (name == task_name)
452
+ if not is_root_task and args_dict_for_execution and self._has_regular_args(task):
453
+ import json
454
+ # Filter to only include regular (non-exported) args
455
+ regular_args = self._filter_regular_args(task, args_dict_for_execution)
456
+ if regular_args:
457
+ args_str = json.dumps(regular_args, sort_keys=True, separators=(",", ":"))
458
+ status_key = f"{name}({args_str})"
459
+ else:
460
+ status_key = name
461
+ else:
462
+ status_key = name
463
+ statuses[status_key] = status
279
464
 
280
465
  # Execute immediately if needed
281
466
  if status.will_run:
@@ -287,7 +472,7 @@ class Executor:
287
472
  file=sys.stderr,
288
473
  )
289
474
 
290
- self._run_task(task, task_args)
475
+ self._run_task(task, args_dict_for_execution)
291
476
 
292
477
  return statuses
293
478
 
@@ -301,11 +486,47 @@ class Executor:
301
486
  Raises:
302
487
  ExecutionError: If task execution fails
303
488
  """
304
- # Substitute arguments in command
305
- cmd = self._substitute_args(task.cmd, args_dict)
489
+ # Capture timestamp at task start for consistency (in UTC)
490
+ task_start_time = datetime.now(timezone.utc)
491
+
492
+ # Parse task arguments to identify exported args
493
+ # Note: args_dict already has defaults applied by CLI (cli.py:413-424)
494
+ from tasktree.parser import parse_arg_spec
495
+ exported_args = set()
496
+ regular_args = {}
497
+ exported_env_vars = {}
498
+
499
+ for arg_spec in task.args:
500
+ parsed = parse_arg_spec(arg_spec)
501
+ if parsed.is_exported:
502
+ exported_args.add(parsed.name)
503
+ # Get value and convert to string for environment variable
504
+ # Value should always be in args_dict (CLI applies defaults)
505
+ if parsed.name in args_dict:
506
+ exported_env_vars[parsed.name] = str(args_dict[parsed.name])
507
+ else:
508
+ if parsed.name in args_dict:
509
+ regular_args[parsed.name] = args_dict[parsed.name]
510
+
511
+ # Collect early built-in variables (those that don't depend on working_dir)
512
+ # These can be used in the working_dir field itself
513
+ early_builtin_vars = self._collect_early_builtin_variables(task, task_start_time)
514
+
515
+ # Resolve working directory
516
+ # Validate that working_dir doesn't contain {{ tt.working_dir }} (circular dependency)
517
+ self._validate_no_working_dir_circular_ref(task.working_dir)
518
+ working_dir_str = self._substitute_builtin(task.working_dir, early_builtin_vars)
519
+ working_dir_str = self._substitute_args(working_dir_str, regular_args, exported_args)
520
+ working_dir_str = self._substitute_env(working_dir_str)
521
+ working_dir = self.recipe.project_root / working_dir_str
306
522
 
307
- # Determine working directory
308
- working_dir = self.recipe.project_root / task.working_dir
523
+ # Collect all built-in variables (including tt.working_dir now that it's resolved)
524
+ builtin_vars = self._collect_builtin_variables(task, working_dir, task_start_time)
525
+
526
+ # Substitute built-in variables, arguments, and environment variables in command
527
+ cmd = self._substitute_builtin(task.cmd, builtin_vars)
528
+ cmd = self._substitute_args(cmd, regular_args, exported_args)
529
+ cmd = self._substitute_env(cmd)
309
530
 
310
531
  # Check if task uses Docker environment
311
532
  env_name = self._get_effective_env_name(task)
@@ -319,22 +540,23 @@ class Executor:
319
540
  # Route to Docker execution or regular execution
320
541
  if env and env.dockerfile:
321
542
  # Docker execution path
322
- self._run_task_in_docker(task, env, cmd, working_dir)
543
+ self._run_task_in_docker(task, env, cmd, working_dir, exported_env_vars)
323
544
  else:
324
545
  # Regular execution path
325
546
  shell, shell_args, preamble = self._resolve_environment(task)
326
547
 
327
548
  # Detect multi-line commands (ignore trailing newlines from YAML folded blocks)
328
549
  if "\n" in cmd.rstrip():
329
- self._run_multiline_command(cmd, working_dir, task.name, shell, preamble)
550
+ self._run_multiline_command(cmd, working_dir, task.name, shell, preamble, exported_env_vars)
330
551
  else:
331
- self._run_single_line_command(cmd, working_dir, task.name, shell, shell_args)
552
+ self._run_single_line_command(cmd, working_dir, task.name, shell, shell_args, exported_env_vars)
332
553
 
333
554
  # Update state
334
555
  self._update_state(task, args_dict)
335
556
 
336
557
  def _run_single_line_command(
337
- self, cmd: str, working_dir: Path, task_name: str, shell: str, shell_args: list[str]
558
+ self, cmd: str, working_dir: Path, task_name: str, shell: str, shell_args: list[str],
559
+ exported_env_vars: dict[str, str] | None = None
338
560
  ) -> None:
339
561
  """Execute a single-line command via shell.
340
562
 
@@ -344,10 +566,14 @@ class Executor:
344
566
  task_name: Task name (for error messages)
345
567
  shell: Shell executable to use
346
568
  shell_args: Arguments to pass to shell
569
+ exported_env_vars: Exported arguments to set as environment variables
347
570
 
348
571
  Raises:
349
572
  ExecutionError: If command execution fails
350
573
  """
574
+ # Prepare environment with exported args
575
+ env = self._prepare_env_with_exports(exported_env_vars)
576
+
351
577
  try:
352
578
  # Build command: shell + args + cmd
353
579
  full_cmd = [shell] + shell_args + [cmd]
@@ -356,6 +582,7 @@ class Executor:
356
582
  cwd=working_dir,
357
583
  check=True,
358
584
  capture_output=False,
585
+ env=env,
359
586
  )
360
587
  except subprocess.CalledProcessError as e:
361
588
  raise ExecutionError(
@@ -363,7 +590,8 @@ class Executor:
363
590
  )
364
591
 
365
592
  def _run_multiline_command(
366
- self, cmd: str, working_dir: Path, task_name: str, shell: str, preamble: str
593
+ self, cmd: str, working_dir: Path, task_name: str, shell: str, preamble: str,
594
+ exported_env_vars: dict[str, str] | None = None
367
595
  ) -> None:
368
596
  """Execute a multi-line command via temporary script file.
369
597
 
@@ -373,10 +601,14 @@ class Executor:
373
601
  task_name: Task name (for error messages)
374
602
  shell: Shell to use for script execution
375
603
  preamble: Preamble text to prepend to script
604
+ exported_env_vars: Exported arguments to set as environment variables
376
605
 
377
606
  Raises:
378
607
  ExecutionError: If command execution fails
379
608
  """
609
+ # Prepare environment with exported args
610
+ env = self._prepare_env_with_exports(exported_env_vars)
611
+
380
612
  # Determine file extension based on platform
381
613
  is_windows = platform.system() == "Windows"
382
614
  script_ext = ".bat" if is_windows else ".sh"
@@ -417,6 +649,7 @@ class Executor:
417
649
  cwd=working_dir,
418
650
  check=True,
419
651
  capture_output=False,
652
+ env=env,
420
653
  )
421
654
  except subprocess.CalledProcessError as e:
422
655
  raise ExecutionError(
@@ -430,7 +663,8 @@ class Executor:
430
663
  pass # Ignore cleanup errors
431
664
 
432
665
  def _run_task_in_docker(
433
- self, task: Task, env: Any, cmd: str, working_dir: Path
666
+ self, task: Task, env: Any, cmd: str, working_dir: Path,
667
+ exported_env_vars: dict[str, str] | None = None
434
668
  ) -> None:
435
669
  """Execute task inside Docker container.
436
670
 
@@ -439,6 +673,7 @@ class Executor:
439
673
  env: Docker environment configuration
440
674
  cmd: Command to execute
441
675
  working_dir: Host working directory
676
+ exported_env_vars: Exported arguments to set as environment variables
442
677
 
443
678
  Raises:
444
679
  ExecutionError: If Docker execution fails
@@ -448,10 +683,26 @@ class Executor:
448
683
  env.working_dir, task.working_dir
449
684
  )
450
685
 
686
+ # Validate and merge exported args with env vars (exported args take precedence)
687
+ docker_env_vars = env.env_vars.copy() if env.env_vars else {}
688
+ if exported_env_vars:
689
+ # Check for protected environment variable overrides
690
+ for key in exported_env_vars:
691
+ if key in self.PROTECTED_ENV_VARS:
692
+ raise ValueError(
693
+ f"Cannot override protected environment variable: {key}\n"
694
+ f"Protected variables are: {', '.join(sorted(self.PROTECTED_ENV_VARS))}"
695
+ )
696
+ docker_env_vars.update(exported_env_vars)
697
+
698
+ # Create modified environment with merged env vars using dataclass replace
699
+ from dataclasses import replace
700
+ modified_env = replace(env, env_vars=docker_env_vars)
701
+
451
702
  # Execute in container
452
703
  try:
453
704
  self.docker_manager.run_in_container(
454
- env=env,
705
+ env=modified_env,
455
706
  cmd=cmd,
456
707
  working_dir=working_dir,
457
708
  container_working_dir=container_working_dir,
@@ -459,21 +710,82 @@ class Executor:
459
710
  except docker_module.DockerError as e:
460
711
  raise ExecutionError(str(e)) from e
461
712
 
462
- def _substitute_args(self, cmd: str, args_dict: dict[str, Any]) -> str:
463
- """Substitute arguments in command string.
713
+ def _validate_no_working_dir_circular_ref(self, text: str) -> None:
714
+ """Validate that working_dir field does not contain {{ tt.working_dir }}.
715
+
716
+ Using {{ tt.working_dir }} in the working_dir field creates a circular dependency.
717
+
718
+ Args:
719
+ text: The working_dir field value to validate
720
+
721
+ Raises:
722
+ ExecutionError: If {{ tt.working_dir }} placeholder is found
723
+ """
724
+ import re
725
+ # Pattern to match {{ tt.working_dir }} specifically
726
+ pattern = re.compile(r'\{\{\s*tt\s*\.\s*working_dir\s*\}\}')
727
+
728
+ if pattern.search(text):
729
+ raise ExecutionError(
730
+ f"Cannot use {{{{ tt.working_dir }}}} in the 'working_dir' field.\n\n"
731
+ f"This creates a circular dependency (working_dir cannot reference itself).\n"
732
+ f"Other built-in variables like {{{{ tt.task_name }}}} or {{{{ tt.timestamp }}}} are allowed."
733
+ )
734
+
735
+ def _substitute_builtin(self, text: str, builtin_vars: dict[str, str]) -> str:
736
+ """Substitute {{ tt.name }} placeholders in text.
737
+
738
+ Built-in variables are resolved at execution time.
739
+
740
+ Args:
741
+ text: Text with {{ tt.name }} placeholders
742
+ builtin_vars: Built-in variable values
743
+
744
+ Returns:
745
+ Text with built-in variables substituted
746
+
747
+ Raises:
748
+ ValueError: If built-in variable is not defined
749
+ """
750
+ from tasktree.substitution import substitute_builtin_variables
751
+ return substitute_builtin_variables(text, builtin_vars)
752
+
753
+ def _substitute_args(self, cmd: str, args_dict: dict[str, Any], exported_args: set[str] | None = None) -> str:
754
+ """Substitute {{ arg.name }} placeholders in command string.
755
+
756
+ Variables are already substituted at parse time by the parser.
757
+ This only handles runtime argument substitution.
464
758
 
465
759
  Args:
466
- cmd: Command template with {{arg}} placeholders
467
- args_dict: Arguments to substitute
760
+ cmd: Command with {{ arg.name }} placeholders
761
+ args_dict: Argument values to substitute (only regular args)
762
+ exported_args: Set of argument names that are exported (not available for substitution)
468
763
 
469
764
  Returns:
470
765
  Command with arguments substituted
766
+
767
+ Raises:
768
+ ValueError: If an exported argument is used in template substitution
769
+ """
770
+ from tasktree.substitution import substitute_arguments
771
+ return substitute_arguments(cmd, args_dict, exported_args)
772
+
773
+ def _substitute_env(self, text: str) -> str:
774
+ """Substitute {{ env.NAME }} placeholders in text.
775
+
776
+ Environment variables are resolved at execution time from os.environ.
777
+
778
+ Args:
779
+ text: Text with {{ env.NAME }} placeholders
780
+
781
+ Returns:
782
+ Text with environment variables substituted
783
+
784
+ Raises:
785
+ ValueError: If environment variable is not set
471
786
  """
472
- result = cmd
473
- for key, value in args_dict.items():
474
- placeholder = f"{{{{{key}}}}}"
475
- result = result.replace(placeholder, str(value))
476
- return result
787
+ from tasktree.substitution import substitute_environment
788
+ return substitute_environment(text)
477
789
 
478
790
  def _get_all_inputs(self, task: Task) -> list[str]:
479
791
  """Get all inputs for a task (explicit + implicit from dependencies).
@@ -723,9 +1035,9 @@ class Executor:
723
1035
  task: Task that was executed
724
1036
  args_dict: Arguments used for execution
725
1037
  """
726
- # Compute hashes (include effective environment)
1038
+ # Compute hashes (include effective environment and dependencies)
727
1039
  effective_env = self._get_effective_env_name(task)
728
- task_hash = hash_task(task.cmd, task.outputs, task.working_dir, task.args, effective_env)
1040
+ task_hash = hash_task(task.cmd, task.outputs, task.working_dir, task.args, effective_env, task.deps)
729
1041
  args_hash = hash_args(args_dict) if args_dict else None
730
1042
  cache_key = make_cache_key(task_hash, args_hash)
731
1043