tasktree 0.0.6__py3-none-any.whl → 0.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tasktree/executor.py CHANGED
@@ -9,13 +9,14 @@ import subprocess
9
9
  import tempfile
10
10
  import time
11
11
  from dataclasses import dataclass, field
12
- from datetime import datetime
12
+ from datetime import datetime, timezone
13
13
  from pathlib import Path
14
14
  from typing import Any
15
15
 
16
+ from tasktree import docker as docker_module
16
17
  from tasktree.graph import get_implicit_inputs, resolve_execution_order
17
18
  from tasktree.hasher import hash_args, hash_task, make_cache_key
18
- from tasktree.parser import Recipe, Task
19
+ from tasktree.parser import Recipe, Task, Environment
19
20
  from tasktree.state import StateManager, TaskState
20
21
 
21
22
 
@@ -26,7 +27,7 @@ class TaskStatus:
26
27
  task_name: str
27
28
  will_run: bool
28
29
  reason: str # "fresh", "inputs_changed", "definition_changed",
29
- # "never_run", "dependency_triggered", "no_outputs"
30
+ # "never_run", "no_outputs", "outputs_missing", "forced", "environment_changed"
30
31
  changed_files: list[str] = field(default_factory=list)
31
32
  last_run: datetime | None = None
32
33
 
@@ -40,6 +41,18 @@ class ExecutionError(Exception):
40
41
  class Executor:
41
42
  """Executes tasks with incremental execution logic."""
42
43
 
44
+ # Protected environment variables that cannot be overridden by exported args
45
+ PROTECTED_ENV_VARS = {
46
+ 'PATH',
47
+ 'LD_LIBRARY_PATH',
48
+ 'LD_PRELOAD',
49
+ 'PYTHONPATH',
50
+ 'HOME',
51
+ 'SHELL',
52
+ 'USER',
53
+ 'LOGNAME',
54
+ }
55
+
43
56
  def __init__(self, recipe: Recipe, state_manager: StateManager):
44
57
  """Initialize executor.
45
58
 
@@ -49,6 +62,107 @@ class Executor:
49
62
  """
50
63
  self.recipe = recipe
51
64
  self.state = state_manager
65
+ self.docker_manager = docker_module.DockerManager(recipe.project_root)
66
+
67
+ def _collect_early_builtin_variables(self, task: Task, timestamp: datetime) -> dict[str, str]:
68
+ """Collect built-in variables that don't depend on working_dir.
69
+
70
+ These variables can be used in the working_dir field itself.
71
+
72
+ Args:
73
+ task: Task being executed
74
+ timestamp: Timestamp when task started execution
75
+
76
+ Returns:
77
+ Dictionary mapping built-in variable names to their string values
78
+
79
+ Raises:
80
+ ExecutionError: If any built-in variable fails to resolve
81
+ """
82
+ import os
83
+
84
+ builtin_vars = {}
85
+
86
+ # {{ tt.project_root }} - Absolute path to project root
87
+ builtin_vars['project_root'] = str(self.recipe.project_root.resolve())
88
+
89
+ # {{ tt.recipe_dir }} - Absolute path to directory containing the recipe file
90
+ builtin_vars['recipe_dir'] = str(self.recipe.recipe_path.parent.resolve())
91
+
92
+ # {{ tt.task_name }} - Name of currently executing task
93
+ builtin_vars['task_name'] = task.name
94
+
95
+ # {{ tt.timestamp }} - ISO8601 timestamp when task started execution
96
+ builtin_vars['timestamp'] = timestamp.strftime('%Y-%m-%dT%H:%M:%SZ')
97
+
98
+ # {{ tt.timestamp_unix }} - Unix epoch timestamp when task started
99
+ builtin_vars['timestamp_unix'] = str(int(timestamp.timestamp()))
100
+
101
+ # {{ tt.user_home }} - Current user's home directory (cross-platform)
102
+ try:
103
+ user_home = Path.home()
104
+ builtin_vars['user_home'] = str(user_home)
105
+ except Exception as e:
106
+ raise ExecutionError(
107
+ f"Failed to get user home directory for {{ tt.user_home }}: {e}"
108
+ )
109
+
110
+ # {{ tt.user_name }} - Current username (with fallback)
111
+ try:
112
+ user_name = os.getlogin()
113
+ except OSError:
114
+ # Fallback to environment variables if os.getlogin() fails
115
+ user_name = os.environ.get('USER') or os.environ.get('USERNAME') or 'unknown'
116
+ builtin_vars['user_name'] = user_name
117
+
118
+ return builtin_vars
119
+
120
+ def _collect_builtin_variables(self, task: Task, working_dir: Path, timestamp: datetime) -> dict[str, str]:
121
+ """Collect built-in variables for task execution.
122
+
123
+ Args:
124
+ task: Task being executed
125
+ working_dir: Resolved working directory for the task
126
+ timestamp: Timestamp when task started execution
127
+
128
+ Returns:
129
+ Dictionary mapping built-in variable names to their string values
130
+
131
+ Raises:
132
+ ExecutionError: If any built-in variable fails to resolve
133
+ """
134
+ # Get early builtin vars (those that don't depend on working_dir)
135
+ builtin_vars = self._collect_early_builtin_variables(task, timestamp)
136
+
137
+ # {{ tt.working_dir }} - Absolute path to task's effective working directory
138
+ # This is added after working_dir is resolved to avoid circular dependency
139
+ builtin_vars['working_dir'] = str(working_dir.resolve())
140
+
141
+ return builtin_vars
142
+
143
+ def _prepare_env_with_exports(self, exported_env_vars: dict[str, str] | None = None) -> dict[str, str]:
144
+ """Prepare environment with exported arguments.
145
+
146
+ Args:
147
+ exported_env_vars: Exported arguments to set as environment variables
148
+
149
+ Returns:
150
+ Environment dict with exported args merged
151
+
152
+ Raises:
153
+ ValueError: If an exported arg attempts to override a protected environment variable
154
+ """
155
+ env = os.environ.copy()
156
+ if exported_env_vars:
157
+ # Check for protected environment variable overrides
158
+ for key in exported_env_vars:
159
+ if key in self.PROTECTED_ENV_VARS:
160
+ raise ValueError(
161
+ f"Cannot override protected environment variable: {key}\n"
162
+ f"Protected variables are: {', '.join(sorted(self.PROTECTED_ENV_VARS))}"
163
+ )
164
+ env.update(exported_env_vars)
165
+ return env
52
166
 
53
167
  def _get_platform_default_environment(self) -> tuple[str, list[str]]:
54
168
  """Get default shell and args for current platform.
@@ -133,7 +247,6 @@ class Executor:
133
247
  self,
134
248
  task: Task,
135
249
  args_dict: dict[str, Any],
136
- dep_statuses: dict[str, TaskStatus],
137
250
  force: bool = False,
138
251
  ) -> TaskStatus:
139
252
  """Check if a task needs to run.
@@ -141,16 +254,16 @@ class Executor:
141
254
  A task executes if ANY of these conditions are met:
142
255
  1. Force flag is set (--force)
143
256
  2. Task definition hash differs from cached state
144
- 3. Any explicit inputs have newer mtime than last_run
145
- 4. Any implicit inputs (from deps) have changed
146
- 5. No cached state exists for this task+args combination
147
- 6. Task has no inputs AND no outputs (always runs)
148
- 7. Different arguments than any cached execution
257
+ 3. Environment definition has changed
258
+ 4. Any explicit inputs have newer mtime than last_run
259
+ 5. Any implicit inputs (from deps) have changed
260
+ 6. No cached state exists for this task+args combination
261
+ 7. Task has no inputs AND no outputs (always runs)
262
+ 8. Different arguments than any cached execution
149
263
 
150
264
  Args:
151
265
  task: Task to check
152
266
  args_dict: Arguments for this task execution
153
- dep_statuses: Status of dependencies
154
267
  force: If True, ignore freshness and force execution
155
268
 
156
269
  Returns:
@@ -179,21 +292,23 @@ class Executor:
179
292
  reason="no_outputs",
180
293
  )
181
294
 
182
- # Check if any dependency triggered
183
- if any(status.will_run for status in dep_statuses.values()):
295
+ # Check cached state
296
+ cached_state = self.state.get(cache_key)
297
+ if cached_state is None:
184
298
  return TaskStatus(
185
299
  task_name=task.name,
186
300
  will_run=True,
187
- reason="dependency_triggered",
301
+ reason="never_run",
188
302
  )
189
303
 
190
- # Check cached state
191
- cached_state = self.state.get(cache_key)
192
- if cached_state is None:
304
+ # Check if environment definition has changed
305
+ env_changed = self._check_environment_changed(task, cached_state, effective_env)
306
+ if env_changed:
193
307
  return TaskStatus(
194
308
  task_name=task.name,
195
309
  will_run=True,
196
- reason="never_run",
310
+ reason="environment_changed",
311
+ last_run=datetime.fromtimestamp(cached_state.last_run),
197
312
  )
198
313
 
199
314
  # Check if inputs have changed
@@ -262,23 +377,19 @@ class Executor:
262
377
  # Execute task and all dependencies
263
378
  execution_order = resolve_execution_order(self.recipe, task_name)
264
379
 
265
- # Check status of all tasks
380
+ # Single phase: Check and execute incrementally
266
381
  statuses: dict[str, TaskStatus] = {}
267
382
  for name in execution_order:
268
383
  task = self.recipe.tasks[name]
269
384
 
270
- # Get status of dependencies
271
- dep_statuses = {dep: statuses[dep] for dep in task.deps if dep in statuses}
272
-
273
385
  # Determine task-specific args (only for target task)
274
386
  task_args = args_dict if name == task_name else {}
275
387
 
276
- status = self.check_task_status(task, task_args, dep_statuses, force=force)
388
+ # Check if task needs to run (based on CURRENT filesystem state)
389
+ status = self.check_task_status(task, task_args, force=force)
277
390
  statuses[name] = status
278
391
 
279
- # Execute tasks that need to run
280
- for name in execution_order:
281
- status = statuses[name]
392
+ # Execute immediately if needed
282
393
  if status.will_run:
283
394
  # Warn if re-running due to missing outputs
284
395
  if status.reason == "outputs_missing":
@@ -288,8 +399,6 @@ class Executor:
288
399
  file=sys.stderr,
289
400
  )
290
401
 
291
- task = self.recipe.tasks[name]
292
- task_args = args_dict if name == task_name else {}
293
402
  self._run_task(task, task_args)
294
403
 
295
404
  return statuses
@@ -304,29 +413,77 @@ class Executor:
304
413
  Raises:
305
414
  ExecutionError: If task execution fails
306
415
  """
307
- # Substitute arguments in command
308
- cmd = self._substitute_args(task.cmd, args_dict)
309
-
310
- # Determine working directory
311
- working_dir = self.recipe.project_root / task.working_dir
312
-
313
- # Resolve environment for this task
314
- shell, shell_args, preamble = self._resolve_environment(task)
416
+ # Capture timestamp at task start for consistency (in UTC)
417
+ task_start_time = datetime.now(timezone.utc)
418
+
419
+ # Parse task arguments to identify exported args
420
+ # Note: args_dict already has defaults applied by CLI (cli.py:413-424)
421
+ from tasktree.parser import parse_arg_spec
422
+ exported_args = set()
423
+ regular_args = {}
424
+ exported_env_vars = {}
425
+
426
+ for arg_spec in task.args:
427
+ parsed = parse_arg_spec(arg_spec)
428
+ if parsed.is_exported:
429
+ exported_args.add(parsed.name)
430
+ # Get value and convert to string for environment variable
431
+ # Value should always be in args_dict (CLI applies defaults)
432
+ if parsed.name in args_dict:
433
+ exported_env_vars[parsed.name] = str(args_dict[parsed.name])
434
+ else:
435
+ if parsed.name in args_dict:
436
+ regular_args[parsed.name] = args_dict[parsed.name]
437
+
438
+ # Collect early built-in variables (those that don't depend on working_dir)
439
+ # These can be used in the working_dir field itself
440
+ early_builtin_vars = self._collect_early_builtin_variables(task, task_start_time)
441
+
442
+ # Resolve working directory
443
+ # Validate that working_dir doesn't contain {{ tt.working_dir }} (circular dependency)
444
+ self._validate_no_working_dir_circular_ref(task.working_dir)
445
+ working_dir_str = self._substitute_builtin(task.working_dir, early_builtin_vars)
446
+ working_dir_str = self._substitute_args(working_dir_str, regular_args, exported_args)
447
+ working_dir_str = self._substitute_env(working_dir_str)
448
+ working_dir = self.recipe.project_root / working_dir_str
449
+
450
+ # Collect all built-in variables (including tt.working_dir now that it's resolved)
451
+ builtin_vars = self._collect_builtin_variables(task, working_dir, task_start_time)
452
+
453
+ # Substitute built-in variables, arguments, and environment variables in command
454
+ cmd = self._substitute_builtin(task.cmd, builtin_vars)
455
+ cmd = self._substitute_args(cmd, regular_args, exported_args)
456
+ cmd = self._substitute_env(cmd)
457
+
458
+ # Check if task uses Docker environment
459
+ env_name = self._get_effective_env_name(task)
460
+ env = None
461
+ if env_name:
462
+ env = self.recipe.get_environment(env_name)
315
463
 
316
464
  # Execute command
317
465
  print(f"Running: {task.name}")
318
466
 
319
- # Detect multi-line commands (ignore trailing newlines from YAML folded blocks)
320
- if "\n" in cmd.rstrip():
321
- self._run_multiline_command(cmd, working_dir, task.name, shell, preamble)
467
+ # Route to Docker execution or regular execution
468
+ if env and env.dockerfile:
469
+ # Docker execution path
470
+ self._run_task_in_docker(task, env, cmd, working_dir, exported_env_vars)
322
471
  else:
323
- self._run_single_line_command(cmd, working_dir, task.name, shell, shell_args)
472
+ # Regular execution path
473
+ shell, shell_args, preamble = self._resolve_environment(task)
474
+
475
+ # Detect multi-line commands (ignore trailing newlines from YAML folded blocks)
476
+ if "\n" in cmd.rstrip():
477
+ self._run_multiline_command(cmd, working_dir, task.name, shell, preamble, exported_env_vars)
478
+ else:
479
+ self._run_single_line_command(cmd, working_dir, task.name, shell, shell_args, exported_env_vars)
324
480
 
325
481
  # Update state
326
482
  self._update_state(task, args_dict)
327
483
 
328
484
  def _run_single_line_command(
329
- self, cmd: str, working_dir: Path, task_name: str, shell: str, shell_args: list[str]
485
+ self, cmd: str, working_dir: Path, task_name: str, shell: str, shell_args: list[str],
486
+ exported_env_vars: dict[str, str] | None = None
330
487
  ) -> None:
331
488
  """Execute a single-line command via shell.
332
489
 
@@ -336,10 +493,14 @@ class Executor:
336
493
  task_name: Task name (for error messages)
337
494
  shell: Shell executable to use
338
495
  shell_args: Arguments to pass to shell
496
+ exported_env_vars: Exported arguments to set as environment variables
339
497
 
340
498
  Raises:
341
499
  ExecutionError: If command execution fails
342
500
  """
501
+ # Prepare environment with exported args
502
+ env = self._prepare_env_with_exports(exported_env_vars)
503
+
343
504
  try:
344
505
  # Build command: shell + args + cmd
345
506
  full_cmd = [shell] + shell_args + [cmd]
@@ -348,6 +509,7 @@ class Executor:
348
509
  cwd=working_dir,
349
510
  check=True,
350
511
  capture_output=False,
512
+ env=env,
351
513
  )
352
514
  except subprocess.CalledProcessError as e:
353
515
  raise ExecutionError(
@@ -355,7 +517,8 @@ class Executor:
355
517
  )
356
518
 
357
519
  def _run_multiline_command(
358
- self, cmd: str, working_dir: Path, task_name: str, shell: str, preamble: str
520
+ self, cmd: str, working_dir: Path, task_name: str, shell: str, preamble: str,
521
+ exported_env_vars: dict[str, str] | None = None
359
522
  ) -> None:
360
523
  """Execute a multi-line command via temporary script file.
361
524
 
@@ -365,10 +528,14 @@ class Executor:
365
528
  task_name: Task name (for error messages)
366
529
  shell: Shell to use for script execution
367
530
  preamble: Preamble text to prepend to script
531
+ exported_env_vars: Exported arguments to set as environment variables
368
532
 
369
533
  Raises:
370
534
  ExecutionError: If command execution fails
371
535
  """
536
+ # Prepare environment with exported args
537
+ env = self._prepare_env_with_exports(exported_env_vars)
538
+
372
539
  # Determine file extension based on platform
373
540
  is_windows = platform.system() == "Windows"
374
541
  script_ext = ".bat" if is_windows else ".sh"
@@ -409,6 +576,7 @@ class Executor:
409
576
  cwd=working_dir,
410
577
  check=True,
411
578
  capture_output=False,
579
+ env=env,
412
580
  )
413
581
  except subprocess.CalledProcessError as e:
414
582
  raise ExecutionError(
@@ -421,21 +589,130 @@ class Executor:
421
589
  except OSError:
422
590
  pass # Ignore cleanup errors
423
591
 
424
- def _substitute_args(self, cmd: str, args_dict: dict[str, Any]) -> str:
425
- """Substitute arguments in command string.
592
def _run_task_in_docker(
    self, task: Task, env: Any, cmd: str, working_dir: Path,
    exported_env_vars: dict[str, str] | None = None
) -> None:
    """Run a task's command through the Docker manager.

    Args:
        task: Task to execute
        env: Docker environment configuration
        cmd: Command to execute
        working_dir: Host working directory
        exported_env_vars: Exported arguments to set as environment variables

    Raises:
        ExecutionError: If the Docker manager reports a DockerError
        ValueError: If an exported arg names a variable listed in
            ``PROTECTED_ENV_VARS``
    """
    from dataclasses import replace

    # Work out which directory inside the container the command runs in.
    container_cwd = docker_module.resolve_container_working_dir(
        env.working_dir, task.working_dir
    )

    # Start from the environment's own variables; exported args are applied
    # on top, so they win on conflict.
    merged_vars = env.env_vars.copy() if env.env_vars else {}
    if exported_env_vars:
        for name in exported_env_vars:
            if name in self.PROTECTED_ENV_VARS:
                raise ValueError(
                    f"Cannot override protected environment variable: {name}\n"
                    f"Protected variables are: {', '.join(sorted(self.PROTECTED_ENV_VARS))}"
                )
        merged_vars.update(exported_env_vars)

    # Pass a copy of the environment carrying the merged variables; the
    # original environment object is left untouched.
    container_env = replace(env, env_vars=merged_vars)

    try:
        self.docker_manager.run_in_container(
            env=container_env,
            cmd=cmd,
            working_dir=working_dir,
            container_working_dir=container_cwd,
        )
    except docker_module.DockerError as e:
        raise ExecutionError(str(e)) from e
639
+
640
+ def _validate_no_working_dir_circular_ref(self, text: str) -> None:
641
+ """Validate that working_dir field does not contain {{ tt.working_dir }}.
642
+
643
+ Using {{ tt.working_dir }} in the working_dir field creates a circular dependency.
644
+
645
+ Args:
646
+ text: The working_dir field value to validate
647
+
648
+ Raises:
649
+ ExecutionError: If {{ tt.working_dir }} placeholder is found
650
+ """
651
+ import re
652
+ # Pattern to match {{ tt.working_dir }} specifically
653
+ pattern = re.compile(r'\{\{\s*tt\s*\.\s*working_dir\s*\}\}')
654
+
655
+ if pattern.search(text):
656
+ raise ExecutionError(
657
+ f"Cannot use {{{{ tt.working_dir }}}} in the 'working_dir' field.\n\n"
658
+ f"This creates a circular dependency (working_dir cannot reference itself).\n"
659
+ f"Other built-in variables like {{{{ tt.task_name }}}} or {{{{ tt.timestamp }}}} are allowed."
660
+ )
661
+
662
def _substitute_builtin(self, text: str, builtin_vars: dict[str, str]) -> str:
    """Replace {{ tt.name }} placeholders in text.

    Thin wrapper around ``tasktree.substitution.substitute_builtin_variables``;
    whatever that helper returns or raises is passed through unchanged.

    Args:
        text: Text with {{ tt.name }} placeholders
        builtin_vars: Built-in variable values

    Returns:
        Text with built-in variables substituted

    Raises:
        ValueError: If built-in variable is not defined
    """
    from tasktree import substitution

    rendered = substitution.substitute_builtin_variables(text, builtin_vars)
    return rendered
679
+
680
def _substitute_args(self, cmd: str, args_dict: dict[str, Any], exported_args: set[str] | None = None) -> str:
    """Replace {{ arg.name }} placeholders in a command string.

    Thin wrapper around ``tasktree.substitution.substitute_arguments``;
    whatever that helper returns or raises is passed through unchanged.

    Args:
        cmd: Command with {{ arg.name }} placeholders
        args_dict: Argument values to substitute (only regular args)
        exported_args: Set of argument names that are exported (not available for substitution)

    Returns:
        Command with arguments substituted

    Raises:
        ValueError: If an exported argument is used in template substitution
    """
    from tasktree import substitution

    rendered = substitution.substitute_arguments(cmd, args_dict, exported_args)
    return rendered
699
+
700
def _substitute_env(self, text: str) -> str:
    """Replace {{ env.NAME }} placeholders in text.

    Thin wrapper around ``tasktree.substitution.substitute_environment``;
    whatever that helper returns or raises is passed through unchanged.

    Args:
        text: Text with {{ env.NAME }} placeholders

    Returns:
        Text with environment variables substituted

    Raises:
        ValueError: If environment variable is not set
    """
    from tasktree import substitution

    rendered = substitution.substitute_environment(text)
    return rendered
439
716
 
440
717
  def _get_all_inputs(self, task: Task) -> list[str]:
441
718
  """Get all inputs for a task (explicit + implicit from dependencies).
@@ -451,11 +728,100 @@ class Executor:
451
728
  all_inputs.extend(implicit_inputs)
452
729
  return all_inputs
453
730
 
731
+ def _check_environment_changed(
732
+ self, task: Task, cached_state: TaskState, env_name: str
733
+ ) -> bool:
734
+ """Check if environment definition has changed since last run.
735
+
736
+ For shell environments: checks YAML definition hash
737
+ For Docker environments: checks YAML hash AND Docker image ID
738
+
739
+ Args:
740
+ task: Task to check
741
+ cached_state: Cached state from previous run
742
+ env_name: Effective environment name (from _get_effective_env_name)
743
+
744
+ Returns:
745
+ True if environment definition changed, False otherwise
746
+ """
747
+ # If using platform default (no environment), no definition to track
748
+ if not env_name:
749
+ return False
750
+
751
+ # Get environment definition
752
+ env = self.recipe.get_environment(env_name)
753
+ if env is None:
754
+ # Environment was deleted - treat as changed
755
+ return True
756
+
757
+ # Compute current environment hash (YAML definition)
758
+ from tasktree.hasher import hash_environment_definition
759
+
760
+ current_env_hash = hash_environment_definition(env)
761
+
762
+ # Get cached environment hash
763
+ marker_key = f"_env_hash_{env_name}"
764
+ cached_env_hash = cached_state.input_state.get(marker_key)
765
+
766
+ # If no cached hash (old state file), treat as changed to establish baseline
767
+ if cached_env_hash is None:
768
+ return True
769
+
770
+ # Check if YAML definition changed
771
+ if current_env_hash != cached_env_hash:
772
+ return True # YAML changed, no need to check image
773
+
774
+ # For Docker environments, also check if image ID changed
775
+ if env.dockerfile:
776
+ return self._check_docker_image_changed(env, cached_state, env_name)
777
+
778
+ # Shell environment with unchanged hash
779
+ return False
780
+
781
+ def _check_docker_image_changed(
782
+ self, env: Environment, cached_state: TaskState, env_name: str
783
+ ) -> bool:
784
+ """Check if Docker image ID has changed.
785
+
786
+ Builds the image and compares the resulting image ID with the cached ID.
787
+ This detects changes from unpinned base images, network-dependent builds, etc.
788
+
789
+ Args:
790
+ env: Docker environment definition
791
+ cached_state: Cached state from previous run
792
+ env_name: Environment name
793
+
794
+ Returns:
795
+ True if image ID changed, False otherwise
796
+ """
797
+ # Build/ensure image is built and get its ID
798
+ try:
799
+ image_tag, current_image_id = self.docker_manager.ensure_image_built(env)
800
+ except Exception as e:
801
+ # If we can't build, treat as changed (will fail later with better error)
802
+ return True
803
+
804
+ # Get cached image ID
805
+ image_id_key = f"_docker_image_id_{env_name}"
806
+ cached_image_id = cached_state.input_state.get(image_id_key)
807
+
808
+ # If no cached ID (first run or old state), treat as changed
809
+ if cached_image_id is None:
810
+ return True
811
+
812
+ # Compare image IDs
813
+ return current_image_id != cached_image_id
814
+
454
815
  def _check_inputs_changed(
455
816
  self, task: Task, cached_state: TaskState, all_inputs: list[str]
456
817
  ) -> list[str]:
457
818
  """Check if any input files have changed since last run.
458
819
 
820
+ Handles both regular file inputs and Docker-specific inputs:
821
+ - Regular files: checked via mtime
822
+ - Docker context: checked via directory walk with early exit
823
+ - Dockerfile digests: checked via parsing and comparison
824
+
459
825
  Args:
460
826
  task: Task to check
461
827
  cached_state: Cached state from previous run
@@ -469,7 +835,66 @@ class Executor:
469
835
  # Expand glob patterns
470
836
  input_files = self._expand_globs(all_inputs, task.working_dir)
471
837
 
838
+ # Check if task uses Docker environment
839
+ env_name = self._get_effective_env_name(task)
840
+ docker_env = None
841
+ if env_name:
842
+ docker_env = self.recipe.get_environment(env_name)
843
+ if docker_env and not docker_env.dockerfile:
844
+ docker_env = None # Not a Docker environment
845
+
472
846
  for file_path in input_files:
847
+ # Handle Docker context directory check
848
+ if file_path.startswith("_docker_context_"):
849
+ if docker_env:
850
+ context_name = file_path.replace("_docker_context_", "")
851
+ context_path = self.recipe.project_root / context_name
852
+ dockerignore_path = context_path / ".dockerignore"
853
+
854
+ # Get last context check time
855
+ cached_context_time = cached_state.input_state.get(
856
+ f"_context_{context_name}"
857
+ )
858
+ if cached_context_time is None:
859
+ # Never checked before - consider changed
860
+ changed_files.append(f"Docker context: {context_name}")
861
+ continue
862
+
863
+ # Check if context changed (with early exit optimization)
864
+ if docker_module.context_changed_since(
865
+ context_path, dockerignore_path, cached_context_time
866
+ ):
867
+ changed_files.append(f"Docker context: {context_name}")
868
+ continue
869
+
870
+ # Handle Docker Dockerfile digest check
871
+ if file_path.startswith("_docker_dockerfile_"):
872
+ if docker_env:
873
+ dockerfile_name = file_path.replace("_docker_dockerfile_", "")
874
+ dockerfile_path = self.recipe.project_root / dockerfile_name
875
+
876
+ try:
877
+ dockerfile_content = dockerfile_path.read_text()
878
+ current_digests = set(
879
+ docker_module.parse_base_image_digests(dockerfile_content)
880
+ )
881
+
882
+ # Get cached digests
883
+ cached_digests = set()
884
+ for key in cached_state.input_state:
885
+ if key.startswith("_digest_"):
886
+ digest = key.replace("_digest_", "")
887
+ cached_digests.add(digest)
888
+
889
+ # Check if digests changed
890
+ if current_digests != cached_digests:
891
+ changed_files.append(f"Docker base image digests in {dockerfile_name}")
892
+ except (OSError, IOError):
893
+ # Can't read Dockerfile - consider changed
894
+ changed_files.append(f"Dockerfile: {dockerfile_name}")
895
+ continue
896
+
897
+ # Regular file check
473
898
  file_path_obj = self.recipe.project_root / task.working_dir / file_path
474
899
  if not file_path_obj.exists():
475
900
  continue
@@ -549,10 +974,61 @@ class Executor:
549
974
 
550
975
  input_state = {}
551
976
  for file_path in input_files:
977
+ # Skip Docker special markers (handled separately below)
978
+ if file_path.startswith("_docker_"):
979
+ continue
980
+
552
981
  file_path_obj = self.recipe.project_root / task.working_dir / file_path
553
982
  if file_path_obj.exists():
554
983
  input_state[file_path] = file_path_obj.stat().st_mtime
555
984
 
985
+ # Record Docker-specific inputs if task uses Docker environment
986
+ env_name = self._get_effective_env_name(task)
987
+ if env_name:
988
+ env = self.recipe.get_environment(env_name)
989
+ if env and env.dockerfile:
990
+ # Record Dockerfile mtime
991
+ dockerfile_path = self.recipe.project_root / env.dockerfile
992
+ if dockerfile_path.exists():
993
+ input_state[env.dockerfile] = dockerfile_path.stat().st_mtime
994
+
995
+ # Record .dockerignore mtime if exists
996
+ context_path = self.recipe.project_root / env.context
997
+ dockerignore_path = context_path / ".dockerignore"
998
+ if dockerignore_path.exists():
999
+ relative_dockerignore = str(
1000
+ dockerignore_path.relative_to(self.recipe.project_root)
1001
+ )
1002
+ input_state[relative_dockerignore] = dockerignore_path.stat().st_mtime
1003
+
1004
+ # Record context check timestamp
1005
+ input_state[f"_context_{env.context}"] = time.time()
1006
+
1007
+ # Parse and record base image digests from Dockerfile
1008
+ try:
1009
+ dockerfile_content = dockerfile_path.read_text()
1010
+ digests = docker_module.parse_base_image_digests(dockerfile_content)
1011
+ for digest in digests:
1012
+ # Store digest with Dockerfile's mtime
1013
+ input_state[f"_digest_{digest}"] = dockerfile_path.stat().st_mtime
1014
+ except (OSError, IOError):
1015
+ # If we can't read Dockerfile, skip digest tracking
1016
+ pass
1017
+
1018
+ # Record environment definition hash for all environments (shell and Docker)
1019
+ if env:
1020
+ from tasktree.hasher import hash_environment_definition
1021
+
1022
+ env_hash = hash_environment_definition(env)
1023
+ input_state[f"_env_hash_{env_name}"] = env_hash
1024
+
1025
+ # For Docker environments, also store the image ID
1026
+ if env.dockerfile:
1027
+ # Image was already built during check phase or task execution
1028
+ if env_name in self.docker_manager._built_images:
1029
+ image_tag, image_id = self.docker_manager._built_images[env_name]
1030
+ input_state[f"_docker_image_id_{env_name}"] = image_id
1031
+
556
1032
  # Create new state
557
1033
  state = TaskState(
558
1034
  last_run=time.time(),