tasktree 0.0.5-py3-none-any.whl → 0.0.7-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tasktree/executor.py CHANGED
@@ -13,6 +13,7 @@ from datetime import datetime
13
13
  from pathlib import Path
14
14
  from typing import Any
15
15
 
16
+ from tasktree import docker as docker_module
16
17
  from tasktree.graph import get_implicit_inputs, resolve_execution_order
17
18
  from tasktree.hasher import hash_args, hash_task, make_cache_key
18
19
  from tasktree.parser import Recipe, Task
@@ -26,7 +27,7 @@ class TaskStatus:
26
27
  task_name: str
27
28
  will_run: bool
28
29
  reason: str # "fresh", "inputs_changed", "definition_changed",
29
- # "never_run", "dependency_triggered", "no_outputs"
30
+ # "never_run", "no_outputs", "outputs_missing", "forced", "environment_changed"
30
31
  changed_files: list[str] = field(default_factory=list)
31
32
  last_run: datetime | None = None
32
33
 
@@ -49,6 +50,7 @@ class Executor:
49
50
  """
50
51
  self.recipe = recipe
51
52
  self.state = state_manager
53
+ self.docker_manager = docker_module.DockerManager(recipe.project_root)
52
54
 
53
55
  def _get_platform_default_environment(self) -> tuple[str, list[str]]:
54
56
  """Get default shell and args for current platform.
@@ -133,7 +135,6 @@ class Executor:
133
135
  self,
134
136
  task: Task,
135
137
  args_dict: dict[str, Any],
136
- dep_statuses: dict[str, TaskStatus],
137
138
  force: bool = False,
138
139
  ) -> TaskStatus:
139
140
  """Check if a task needs to run.
@@ -141,16 +142,16 @@ class Executor:
141
142
  A task executes if ANY of these conditions are met:
142
143
  1. Force flag is set (--force)
143
144
  2. Task definition hash differs from cached state
144
- 3. Any explicit inputs have newer mtime than last_run
145
- 4. Any implicit inputs (from deps) have changed
146
- 5. No cached state exists for this task+args combination
147
- 6. Task has no inputs AND no outputs (always runs)
148
- 7. Different arguments than any cached execution
145
+ 3. Environment definition has changed
146
+ 4. Any explicit inputs have newer mtime than last_run
147
+ 5. Any implicit inputs (from deps) have changed
148
+ 6. No cached state exists for this task+args combination
149
+ 7. Task has no inputs AND no outputs (always runs)
150
+ 8. Different arguments than any cached execution
149
151
 
150
152
  Args:
151
153
  task: Task to check
152
154
  args_dict: Arguments for this task execution
153
- dep_statuses: Status of dependencies
154
155
  force: If True, ignore freshness and force execution
155
156
 
156
157
  Returns:
@@ -179,21 +180,23 @@ class Executor:
179
180
  reason="no_outputs",
180
181
  )
181
182
 
182
- # Check if any dependency triggered
183
- if any(status.will_run for status in dep_statuses.values()):
183
+ # Check cached state
184
+ cached_state = self.state.get(cache_key)
185
+ if cached_state is None:
184
186
  return TaskStatus(
185
187
  task_name=task.name,
186
188
  will_run=True,
187
- reason="dependency_triggered",
189
+ reason="never_run",
188
190
  )
189
191
 
190
- # Check cached state
191
- cached_state = self.state.get(cache_key)
192
- if cached_state is None:
192
+ # Check if environment definition has changed
193
+ env_changed = self._check_environment_changed(task, cached_state, effective_env)
194
+ if env_changed:
193
195
  return TaskStatus(
194
196
  task_name=task.name,
195
197
  will_run=True,
196
- reason="never_run",
198
+ reason="environment_changed",
199
+ last_run=datetime.fromtimestamp(cached_state.last_run),
197
200
  )
198
201
 
199
202
  # Check if inputs have changed
@@ -262,23 +265,19 @@ class Executor:
262
265
  # Execute task and all dependencies
263
266
  execution_order = resolve_execution_order(self.recipe, task_name)
264
267
 
265
- # Check status of all tasks
268
+ # Single phase: Check and execute incrementally
266
269
  statuses: dict[str, TaskStatus] = {}
267
270
  for name in execution_order:
268
271
  task = self.recipe.tasks[name]
269
272
 
270
- # Get status of dependencies
271
- dep_statuses = {dep: statuses[dep] for dep in task.deps if dep in statuses}
272
-
273
273
  # Determine task-specific args (only for target task)
274
274
  task_args = args_dict if name == task_name else {}
275
275
 
276
- status = self.check_task_status(task, task_args, dep_statuses, force=force)
276
+ # Check if task needs to run (based on CURRENT filesystem state)
277
+ status = self.check_task_status(task, task_args, force=force)
277
278
  statuses[name] = status
278
279
 
279
- # Execute tasks that need to run
280
- for name in execution_order:
281
- status = statuses[name]
280
+ # Execute immediately if needed
282
281
  if status.will_run:
283
282
  # Warn if re-running due to missing outputs
284
283
  if status.reason == "outputs_missing":
@@ -288,8 +287,6 @@ class Executor:
288
287
  file=sys.stderr,
289
288
  )
290
289
 
291
- task = self.recipe.tasks[name]
292
- task_args = args_dict if name == task_name else {}
293
290
  self._run_task(task, task_args)
294
291
 
295
292
  return statuses
@@ -310,17 +307,28 @@ class Executor:
310
307
  # Determine working directory
311
308
  working_dir = self.recipe.project_root / task.working_dir
312
309
 
313
- # Resolve environment for this task
314
- shell, shell_args, preamble = self._resolve_environment(task)
310
+ # Check if task uses Docker environment
311
+ env_name = self._get_effective_env_name(task)
312
+ env = None
313
+ if env_name:
314
+ env = self.recipe.get_environment(env_name)
315
315
 
316
316
  # Execute command
317
317
  print(f"Running: {task.name}")
318
318
 
319
- # Detect multi-line commands (ignore trailing newlines from YAML folded blocks)
320
- if "\n" in cmd.rstrip():
321
- self._run_multiline_command(cmd, working_dir, task.name, shell, preamble)
319
+ # Route to Docker execution or regular execution
320
+ if env and env.dockerfile:
321
+ # Docker execution path
322
+ self._run_task_in_docker(task, env, cmd, working_dir)
322
323
  else:
323
- self._run_single_line_command(cmd, working_dir, task.name, shell, shell_args)
324
+ # Regular execution path
325
+ shell, shell_args, preamble = self._resolve_environment(task)
326
+
327
+ # Detect multi-line commands (ignore trailing newlines from YAML folded blocks)
328
+ if "\n" in cmd.rstrip():
329
+ self._run_multiline_command(cmd, working_dir, task.name, shell, preamble)
330
+ else:
331
+ self._run_single_line_command(cmd, working_dir, task.name, shell, shell_args)
324
332
 
325
333
  # Update state
326
334
  self._update_state(task, args_dict)
@@ -421,6 +429,36 @@ class Executor:
421
429
  except OSError:
422
430
  pass # Ignore cleanup errors
423
431
 
432
+ def _run_task_in_docker(
433
+ self, task: Task, env: Any, cmd: str, working_dir: Path
434
+ ) -> None:
435
+ """Execute task inside Docker container.
436
+
437
+ Args:
438
+ task: Task to execute
439
+ env: Docker environment configuration
440
+ cmd: Command to execute
441
+ working_dir: Host working directory
442
+
443
+ Raises:
444
+ ExecutionError: If Docker execution fails
445
+ """
446
+ # Resolve container working directory
447
+ container_working_dir = docker_module.resolve_container_working_dir(
448
+ env.working_dir, task.working_dir
449
+ )
450
+
451
+ # Execute in container
452
+ try:
453
+ self.docker_manager.run_in_container(
454
+ env=env,
455
+ cmd=cmd,
456
+ working_dir=working_dir,
457
+ container_working_dir=container_working_dir,
458
+ )
459
+ except docker_module.DockerError as e:
460
+ raise ExecutionError(str(e)) from e
461
+
424
462
  def _substitute_args(self, cmd: str, args_dict: dict[str, Any]) -> str:
425
463
  """Substitute arguments in command string.
426
464
 
@@ -451,11 +489,100 @@ class Executor:
451
489
  all_inputs.extend(implicit_inputs)
452
490
  return all_inputs
453
491
 
492
+ def _check_environment_changed(
493
+ self, task: Task, cached_state: TaskState, env_name: str
494
+ ) -> bool:
495
+ """Check if environment definition has changed since last run.
496
+
497
+ For shell environments: checks YAML definition hash
498
+ For Docker environments: checks YAML hash AND Docker image ID
499
+
500
+ Args:
501
+ task: Task to check
502
+ cached_state: Cached state from previous run
503
+ env_name: Effective environment name (from _get_effective_env_name)
504
+
505
+ Returns:
506
+ True if environment definition changed, False otherwise
507
+ """
508
+ # If using platform default (no environment), no definition to track
509
+ if not env_name:
510
+ return False
511
+
512
+ # Get environment definition
513
+ env = self.recipe.get_environment(env_name)
514
+ if env is None:
515
+ # Environment was deleted - treat as changed
516
+ return True
517
+
518
+ # Compute current environment hash (YAML definition)
519
+ from tasktree.hasher import hash_environment_definition
520
+
521
+ current_env_hash = hash_environment_definition(env)
522
+
523
+ # Get cached environment hash
524
+ marker_key = f"_env_hash_{env_name}"
525
+ cached_env_hash = cached_state.input_state.get(marker_key)
526
+
527
+ # If no cached hash (old state file), treat as changed to establish baseline
528
+ if cached_env_hash is None:
529
+ return True
530
+
531
+ # Check if YAML definition changed
532
+ if current_env_hash != cached_env_hash:
533
+ return True # YAML changed, no need to check image
534
+
535
+ # For Docker environments, also check if image ID changed
536
+ if env.dockerfile:
537
+ return self._check_docker_image_changed(env, cached_state, env_name)
538
+
539
+ # Shell environment with unchanged hash
540
+ return False
541
+
542
+ def _check_docker_image_changed(
543
+ self, env: Environment, cached_state: TaskState, env_name: str
544
+ ) -> bool:
545
+ """Check if Docker image ID has changed.
546
+
547
+ Builds the image and compares the resulting image ID with the cached ID.
548
+ This detects changes from unpinned base images, network-dependent builds, etc.
549
+
550
+ Args:
551
+ env: Docker environment definition
552
+ cached_state: Cached state from previous run
553
+ env_name: Environment name
554
+
555
+ Returns:
556
+ True if image ID changed, False otherwise
557
+ """
558
+ # Build/ensure image is built and get its ID
559
+ try:
560
+ image_tag, current_image_id = self.docker_manager.ensure_image_built(env)
561
+ except Exception as e:
562
+ # If we can't build, treat as changed (will fail later with better error)
563
+ return True
564
+
565
+ # Get cached image ID
566
+ image_id_key = f"_docker_image_id_{env_name}"
567
+ cached_image_id = cached_state.input_state.get(image_id_key)
568
+
569
+ # If no cached ID (first run or old state), treat as changed
570
+ if cached_image_id is None:
571
+ return True
572
+
573
+ # Compare image IDs
574
+ return current_image_id != cached_image_id
575
+
454
576
  def _check_inputs_changed(
455
577
  self, task: Task, cached_state: TaskState, all_inputs: list[str]
456
578
  ) -> list[str]:
457
579
  """Check if any input files have changed since last run.
458
580
 
581
+ Handles both regular file inputs and Docker-specific inputs:
582
+ - Regular files: checked via mtime
583
+ - Docker context: checked via directory walk with early exit
584
+ - Dockerfile digests: checked via parsing and comparison
585
+
459
586
  Args:
460
587
  task: Task to check
461
588
  cached_state: Cached state from previous run
@@ -469,7 +596,66 @@ class Executor:
469
596
  # Expand glob patterns
470
597
  input_files = self._expand_globs(all_inputs, task.working_dir)
471
598
 
599
+ # Check if task uses Docker environment
600
+ env_name = self._get_effective_env_name(task)
601
+ docker_env = None
602
+ if env_name:
603
+ docker_env = self.recipe.get_environment(env_name)
604
+ if docker_env and not docker_env.dockerfile:
605
+ docker_env = None # Not a Docker environment
606
+
472
607
  for file_path in input_files:
608
+ # Handle Docker context directory check
609
+ if file_path.startswith("_docker_context_"):
610
+ if docker_env:
611
+ context_name = file_path.replace("_docker_context_", "")
612
+ context_path = self.recipe.project_root / context_name
613
+ dockerignore_path = context_path / ".dockerignore"
614
+
615
+ # Get last context check time
616
+ cached_context_time = cached_state.input_state.get(
617
+ f"_context_{context_name}"
618
+ )
619
+ if cached_context_time is None:
620
+ # Never checked before - consider changed
621
+ changed_files.append(f"Docker context: {context_name}")
622
+ continue
623
+
624
+ # Check if context changed (with early exit optimization)
625
+ if docker_module.context_changed_since(
626
+ context_path, dockerignore_path, cached_context_time
627
+ ):
628
+ changed_files.append(f"Docker context: {context_name}")
629
+ continue
630
+
631
+ # Handle Docker Dockerfile digest check
632
+ if file_path.startswith("_docker_dockerfile_"):
633
+ if docker_env:
634
+ dockerfile_name = file_path.replace("_docker_dockerfile_", "")
635
+ dockerfile_path = self.recipe.project_root / dockerfile_name
636
+
637
+ try:
638
+ dockerfile_content = dockerfile_path.read_text()
639
+ current_digests = set(
640
+ docker_module.parse_base_image_digests(dockerfile_content)
641
+ )
642
+
643
+ # Get cached digests
644
+ cached_digests = set()
645
+ for key in cached_state.input_state:
646
+ if key.startswith("_digest_"):
647
+ digest = key.replace("_digest_", "")
648
+ cached_digests.add(digest)
649
+
650
+ # Check if digests changed
651
+ if current_digests != cached_digests:
652
+ changed_files.append(f"Docker base image digests in {dockerfile_name}")
653
+ except (OSError, IOError):
654
+ # Can't read Dockerfile - consider changed
655
+ changed_files.append(f"Dockerfile: {dockerfile_name}")
656
+ continue
657
+
658
+ # Regular file check
473
659
  file_path_obj = self.recipe.project_root / task.working_dir / file_path
474
660
  if not file_path_obj.exists():
475
661
  continue
@@ -549,10 +735,61 @@ class Executor:
549
735
 
550
736
  input_state = {}
551
737
  for file_path in input_files:
738
+ # Skip Docker special markers (handled separately below)
739
+ if file_path.startswith("_docker_"):
740
+ continue
741
+
552
742
  file_path_obj = self.recipe.project_root / task.working_dir / file_path
553
743
  if file_path_obj.exists():
554
744
  input_state[file_path] = file_path_obj.stat().st_mtime
555
745
 
746
+ # Record Docker-specific inputs if task uses Docker environment
747
+ env_name = self._get_effective_env_name(task)
748
+ if env_name:
749
+ env = self.recipe.get_environment(env_name)
750
+ if env and env.dockerfile:
751
+ # Record Dockerfile mtime
752
+ dockerfile_path = self.recipe.project_root / env.dockerfile
753
+ if dockerfile_path.exists():
754
+ input_state[env.dockerfile] = dockerfile_path.stat().st_mtime
755
+
756
+ # Record .dockerignore mtime if exists
757
+ context_path = self.recipe.project_root / env.context
758
+ dockerignore_path = context_path / ".dockerignore"
759
+ if dockerignore_path.exists():
760
+ relative_dockerignore = str(
761
+ dockerignore_path.relative_to(self.recipe.project_root)
762
+ )
763
+ input_state[relative_dockerignore] = dockerignore_path.stat().st_mtime
764
+
765
+ # Record context check timestamp
766
+ input_state[f"_context_{env.context}"] = time.time()
767
+
768
+ # Parse and record base image digests from Dockerfile
769
+ try:
770
+ dockerfile_content = dockerfile_path.read_text()
771
+ digests = docker_module.parse_base_image_digests(dockerfile_content)
772
+ for digest in digests:
773
+ # Store digest with Dockerfile's mtime
774
+ input_state[f"_digest_{digest}"] = dockerfile_path.stat().st_mtime
775
+ except (OSError, IOError):
776
+ # If we can't read Dockerfile, skip digest tracking
777
+ pass
778
+
779
+ # Record environment definition hash for all environments (shell and Docker)
780
+ if env:
781
+ from tasktree.hasher import hash_environment_definition
782
+
783
+ env_hash = hash_environment_definition(env)
784
+ input_state[f"_env_hash_{env_name}"] = env_hash
785
+
786
+ # For Docker environments, also store the image ID
787
+ if env.dockerfile:
788
+ # Image was already built during check phase or task execution
789
+ if env_name in self.docker_manager._built_images:
790
+ image_tag, image_id = self.docker_manager._built_images[env_name]
791
+ input_state[f"_docker_image_id_{env_name}"] = image_id
792
+
556
793
  # Create new state
557
794
  state = TaskState(
558
795
  last_run=time.time(),
tasktree/graph.py CHANGED
@@ -1,6 +1,7 @@
1
1
  """Dependency resolution using topological sorting."""
2
2
 
3
3
  from graphlib import TopologicalSorter
4
+ from pathlib import Path
4
5
 
5
6
  from tasktree.parser import Recipe, Task
6
7
 
@@ -71,16 +72,21 @@ def get_implicit_inputs(recipe: Recipe, task: Task) -> list[str]:
71
72
  Tasks automatically inherit inputs from dependencies:
72
73
  1. All outputs from dependency tasks become implicit inputs
73
74
  2. All inputs from dependency tasks that don't declare outputs are inherited
75
+ 3. If task uses a Docker environment, Docker artifacts become implicit inputs:
76
+ - Dockerfile
77
+ - .dockerignore (if present)
78
+ - Special markers for context directory and base image digests
74
79
 
75
80
  Args:
76
81
  recipe: Parsed recipe containing all tasks
77
82
  task: Task to get implicit inputs for
78
83
 
79
84
  Returns:
80
- List of glob patterns for implicit inputs
85
+ List of glob patterns for implicit inputs, including Docker-specific markers
81
86
  """
82
87
  implicit_inputs = []
83
88
 
89
+ # Inherit from dependencies
84
90
  for dep_name in task.deps:
85
91
  dep_task = recipe.tasks.get(dep_name)
86
92
  if dep_task is None:
@@ -93,6 +99,29 @@ def get_implicit_inputs(recipe: Recipe, task: Task) -> list[str]:
93
99
  elif dep_task.inputs:
94
100
  implicit_inputs.extend(dep_task.inputs)
95
101
 
102
+ # Add Docker-specific implicit inputs if task uses Docker environment
103
+ env_name = task.env or recipe.default_env
104
+ if env_name:
105
+ env = recipe.get_environment(env_name)
106
+ if env and env.dockerfile:
107
+ # Add Dockerfile as input
108
+ implicit_inputs.append(env.dockerfile)
109
+
110
+ # Add .dockerignore if it exists in context directory
111
+ context_path = recipe.project_root / env.context
112
+ dockerignore_path = context_path / ".dockerignore"
113
+ if dockerignore_path.exists():
114
+ relative_dockerignore = str(
115
+ dockerignore_path.relative_to(recipe.project_root)
116
+ )
117
+ implicit_inputs.append(relative_dockerignore)
118
+
119
+ # Add special markers for context directory and digest tracking
120
+ # These are tracked differently in state management (not file paths)
121
+ # The executor will handle these specially
122
+ implicit_inputs.append(f"_docker_context_{env.context}")
123
+ implicit_inputs.append(f"_docker_dockerfile_{env.dockerfile}")
124
+
96
125
  return implicit_inputs
97
126
 
98
127
 
tasktree/hasher.py CHANGED
@@ -21,6 +21,33 @@ def hash_args(args_dict: dict[str, Any]) -> str:
21
21
  return hashlib.sha256(serialized.encode()).hexdigest()[:8]
22
22
 
23
23
 
24
+ def hash_environment_definition(env) -> str:
25
+ """Hash environment definition fields that affect task execution.
26
+
27
+ Args:
28
+ env: Environment to hash
29
+
30
+ Returns:
31
+ 16-character hash of environment definition
32
+ """
33
+ # Import inside function to avoid circular dependency
34
+ from tasktree.parser import Environment
35
+
36
+ data = {
37
+ "shell": env.shell,
38
+ "args": sorted(env.args), # Sort for determinism
39
+ "preamble": env.preamble,
40
+ "dockerfile": env.dockerfile,
41
+ "context": env.context,
42
+ "volumes": sorted(env.volumes),
43
+ "ports": sorted(env.ports),
44
+ "env_vars": dict(sorted(env.env_vars.items())),
45
+ "working_dir": env.working_dir,
46
+ }
47
+ serialized = json.dumps(data, sort_keys=True, separators=(",", ":"))
48
+ return hashlib.sha256(serialized.encode()).hexdigest()[:16]
49
+
50
+
24
51
  def make_cache_key(task_hash: str, args_hash: Optional[str] = None) -> str:
25
52
  if args_hash:
26
53
  return f"{task_hash}__{args_hash}"