tasktree 0.0.6__py3-none-any.whl → 0.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tasktree/docker.py ADDED
@@ -0,0 +1,413 @@
1
+ """Docker integration for Task Tree.
2
+
3
+ Provides Docker image building and container execution capabilities.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import os
9
+ import re
10
+ import subprocess
11
+ import time
12
+ from pathlib import Path
13
+ from typing import TYPE_CHECKING
14
+
15
+ try:
16
+ import pathspec
17
+ except ImportError:
18
+ pathspec = None # type: ignore
19
+
20
+ if TYPE_CHECKING:
21
+ from tasktree.parser import Environment
22
+
23
+
24
class DockerError(Exception):
    """Raised when any Docker operation (build, run, inspect) fails."""
28
+
29
+
30
class DockerManager:
    """Manages Docker image building and container execution."""

    def __init__(self, project_root: Path):
        """Initialize Docker manager.

        Args:
            project_root: Root directory of the project (where tasktree.yaml is located)
        """
        self._project_root = project_root
        # env_name -> (image_tag, image_id); per-invocation cache so each
        # environment's image is built at most once per process.
        self._built_images: dict[str, tuple[str, str]] = {}

    def ensure_image_built(self, env: Environment) -> tuple[str, str]:
        """Build Docker image if not already built this invocation.

        Args:
            env: Environment definition with dockerfile and context

        Returns:
            Tuple of (image_tag, image_id)
            - image_tag: Tag like "tt-env-builder"
            - image_id: Full image ID like "sha256:abc123..."

        Raises:
            DockerError: If docker command not available or build fails
        """
        # Check if already built this invocation
        if env.name in self._built_images:
            tag, image_id = self._built_images[env.name]
            return tag, image_id

        # Check if docker is available
        self._check_docker_available()

        # Resolve paths relative to the project root
        dockerfile_path = self._project_root / env.dockerfile
        context_path = self._project_root / env.context

        # Generate image tag
        image_tag = f"tt-env-{env.name}"

        # Build the image
        try:
            subprocess.run(
                [
                    "docker",
                    "build",
                    "-t",
                    image_tag,
                    "-f",
                    str(dockerfile_path),
                    str(context_path),
                ],
                check=True,
                capture_output=False,  # Show build output to user
            )
        except subprocess.CalledProcessError as e:
            raise DockerError(
                f"Failed to build Docker image for environment '{env.name}': "
                f"docker build exited with code {e.returncode}"
            ) from e
        except FileNotFoundError as e:
            # Chain the original error so the root cause stays in the traceback.
            raise DockerError(
                "Docker command not found. Please install Docker and ensure it's in your PATH."
            ) from e

        # Get the image ID
        image_id = self._get_image_id(image_tag)

        # Cache both tag and ID
        self._built_images[env.name] = (image_tag, image_id)
        return image_tag, image_id

    def run_in_container(
        self,
        env: Environment,
        cmd: str,
        working_dir: Path,
        container_working_dir: str,
    ) -> subprocess.CompletedProcess:
        """Execute command inside Docker container.

        Args:
            env: Environment definition
            cmd: Command to execute
            working_dir: Host working directory (for resolving relative volume paths)
            container_working_dir: Working directory inside container

        Returns:
            CompletedProcess from subprocess.run

        Raises:
            DockerError: If docker run fails
        """
        # Ensure image is built (returns tag and ID)
        image_tag, image_id = self.ensure_image_built(env)

        # Build docker run command; --rm removes the container after exit
        docker_cmd = ["docker", "run", "--rm"]

        # Add volume mounts
        for volume in env.volumes:
            # Resolve relative/home host paths into absolute paths
            resolved_volume = self._resolve_volume_mount(volume)
            docker_cmd.extend(["-v", resolved_volume])

        # Add port mappings
        for port in env.ports:
            docker_cmd.extend(["-p", port])

        # Add environment variables
        for var_name, var_value in env.env_vars.items():
            docker_cmd.extend(["-e", f"{var_name}={var_value}"])

        # Add working directory
        if container_working_dir:
            docker_cmd.extend(["-w", container_working_dir])

        # Add image tag
        docker_cmd.append(image_tag)

        # Add shell and command; default to POSIX sh when env doesn't set one
        shell = env.shell or "sh"
        docker_cmd.extend([shell, "-c", cmd])

        # Execute
        try:
            result = subprocess.run(
                docker_cmd,
                cwd=working_dir,
                check=True,
                capture_output=False,  # Stream output to terminal
            )
            return result
        except subprocess.CalledProcessError as e:
            raise DockerError(
                f"Docker container execution failed with exit code {e.returncode}"
            ) from e

    def _resolve_volume_mount(self, volume: str) -> str:
        """Resolve volume mount specification.

        Handles:
        - Relative paths (resolved relative to project_root)
        - Home directory expansion (~)
        - Absolute paths (used as-is)

        Args:
            volume: Volume specification (e.g., "./src:/workspace/src" or "~/.cargo:/root/.cargo")

        Returns:
            Resolved volume specification with absolute host path

        Raises:
            ValueError: If the specification has no ':' separator
        """
        if ":" not in volume:
            raise ValueError(
                f"Invalid volume specification: '{volume}'. "
                f"Format should be 'host_path:container_path'"
            )

        # Split only on the first ':' so container paths may contain ':' options
        host_path, container_path = volume.split(":", 1)

        # Expand home directory
        if host_path.startswith("~"):
            host_path = os.path.expanduser(host_path)
            resolved_host_path = Path(host_path)
        # Resolve relative paths
        elif not Path(host_path).is_absolute():
            resolved_host_path = self._project_root / host_path
        # Absolute paths used as-is
        else:
            resolved_host_path = Path(host_path)

        return f"{resolved_host_path}:{container_path}"

    def _check_docker_available(self) -> None:
        """Check if docker command is available.

        Raises:
            DockerError: If docker is not available
        """
        try:
            subprocess.run(
                ["docker", "--version"],
                check=True,
                capture_output=True,
                text=True,
            )
        except (subprocess.CalledProcessError, FileNotFoundError) as e:
            # Chain the underlying failure for debuggability.
            raise DockerError(
                "Docker is not available. Please install Docker and ensure it's running.\n"
                "Visit https://docs.docker.com/get-docker/ for installation instructions."
            ) from e

    def _get_image_id(self, image_tag: str) -> str:
        """Get the full image ID for a given tag.

        Args:
            image_tag: Docker image tag (e.g., "tt-env-builder")

        Returns:
            Full image ID (e.g., "sha256:abc123def456...")

        Raises:
            DockerError: If cannot inspect image
        """
        try:
            result = subprocess.run(
                ["docker", "inspect", "--format={{.Id}}", image_tag],
                check=True,
                capture_output=True,
                text=True,
            )
            image_id = result.stdout.strip()
            return image_id
        except subprocess.CalledProcessError as e:
            raise DockerError(f"Failed to inspect image {image_tag}: {e.stderr}") from e
246
+
247
+
248
def is_docker_environment(env: Environment) -> bool:
    """Return True when *env* is Docker-based.

    Args:
        env: Environment to check

    Returns:
        True if the environment declares a dockerfile, False otherwise
    """
    has_dockerfile = bool(env.dockerfile)
    return has_dockerfile
258
+
259
+
260
def resolve_container_working_dir(
    env_working_dir: str, task_working_dir: str
) -> str:
    """Resolve working directory inside container.

    Combines environment's working_dir with task's working_dir:
    - Task sets working_dir: container dir = env_working_dir / task_working_dir
    - Task does not: container dir = env_working_dir
    - Neither set: "/" (Docker default)

    Args:
        env_working_dir: Working directory from environment definition
        task_working_dir: Working directory from task definition

    Returns:
        Resolved working directory path (POSIX separators, as used in
        Linux containers)
    """
    # No task override: fall back to the env dir, or Docker's default root.
    if not task_working_dir:
        return env_working_dir or "/"

    # Join with a single '/' regardless of trailing/leading slashes.
    base = env_working_dir.rstrip("/") if env_working_dir else ""
    return f"{base}/{task_working_dir.lstrip('/')}"
289
+
290
+
291
def parse_dockerignore(dockerignore_path: Path) -> "pathspec.PathSpec | None":
    """Parse .dockerignore file into a pathspec matcher.

    Args:
        dockerignore_path: Path to .dockerignore file

    Returns:
        PathSpec object for matching; None when the pathspec library is
        unavailable; an empty matcher when the file is missing or invalid.
    """
    # Without the optional pathspec dependency we cannot match patterns at all.
    if pathspec is None:
        return None

    # Missing file: match nothing rather than failing.
    if not dockerignore_path.exists():
        return pathspec.PathSpec([])

    try:
        with open(dockerignore_path, "r") as handle:
            return pathspec.PathSpec.from_lines("gitwildmatch", handle)
    except Exception:
        # Invalid patterns - degrade to an empty matcher rather than failing.
        return pathspec.PathSpec([])
314
+
315
+
316
def context_changed_since(
    context_path: Path,
    dockerignore_path: Path | None,
    last_run_time: float,
) -> bool:
    """Check if any file in Docker build context has changed since last run.

    Uses early-exit optimization: stops on the first changed file found.

    Args:
        context_path: Path to Docker build context directory
        dockerignore_path: Optional path to .dockerignore file
        last_run_time: Unix timestamp of last task run

    Returns:
        True if any file changed, False otherwise
    """
    # Build the ignore matcher up front (may be None / empty).
    spec = parse_dockerignore(dockerignore_path) if dockerignore_path else None

    for candidate in context_path.rglob("*"):
        # Only regular files participate in change detection.
        if not candidate.is_file():
            continue

        # Skip files excluded by .dockerignore patterns.
        if spec:
            try:
                rel = candidate.relative_to(context_path)
            except ValueError:
                # Not under the context dir (shouldn't happen with rglob).
                continue
            if spec.match_file(str(rel)):
                continue

        try:
            mtime = candidate.stat().st_mtime
        except (OSError, FileNotFoundError):
            # File vanished mid-walk - treat as a change.
            return True
        if mtime > last_run_time:
            return True  # Early exit on first changed file

    return False  # Nothing newer than last_run_time
362
+
363
+
364
def extract_from_images(dockerfile_content: str) -> list[tuple[str, str | None]]:
    """Extract image references from FROM lines in Dockerfile.

    Args:
        dockerfile_content: Content of Dockerfile

    Returns:
        List of (image_reference, digest) tuples where digest may be None for unpinned images
        Example: [("rust:1.75", None), ("rust", "sha256:abc123...")]
    """
    # Handles: FROM [--platform=...] image[:tag][@digest] [AS alias]
    # Stage aliases may contain letters, digits, '_', '.' and '-', so the AS
    # clause uses [-\w.]+ — plain \w+ silently dropped lines like
    # "FROM node:18 AS build-stage".
    from_pattern = re.compile(
        r"^\s*FROM\s+"  # FROM keyword
        r"(?:--platform=[^\s]+\s+)?"  # Optional platform flag
        r"([^\s@]+)"  # Image name (possibly with :tag)
        r"(?:@(sha256:[a-f0-9]+))?"  # Optional @digest
        r"(?:\s+AS\s+[-\w.]+)?"  # Optional AS alias
        r"\s*$",
        re.MULTILINE | re.IGNORECASE,
    )

    matches = from_pattern.findall(dockerfile_content)
    # findall yields "" for an unmatched digest group; normalize to None.
    return [(image, digest if digest else None) for image, digest in matches]
388
+
389
+
390
def check_unpinned_images(dockerfile_content: str) -> list[str]:
    """Check for unpinned base images in Dockerfile.

    Args:
        dockerfile_content: Content of Dockerfile

    Returns:
        List of unpinned image references (images without @sha256:... digests)
    """
    unpinned: list[str] = []
    for image, digest in extract_from_images(dockerfile_content):
        if digest is None:
            unpinned.append(image)
    return unpinned
401
+
402
+
403
def parse_base_image_digests(dockerfile_content: str) -> list[str]:
    """Parse pinned base image digests from Dockerfile.

    Args:
        dockerfile_content: Content of Dockerfile

    Returns:
        List of digests (e.g., ["sha256:abc123...", "sha256:def456..."])
    """
    digests: list[str] = []
    for _image, digest in extract_from_images(dockerfile_content):
        if digest is not None:
            digests.append(digest)
    return digests
tasktree/executor.py CHANGED
@@ -13,6 +13,7 @@ from datetime import datetime
13
13
  from pathlib import Path
14
14
  from typing import Any
15
15
 
16
+ from tasktree import docker as docker_module
16
17
  from tasktree.graph import get_implicit_inputs, resolve_execution_order
17
18
  from tasktree.hasher import hash_args, hash_task, make_cache_key
18
19
  from tasktree.parser import Recipe, Task
@@ -26,7 +27,7 @@ class TaskStatus:
26
27
  task_name: str
27
28
  will_run: bool
28
29
  reason: str # "fresh", "inputs_changed", "definition_changed",
29
- # "never_run", "dependency_triggered", "no_outputs"
30
+ # "never_run", "no_outputs", "outputs_missing", "forced", "environment_changed"
30
31
  changed_files: list[str] = field(default_factory=list)
31
32
  last_run: datetime | None = None
32
33
 
@@ -49,6 +50,7 @@ class Executor:
49
50
  """
50
51
  self.recipe = recipe
51
52
  self.state = state_manager
53
+ self.docker_manager = docker_module.DockerManager(recipe.project_root)
52
54
 
53
55
  def _get_platform_default_environment(self) -> tuple[str, list[str]]:
54
56
  """Get default shell and args for current platform.
@@ -133,7 +135,6 @@ class Executor:
133
135
  self,
134
136
  task: Task,
135
137
  args_dict: dict[str, Any],
136
- dep_statuses: dict[str, TaskStatus],
137
138
  force: bool = False,
138
139
  ) -> TaskStatus:
139
140
  """Check if a task needs to run.
@@ -141,16 +142,16 @@ class Executor:
141
142
  A task executes if ANY of these conditions are met:
142
143
  1. Force flag is set (--force)
143
144
  2. Task definition hash differs from cached state
144
- 3. Any explicit inputs have newer mtime than last_run
145
- 4. Any implicit inputs (from deps) have changed
146
- 5. No cached state exists for this task+args combination
147
- 6. Task has no inputs AND no outputs (always runs)
148
- 7. Different arguments than any cached execution
145
+ 3. Environment definition has changed
146
+ 4. Any explicit inputs have newer mtime than last_run
147
+ 5. Any implicit inputs (from deps) have changed
148
+ 6. No cached state exists for this task+args combination
149
+ 7. Task has no inputs AND no outputs (always runs)
150
+ 8. Different arguments than any cached execution
149
151
 
150
152
  Args:
151
153
  task: Task to check
152
154
  args_dict: Arguments for this task execution
153
- dep_statuses: Status of dependencies
154
155
  force: If True, ignore freshness and force execution
155
156
 
156
157
  Returns:
@@ -179,21 +180,23 @@ class Executor:
179
180
  reason="no_outputs",
180
181
  )
181
182
 
182
- # Check if any dependency triggered
183
- if any(status.will_run for status in dep_statuses.values()):
183
+ # Check cached state
184
+ cached_state = self.state.get(cache_key)
185
+ if cached_state is None:
184
186
  return TaskStatus(
185
187
  task_name=task.name,
186
188
  will_run=True,
187
- reason="dependency_triggered",
189
+ reason="never_run",
188
190
  )
189
191
 
190
- # Check cached state
191
- cached_state = self.state.get(cache_key)
192
- if cached_state is None:
192
+ # Check if environment definition has changed
193
+ env_changed = self._check_environment_changed(task, cached_state, effective_env)
194
+ if env_changed:
193
195
  return TaskStatus(
194
196
  task_name=task.name,
195
197
  will_run=True,
196
- reason="never_run",
198
+ reason="environment_changed",
199
+ last_run=datetime.fromtimestamp(cached_state.last_run),
197
200
  )
198
201
 
199
202
  # Check if inputs have changed
@@ -262,23 +265,19 @@ class Executor:
262
265
  # Execute task and all dependencies
263
266
  execution_order = resolve_execution_order(self.recipe, task_name)
264
267
 
265
- # Check status of all tasks
268
+ # Single phase: Check and execute incrementally
266
269
  statuses: dict[str, TaskStatus] = {}
267
270
  for name in execution_order:
268
271
  task = self.recipe.tasks[name]
269
272
 
270
- # Get status of dependencies
271
- dep_statuses = {dep: statuses[dep] for dep in task.deps if dep in statuses}
272
-
273
273
  # Determine task-specific args (only for target task)
274
274
  task_args = args_dict if name == task_name else {}
275
275
 
276
- status = self.check_task_status(task, task_args, dep_statuses, force=force)
276
+ # Check if task needs to run (based on CURRENT filesystem state)
277
+ status = self.check_task_status(task, task_args, force=force)
277
278
  statuses[name] = status
278
279
 
279
- # Execute tasks that need to run
280
- for name in execution_order:
281
- status = statuses[name]
280
+ # Execute immediately if needed
282
281
  if status.will_run:
283
282
  # Warn if re-running due to missing outputs
284
283
  if status.reason == "outputs_missing":
@@ -288,8 +287,6 @@ class Executor:
288
287
  file=sys.stderr,
289
288
  )
290
289
 
291
- task = self.recipe.tasks[name]
292
- task_args = args_dict if name == task_name else {}
293
290
  self._run_task(task, task_args)
294
291
 
295
292
  return statuses
@@ -310,17 +307,28 @@ class Executor:
310
307
  # Determine working directory
311
308
  working_dir = self.recipe.project_root / task.working_dir
312
309
 
313
- # Resolve environment for this task
314
- shell, shell_args, preamble = self._resolve_environment(task)
310
+ # Check if task uses Docker environment
311
+ env_name = self._get_effective_env_name(task)
312
+ env = None
313
+ if env_name:
314
+ env = self.recipe.get_environment(env_name)
315
315
 
316
316
  # Execute command
317
317
  print(f"Running: {task.name}")
318
318
 
319
- # Detect multi-line commands (ignore trailing newlines from YAML folded blocks)
320
- if "\n" in cmd.rstrip():
321
- self._run_multiline_command(cmd, working_dir, task.name, shell, preamble)
319
+ # Route to Docker execution or regular execution
320
+ if env and env.dockerfile:
321
+ # Docker execution path
322
+ self._run_task_in_docker(task, env, cmd, working_dir)
322
323
  else:
323
- self._run_single_line_command(cmd, working_dir, task.name, shell, shell_args)
324
+ # Regular execution path
325
+ shell, shell_args, preamble = self._resolve_environment(task)
326
+
327
+ # Detect multi-line commands (ignore trailing newlines from YAML folded blocks)
328
+ if "\n" in cmd.rstrip():
329
+ self._run_multiline_command(cmd, working_dir, task.name, shell, preamble)
330
+ else:
331
+ self._run_single_line_command(cmd, working_dir, task.name, shell, shell_args)
324
332
 
325
333
  # Update state
326
334
  self._update_state(task, args_dict)
@@ -421,6 +429,36 @@ class Executor:
421
429
  except OSError:
422
430
  pass # Ignore cleanup errors
423
431
 
432
def _run_task_in_docker(
    self, task: Task, env: Any, cmd: str, working_dir: Path
) -> None:
    """Execute task inside Docker container.

    Args:
        task: Task to execute
        env: Docker environment configuration
        cmd: Command to execute
        working_dir: Host working directory

    Raises:
        ExecutionError: If Docker execution fails
    """
    # Combine environment and task working directories into the in-container path.
    container_working_dir = docker_module.resolve_container_working_dir(
        env.working_dir, task.working_dir
    )

    # Delegate execution to the Docker manager, translating its errors
    # into the executor's exception type.
    try:
        self.docker_manager.run_in_container(
            env=env,
            cmd=cmd,
            working_dir=working_dir,
            container_working_dir=container_working_dir,
        )
    except docker_module.DockerError as exc:
        raise ExecutionError(str(exc)) from exc
461
+
424
462
  def _substitute_args(self, cmd: str, args_dict: dict[str, Any]) -> str:
425
463
  """Substitute arguments in command string.
426
464
 
@@ -451,11 +489,100 @@ class Executor:
451
489
  all_inputs.extend(implicit_inputs)
452
490
  return all_inputs
453
491
 
492
def _check_environment_changed(
    self, task: Task, cached_state: TaskState, env_name: str
) -> bool:
    """Check if environment definition has changed since last run.

    For shell environments: checks YAML definition hash.
    For Docker environments: checks YAML hash AND Docker image ID.

    Args:
        task: Task to check
        cached_state: Cached state from previous run
        env_name: Effective environment name (from _get_effective_env_name)

    Returns:
        True if environment definition changed, False otherwise
    """
    # Platform default (no named environment): nothing to track.
    if not env_name:
        return False

    environment = self.recipe.get_environment(env_name)
    if environment is None:
        # Environment was removed from the recipe — treat as changed.
        return True

    # Local import mirrors the module's circular-dependency avoidance.
    from tasktree.hasher import hash_environment_definition

    previous_hash = cached_state.input_state.get(f"_env_hash_{env_name}")
    if previous_hash is None:
        # No baseline recorded (old state file): report changed once to
        # establish one.
        return True

    if hash_environment_definition(environment) != previous_hash:
        # YAML definition changed; no need to inspect the image.
        return True

    # YAML unchanged; Docker environments additionally track the image ID
    # (catches unpinned base images, network-dependent builds, etc.).
    if environment.dockerfile:
        return self._check_docker_image_changed(environment, cached_state, env_name)

    # Shell environment with an unchanged definition.
    return False
541
+
542
+ def _check_docker_image_changed(
543
+ self, env: Environment, cached_state: TaskState, env_name: str
544
+ ) -> bool:
545
+ """Check if Docker image ID has changed.
546
+
547
+ Builds the image and compares the resulting image ID with the cached ID.
548
+ This detects changes from unpinned base images, network-dependent builds, etc.
549
+
550
+ Args:
551
+ env: Docker environment definition
552
+ cached_state: Cached state from previous run
553
+ env_name: Environment name
554
+
555
+ Returns:
556
+ True if image ID changed, False otherwise
557
+ """
558
+ # Build/ensure image is built and get its ID
559
+ try:
560
+ image_tag, current_image_id = self.docker_manager.ensure_image_built(env)
561
+ except Exception as e:
562
+ # If we can't build, treat as changed (will fail later with better error)
563
+ return True
564
+
565
+ # Get cached image ID
566
+ image_id_key = f"_docker_image_id_{env_name}"
567
+ cached_image_id = cached_state.input_state.get(image_id_key)
568
+
569
+ # If no cached ID (first run or old state), treat as changed
570
+ if cached_image_id is None:
571
+ return True
572
+
573
+ # Compare image IDs
574
+ return current_image_id != cached_image_id
575
+
454
576
  def _check_inputs_changed(
455
577
  self, task: Task, cached_state: TaskState, all_inputs: list[str]
456
578
  ) -> list[str]:
457
579
  """Check if any input files have changed since last run.
458
580
 
581
+ Handles both regular file inputs and Docker-specific inputs:
582
+ - Regular files: checked via mtime
583
+ - Docker context: checked via directory walk with early exit
584
+ - Dockerfile digests: checked via parsing and comparison
585
+
459
586
  Args:
460
587
  task: Task to check
461
588
  cached_state: Cached state from previous run
@@ -469,7 +596,66 @@ class Executor:
469
596
  # Expand glob patterns
470
597
  input_files = self._expand_globs(all_inputs, task.working_dir)
471
598
 
599
+ # Check if task uses Docker environment
600
+ env_name = self._get_effective_env_name(task)
601
+ docker_env = None
602
+ if env_name:
603
+ docker_env = self.recipe.get_environment(env_name)
604
+ if docker_env and not docker_env.dockerfile:
605
+ docker_env = None # Not a Docker environment
606
+
472
607
  for file_path in input_files:
608
+ # Handle Docker context directory check
609
+ if file_path.startswith("_docker_context_"):
610
+ if docker_env:
611
+ context_name = file_path.replace("_docker_context_", "")
612
+ context_path = self.recipe.project_root / context_name
613
+ dockerignore_path = context_path / ".dockerignore"
614
+
615
+ # Get last context check time
616
+ cached_context_time = cached_state.input_state.get(
617
+ f"_context_{context_name}"
618
+ )
619
+ if cached_context_time is None:
620
+ # Never checked before - consider changed
621
+ changed_files.append(f"Docker context: {context_name}")
622
+ continue
623
+
624
+ # Check if context changed (with early exit optimization)
625
+ if docker_module.context_changed_since(
626
+ context_path, dockerignore_path, cached_context_time
627
+ ):
628
+ changed_files.append(f"Docker context: {context_name}")
629
+ continue
630
+
631
+ # Handle Docker Dockerfile digest check
632
+ if file_path.startswith("_docker_dockerfile_"):
633
+ if docker_env:
634
+ dockerfile_name = file_path.replace("_docker_dockerfile_", "")
635
+ dockerfile_path = self.recipe.project_root / dockerfile_name
636
+
637
+ try:
638
+ dockerfile_content = dockerfile_path.read_text()
639
+ current_digests = set(
640
+ docker_module.parse_base_image_digests(dockerfile_content)
641
+ )
642
+
643
+ # Get cached digests
644
+ cached_digests = set()
645
+ for key in cached_state.input_state:
646
+ if key.startswith("_digest_"):
647
+ digest = key.replace("_digest_", "")
648
+ cached_digests.add(digest)
649
+
650
+ # Check if digests changed
651
+ if current_digests != cached_digests:
652
+ changed_files.append(f"Docker base image digests in {dockerfile_name}")
653
+ except (OSError, IOError):
654
+ # Can't read Dockerfile - consider changed
655
+ changed_files.append(f"Dockerfile: {dockerfile_name}")
656
+ continue
657
+
658
+ # Regular file check
473
659
  file_path_obj = self.recipe.project_root / task.working_dir / file_path
474
660
  if not file_path_obj.exists():
475
661
  continue
@@ -549,10 +735,61 @@ class Executor:
549
735
 
550
736
  input_state = {}
551
737
  for file_path in input_files:
738
+ # Skip Docker special markers (handled separately below)
739
+ if file_path.startswith("_docker_"):
740
+ continue
741
+
552
742
  file_path_obj = self.recipe.project_root / task.working_dir / file_path
553
743
  if file_path_obj.exists():
554
744
  input_state[file_path] = file_path_obj.stat().st_mtime
555
745
 
746
+ # Record Docker-specific inputs if task uses Docker environment
747
+ env_name = self._get_effective_env_name(task)
748
+ if env_name:
749
+ env = self.recipe.get_environment(env_name)
750
+ if env and env.dockerfile:
751
+ # Record Dockerfile mtime
752
+ dockerfile_path = self.recipe.project_root / env.dockerfile
753
+ if dockerfile_path.exists():
754
+ input_state[env.dockerfile] = dockerfile_path.stat().st_mtime
755
+
756
+ # Record .dockerignore mtime if exists
757
+ context_path = self.recipe.project_root / env.context
758
+ dockerignore_path = context_path / ".dockerignore"
759
+ if dockerignore_path.exists():
760
+ relative_dockerignore = str(
761
+ dockerignore_path.relative_to(self.recipe.project_root)
762
+ )
763
+ input_state[relative_dockerignore] = dockerignore_path.stat().st_mtime
764
+
765
+ # Record context check timestamp
766
+ input_state[f"_context_{env.context}"] = time.time()
767
+
768
+ # Parse and record base image digests from Dockerfile
769
+ try:
770
+ dockerfile_content = dockerfile_path.read_text()
771
+ digests = docker_module.parse_base_image_digests(dockerfile_content)
772
+ for digest in digests:
773
+ # Store digest with Dockerfile's mtime
774
+ input_state[f"_digest_{digest}"] = dockerfile_path.stat().st_mtime
775
+ except (OSError, IOError):
776
+ # If we can't read Dockerfile, skip digest tracking
777
+ pass
778
+
779
+ # Record environment definition hash for all environments (shell and Docker)
780
+ if env:
781
+ from tasktree.hasher import hash_environment_definition
782
+
783
+ env_hash = hash_environment_definition(env)
784
+ input_state[f"_env_hash_{env_name}"] = env_hash
785
+
786
+ # For Docker environments, also store the image ID
787
+ if env.dockerfile:
788
+ # Image was already built during check phase or task execution
789
+ if env_name in self.docker_manager._built_images:
790
+ image_tag, image_id = self.docker_manager._built_images[env_name]
791
+ input_state[f"_docker_image_id_{env_name}"] = image_id
792
+
556
793
  # Create new state
557
794
  state = TaskState(
558
795
  last_run=time.time(),
tasktree/graph.py CHANGED
@@ -1,6 +1,7 @@
1
1
  """Dependency resolution using topological sorting."""
2
2
 
3
3
  from graphlib import TopologicalSorter
4
+ from pathlib import Path
4
5
 
5
6
  from tasktree.parser import Recipe, Task
6
7
 
@@ -71,16 +72,21 @@ def get_implicit_inputs(recipe: Recipe, task: Task) -> list[str]:
71
72
  Tasks automatically inherit inputs from dependencies:
72
73
  1. All outputs from dependency tasks become implicit inputs
73
74
  2. All inputs from dependency tasks that don't declare outputs are inherited
75
+ 3. If task uses a Docker environment, Docker artifacts become implicit inputs:
76
+ - Dockerfile
77
+ - .dockerignore (if present)
78
+ - Special markers for context directory and base image digests
74
79
 
75
80
  Args:
76
81
  recipe: Parsed recipe containing all tasks
77
82
  task: Task to get implicit inputs for
78
83
 
79
84
  Returns:
80
- List of glob patterns for implicit inputs
85
+ List of glob patterns for implicit inputs, including Docker-specific markers
81
86
  """
82
87
  implicit_inputs = []
83
88
 
89
+ # Inherit from dependencies
84
90
  for dep_name in task.deps:
85
91
  dep_task = recipe.tasks.get(dep_name)
86
92
  if dep_task is None:
@@ -93,6 +99,29 @@ def get_implicit_inputs(recipe: Recipe, task: Task) -> list[str]:
93
99
  elif dep_task.inputs:
94
100
  implicit_inputs.extend(dep_task.inputs)
95
101
 
102
+ # Add Docker-specific implicit inputs if task uses Docker environment
103
+ env_name = task.env or recipe.default_env
104
+ if env_name:
105
+ env = recipe.get_environment(env_name)
106
+ if env and env.dockerfile:
107
+ # Add Dockerfile as input
108
+ implicit_inputs.append(env.dockerfile)
109
+
110
+ # Add .dockerignore if it exists in context directory
111
+ context_path = recipe.project_root / env.context
112
+ dockerignore_path = context_path / ".dockerignore"
113
+ if dockerignore_path.exists():
114
+ relative_dockerignore = str(
115
+ dockerignore_path.relative_to(recipe.project_root)
116
+ )
117
+ implicit_inputs.append(relative_dockerignore)
118
+
119
+ # Add special markers for context directory and digest tracking
120
+ # These are tracked differently in state management (not file paths)
121
+ # The executor will handle these specially
122
+ implicit_inputs.append(f"_docker_context_{env.context}")
123
+ implicit_inputs.append(f"_docker_dockerfile_{env.dockerfile}")
124
+
96
125
  return implicit_inputs
97
126
 
98
127
 
tasktree/hasher.py CHANGED
@@ -21,6 +21,33 @@ def hash_args(args_dict: dict[str, Any]) -> str:
21
21
  return hashlib.sha256(serialized.encode()).hexdigest()[:8]
22
22
 
23
23
 
24
+ def hash_environment_definition(env) -> str:
25
+ """Hash environment definition fields that affect task execution.
26
+
27
+ Args:
28
+ env: Environment to hash
29
+
30
+ Returns:
31
+ 16-character hash of environment definition
32
+ """
33
+ # Import inside function to avoid circular dependency
34
+ from tasktree.parser import Environment
35
+
36
+ data = {
37
+ "shell": env.shell,
38
+ "args": sorted(env.args), # Sort for determinism
39
+ "preamble": env.preamble,
40
+ "dockerfile": env.dockerfile,
41
+ "context": env.context,
42
+ "volumes": sorted(env.volumes),
43
+ "ports": sorted(env.ports),
44
+ "env_vars": dict(sorted(env.env_vars.items())),
45
+ "working_dir": env.working_dir,
46
+ }
47
+ serialized = json.dumps(data, sort_keys=True, separators=(",", ":"))
48
+ return hashlib.sha256(serialized.encode()).hexdigest()[:16]
49
+
50
+
24
51
  def make_cache_key(task_hash: str, args_hash: Optional[str] = None) -> str:
25
52
  if args_hash:
26
53
  return f"{task_hash}__{args_hash}"
tasktree/parser.py CHANGED
@@ -16,12 +16,24 @@ class CircularImportError(Exception):
16
16
 
17
17
  @dataclass
18
18
  class Environment:
19
- """Represents an execution environment configuration."""
19
+ """Represents an execution environment configuration.
20
+
21
+ Can be either a shell environment or a Docker environment:
22
+ - Shell environment: has 'shell' field, executes directly on host
23
+ - Docker environment: has 'dockerfile' field, executes in container
24
+ """
20
25
 
21
26
  name: str
22
- shell: str
27
+ shell: str = "" # Path to shell (required for shell envs, optional for Docker)
23
28
  args: list[str] = field(default_factory=list)
24
29
  preamble: str = ""
30
+ # Docker-specific fields (presence of dockerfile indicates Docker environment)
31
+ dockerfile: str = "" # Path to Dockerfile
32
+ context: str = "" # Path to build context directory
33
+ volumes: list[str] = field(default_factory=list) # Volume mounts
34
+ ports: list[str] = field(default_factory=list) # Port mappings
35
+ env_vars: dict[str, str] = field(default_factory=dict) # Environment variables
36
+ working_dir: str = "" # Working directory (container or host)
25
37
 
26
38
  def __post_init__(self):
27
39
  """Ensure args is always a list."""
@@ -201,18 +213,71 @@ def _parse_file_with_env(
201
213
  f"Environment '{env_name}' must be a dictionary"
202
214
  )
203
215
 
204
- # Parse environment configuration
216
+ # Parse common environment configuration
205
217
  shell = env_config.get("shell", "")
206
- if not shell:
218
+ args = env_config.get("args", [])
219
+ preamble = env_config.get("preamble", "")
220
+ working_dir = env_config.get("working_dir", "")
221
+
222
+ # Parse Docker-specific fields
223
+ dockerfile = env_config.get("dockerfile", "")
224
+ context = env_config.get("context", "")
225
+ volumes = env_config.get("volumes", [])
226
+ ports = env_config.get("ports", [])
227
+ env_vars = env_config.get("env_vars", {})
228
+
229
+ # Validate environment type
230
+ if not shell and not dockerfile:
207
231
  raise ValueError(
208
- f"Environment '{env_name}' must specify 'shell'"
232
+ f"Environment '{env_name}' must specify either 'shell' "
233
+ f"(for shell environments) or 'dockerfile' (for Docker environments)"
209
234
  )
210
235
 
211
- args = env_config.get("args", [])
212
- preamble = env_config.get("preamble", "")
236
+ # Validate Docker environment requirements
237
+ if dockerfile and not context:
238
+ raise ValueError(
239
+ f"Docker environment '{env_name}' must specify 'context' "
240
+ f"when 'dockerfile' is specified"
241
+ )
242
+
243
+ # Validate that Dockerfile exists if specified
244
+ if dockerfile:
245
+ dockerfile_path = project_root / dockerfile
246
+ if not dockerfile_path.exists():
247
+ raise ValueError(
248
+ f"Environment '{env_name}': Dockerfile not found at {dockerfile_path}"
249
+ )
250
+
251
+ # Validate that context directory exists if specified
252
+ if context:
253
+ context_path = project_root / context
254
+ if not context_path.exists():
255
+ raise ValueError(
256
+ f"Environment '{env_name}': context directory not found at {context_path}"
257
+ )
258
+ if not context_path.is_dir():
259
+ raise ValueError(
260
+ f"Environment '{env_name}': context must be a directory, got {context_path}"
261
+ )
262
+
263
+ # Validate environment name (must be valid Docker tag)
264
+ if not env_name.replace("-", "").replace("_", "").isalnum():
265
+ raise ValueError(
266
+ f"Environment name '{env_name}' must be alphanumeric "
267
+ f"(with optional hyphens and underscores)"
268
+ )
213
269
 
214
270
  environments[env_name] = Environment(
215
- name=env_name, shell=shell, args=args, preamble=preamble
271
+ name=env_name,
272
+ shell=shell,
273
+ args=args,
274
+ preamble=preamble,
275
+ dockerfile=dockerfile,
276
+ context=context,
277
+ volumes=volumes,
278
+ ports=ports,
279
+ env_vars=env_vars,
280
+ working_dir=working_dir,
216
281
  )
217
282
 
218
283
  return tasks, environments, default_env
@@ -425,6 +490,7 @@ def _parse_file(
425
490
  working_dir=working_dir,
426
491
  args=task_data.get("args", []),
427
492
  source_file=str(file_path),
493
+ env=task_data.get("env", ""),
428
494
  )
429
495
 
430
496
  tasks[full_name] = task
tasktree/state.py CHANGED
@@ -13,7 +13,7 @@ class TaskState:
13
13
  """State for a single task execution."""
14
14
 
15
15
  last_run: float
16
- input_state: dict[str, float] = field(default_factory=dict)
16
+ input_state: dict[str, float | str] = field(default_factory=dict)
17
17
 
18
18
  def to_dict(self) -> dict[str, Any]:
19
19
  """Convert to dictionary for JSON serialization."""
@@ -1,10 +1,11 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tasktree
3
- Version: 0.0.6
3
+ Version: 0.0.7
4
4
  Summary: A task automation tool with incremental execution
5
5
  Requires-Python: >=3.11
6
6
  Requires-Dist: click>=8.1.0
7
7
  Requires-Dist: colorama>=0.4.6
8
+ Requires-Dist: pathspec>=0.11.0
8
9
  Requires-Dist: pyyaml>=6.0
9
10
  Requires-Dist: rich>=13.0.0
10
11
  Requires-Dist: typer>=0.9.0
@@ -108,52 +109,6 @@ Boom! Done. `build` will always run, because there's no sensible way to know wha
108
109
 
109
110
  This is a toy example, but you can imagine how it plays out on a more complex project.
110
111
 
111
- ## Migrating from v1.x to v2.0
112
-
113
- Version 2.0 requires all task definitions to be under a top-level `tasks:` key.
114
-
115
- ### Quick Migration
116
-
117
- Wrap your existing tasks in a `tasks:` block:
118
-
119
- ```yaml
120
- # Before (v1.x)
121
- build:
122
- cmd: cargo build
123
-
124
- # After (v2.0)
125
- tasks:
126
- build:
127
- cmd: cargo build
128
- ```
129
-
130
- ### Why This Change?
131
-
132
- 1. **Clearer structure**: Explicit separation of tasks from configuration
133
- 2. **No naming conflicts**: You can now create tasks named "imports" or "environments"
134
- 3. **Better error messages**: More helpful validation errors
135
- 4. **Consistency**: All recipe files use the same format
136
-
137
- ### Error Messages
138
-
139
- If you forget to update, you'll see a clear error:
140
-
141
- ```
142
- Invalid recipe format in tasktree.yaml
143
-
144
- Task definitions must be under a top-level "tasks:" key.
145
-
146
- Found these keys at root level: build, test
147
-
148
- Did you mean:
149
-
150
- tasks:
151
- build:
152
- cmd: ...
153
- test:
154
- cmd: ...
155
- ```
156
-
157
112
  ## Installation
158
113
 
159
114
  ### From PyPI (Recommended)
@@ -0,0 +1,14 @@
1
+ tasktree/__init__.py,sha256=MVmdvKb3JdqLlo0x2_TPGMfgFC0HsDnP79HAzGnFnjI,1081
2
+ tasktree/cli.py,sha256=0xusNitT1AtLgR3guUsupnHSXJ0_C749Dx7dfYCENJA,15233
3
+ tasktree/docker.py,sha256=duIT5HkGBvLNOPdbgdXuqqUSwJgdWsb4Sxv_HVm8hzA,13118
4
+ tasktree/executor.py,sha256=8xNKPYkekhaGd_gCO7PT7E-n0JeHVtDgIbxACuvPUzU,28707
5
+ tasktree/graph.py,sha256=lA3ExNM_ag0AlC6iW20unseCjRg5wCZXbmXs2M6TnQw,5578
6
+ tasktree/hasher.py,sha256=dCyakihE4rHoOVCbt8hgTQZVuez3P1V0SrWUl-aM2Tw,1670
7
+ tasktree/parser.py,sha256=gkbzlTOwudJsU5gvgSCpVVY2GoYZlo_kLBVsIRbeZiU,19076
8
+ tasktree/state.py,sha256=Cktl4D8iDZVd55aO2LqVyPrc-BnljkesxxkcMcdcfOY,3541
9
+ tasktree/tasks.py,sha256=2QdQZtJAX2rSGbyXKG1z9VF_siz1DUzdvzCgPkykxtU,173
10
+ tasktree/types.py,sha256=w--sKjRTc8mGYkU5eAduqV86SolDqOYspAPuVKIuSQQ,3797
11
+ tasktree-0.0.7.dist-info/METADATA,sha256=YthtlFiUCaHTgO8n5jUsiJQ3py4pQ23_Zh3Ycshi4fk,17439
12
+ tasktree-0.0.7.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
13
+ tasktree-0.0.7.dist-info/entry_points.txt,sha256=lQINlvRYnimvteBbnhH84A9clTg8NnpEjCWqWkqg8KE,40
14
+ tasktree-0.0.7.dist-info/RECORD,,
@@ -1,13 +0,0 @@
1
- tasktree/__init__.py,sha256=MVmdvKb3JdqLlo0x2_TPGMfgFC0HsDnP79HAzGnFnjI,1081
2
- tasktree/cli.py,sha256=0xusNitT1AtLgR3guUsupnHSXJ0_C749Dx7dfYCENJA,15233
3
- tasktree/executor.py,sha256=_E37tShHuiOj0Mvx2GbS9y3GIozC3hpzAVhAjbvYJqg,18638
4
- tasktree/graph.py,sha256=9ngfg93y7EkOIN_lUQa0u-JhnwiMN1UdQQvIFw8RYCE,4181
5
- tasktree/hasher.py,sha256=puJey9wF_p37k_xqjhYr_6ICsbAfrTBWHec6MqKV4BU,814
6
- tasktree/parser.py,sha256=SzWn-V4KMgjxNZrN0ERApb5dd39LPJTfkA2Ih2nYWcs,15580
7
- tasktree/state.py,sha256=rxKtS3SbsPtAuraHbN807RGWfoYYkQ3pe8CxUstwo2k,3535
8
- tasktree/tasks.py,sha256=2QdQZtJAX2rSGbyXKG1z9VF_siz1DUzdvzCgPkykxtU,173
9
- tasktree/types.py,sha256=w--sKjRTc8mGYkU5eAduqV86SolDqOYspAPuVKIuSQQ,3797
10
- tasktree-0.0.6.dist-info/METADATA,sha256=GQZHcFVOIXfWmgPbb9pAt7zB0iH22hyGToc0YEwUv34,18287
11
- tasktree-0.0.6.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
12
- tasktree-0.0.6.dist-info/entry_points.txt,sha256=lQINlvRYnimvteBbnhH84A9clTg8NnpEjCWqWkqg8KE,40
13
- tasktree-0.0.6.dist-info/RECORD,,