tasktree 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tasktree/executor.py ADDED
@@ -0,0 +1,365 @@
1
+ """Task execution and staleness detection."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import subprocess
6
+ import time
7
+ from dataclasses import dataclass, field
8
+ from datetime import datetime
9
+ from pathlib import Path
10
+ from typing import Any
11
+
12
+ from tasktree.graph import get_implicit_inputs, resolve_execution_order
13
+ from tasktree.hasher import hash_args, hash_task, make_cache_key
14
+ from tasktree.parser import Recipe, Task
15
+ from tasktree.state import StateManager, TaskState
16
+
17
+
18
@dataclass
class TaskStatus:
    """Status of a task for execution planning.

    Attributes:
        task_name: Name of the task this status describes.
        will_run: Whether the task will be (re-)executed.
        reason: Why the task will or won't run. One of: "fresh",
            "inputs_changed", "never_run", "dependency_triggered",
            "no_outputs", "outputs_missing".
        changed_files: Changed input files when reason is "inputs_changed",
            or unmatched output patterns when reason is "outputs_missing".
        last_run: Wall-clock time of the previous execution, if any.
    """

    task_name: str
    will_run: bool
    reason: str
    changed_files: list[str] = field(default_factory=list)
    last_run: datetime | None = None
28
+
29
+
30
class ExecutionError(Exception):
    """Raised when task execution fails."""
34
+
35
+
36
class Executor:
    """Executes tasks with incremental (staleness-aware) execution logic."""

    def __init__(self, recipe: Recipe, state_manager: StateManager):
        """Initialize executor.

        Args:
            recipe: Parsed recipe containing all tasks
            state_manager: State manager for tracking task execution
        """
        self.recipe = recipe
        self.state = state_manager

    def check_task_status(
        self,
        task: Task,
        args_dict: dict[str, Any],
        dep_statuses: dict[str, TaskStatus],
    ) -> TaskStatus:
        """Check if a task needs to run.

        A task executes if ANY of these conditions are met:
        1. Task definition hash differs from cached state
        2. Any explicit inputs have newer mtime than last_run
        3. Any implicit inputs (from deps) have changed
        4. No cached state exists for this task+args combination
        5. Task has no inputs AND no outputs (always runs)
        6. Different arguments than any cached execution

        Args:
            task: Task to check
            args_dict: Arguments for this task execution
            dep_statuses: Status of dependencies

        Returns:
            TaskStatus indicating whether task will run and why
        """
        # The cache key embeds both the definition hash and the args hash,
        # so a changed definition or new args surfaces as a cache miss
        # (reason "never_run") rather than a distinct reason.
        task_hash = hash_task(task.cmd, task.outputs, task.working_dir, task.args)
        args_hash = hash_args(args_dict) if args_dict else None
        cache_key = make_cache_key(task_hash, args_hash)

        # A task with neither inputs nor outputs has nothing to compare
        # against, so it always runs.
        all_inputs = self._get_all_inputs(task)
        if not all_inputs and not task.outputs:
            return TaskStatus(
                task_name=task.name,
                will_run=True,
                reason="no_outputs",
            )

        # If any dependency is going to run, this task must run too.
        if any(status.will_run for status in dep_statuses.values()):
            return TaskStatus(
                task_name=task.name,
                will_run=True,
                reason="dependency_triggered",
            )

        # No cached state for this key: never ran (or definition/args changed).
        cached_state = self.state.get(cache_key)
        if cached_state is None:
            return TaskStatus(
                task_name=task.name,
                will_run=True,
                reason="never_run",
            )

        # Stale inputs force a re-run.
        changed_files = self._check_inputs_changed(task, cached_state, all_inputs)
        if changed_files:
            return TaskStatus(
                task_name=task.name,
                will_run=True,
                reason="inputs_changed",
                changed_files=changed_files,
                last_run=datetime.fromtimestamp(cached_state.last_run),
            )

        # Declared outputs that vanished (e.g. deleted by a clean) force a re-run.
        missing_outputs = self._check_outputs_missing(task)
        if missing_outputs:
            return TaskStatus(
                task_name=task.name,
                will_run=True,
                reason="outputs_missing",
                changed_files=missing_outputs,
                last_run=datetime.fromtimestamp(cached_state.last_run),
            )

        # Nothing changed: task is fresh.
        return TaskStatus(
            task_name=task.name,
            will_run=False,
            reason="fresh",
            last_run=datetime.fromtimestamp(cached_state.last_run),
        )

    def execute_task(
        self,
        task_name: str,
        args_dict: dict[str, Any] | None = None,
        dry_run: bool = False,
    ) -> dict[str, TaskStatus]:
        """Execute a task and its dependencies.

        Args:
            task_name: Name of task to execute
            args_dict: Arguments to pass to the task
            dry_run: If True, only check what would run without executing

        Returns:
            Dictionary of task names to their execution status

        Raises:
            ExecutionError: If task execution fails
        """
        # Hoisted out of the execution loop (was previously imported per task).
        import sys

        if args_dict is None:
            args_dict = {}

        # Dependencies come first in the resolved order.
        execution_order = resolve_execution_order(self.recipe, task_name)

        # First pass: decide what needs to run, feeding each task the
        # already-computed statuses of its dependencies.
        statuses: dict[str, TaskStatus] = {}
        for name in execution_order:
            task = self.recipe.tasks[name]
            dep_statuses = {dep: statuses[dep] for dep in task.deps if dep in statuses}
            # User-supplied args apply only to the target task itself.
            task_args = args_dict if name == task_name else {}
            statuses[name] = self.check_task_status(task, task_args, dep_statuses)

        if dry_run:
            return statuses

        # Second pass: run the stale tasks, in dependency order.
        for name in execution_order:
            status = statuses[name]
            if not status.will_run:
                continue
            if status.reason == "outputs_missing":
                print(
                    f"Warning: Re-running task '{name}' because declared outputs are missing",
                    file=sys.stderr,
                )
            task = self.recipe.tasks[name]
            task_args = args_dict if name == task_name else {}
            self._run_task(task, task_args)

        return statuses

    def _run_task(self, task: Task, args_dict: dict[str, Any]) -> None:
        """Execute a single task and record its new state on success.

        Args:
            task: Task to execute
            args_dict: Arguments to substitute in command

        Raises:
            ExecutionError: If the command exits non-zero
        """
        cmd = self._substitute_args(task.cmd, args_dict)
        working_dir = self.recipe.project_root / task.working_dir

        print(f"Running: {task.name}")
        try:
            # NOTE(review): argument values are interpolated into a
            # shell=True command string; recipe input is assumed trusted.
            # Output is deliberately not captured so it streams to the user.
            subprocess.run(
                cmd,
                shell=True,
                cwd=working_dir,
                check=True,
                capture_output=False,
            )
        except subprocess.CalledProcessError as e:
            # Chain the original error so the traceback keeps the cause.
            raise ExecutionError(
                f"Task '{task.name}' failed with exit code {e.returncode}"
            ) from e

        # Only successful runs update cached state.
        self._update_state(task, args_dict)

    def _substitute_args(self, cmd: str, args_dict: dict[str, Any]) -> str:
        """Substitute arguments in command string.

        Args:
            cmd: Command template with {{arg}} placeholders
            args_dict: Arguments to substitute

        Returns:
            Command with arguments substituted
        """
        result = cmd
        for key, value in args_dict.items():
            # f"{{{{{key}}}}}" renders as the literal text "{{key}}".
            placeholder = f"{{{{{key}}}}}"
            result = result.replace(placeholder, str(value))
        return result

    def _get_all_inputs(self, task: Task) -> list[str]:
        """Get all inputs for a task (explicit + implicit from dependencies).

        Args:
            task: Task to get inputs for

        Returns:
            List of input glob patterns
        """
        all_inputs = list(task.inputs)
        implicit_inputs = get_implicit_inputs(self.recipe, task)
        all_inputs.extend(implicit_inputs)
        return all_inputs

    def _check_inputs_changed(
        self, task: Task, cached_state: TaskState, all_inputs: list[str]
    ) -> list[str]:
        """Check if any input files have changed since last run.

        Args:
            task: Task to check
            cached_state: Cached state from previous run
            all_inputs: All input glob patterns

        Returns:
            List of changed file paths (relative to the task's working dir)
        """
        changed_files = []

        # Expand glob patterns into concrete files.
        input_files = self._expand_globs(all_inputs, task.working_dir)

        for file_path in input_files:
            file_path_obj = self.recipe.project_root / task.working_dir / file_path
            # NOTE(review): files that disappear between runs are skipped
            # here, so a deleted input does not by itself trigger a re-run.
            if not file_path_obj.exists():
                continue

            current_mtime = file_path_obj.stat().st_mtime

            # New files (no cached mtime) or files touched since the cached
            # run count as changed.
            cached_mtime = cached_state.input_state.get(file_path)
            if cached_mtime is None or current_mtime > cached_mtime:
                changed_files.append(file_path)

        return changed_files

    def _check_outputs_missing(self, task: Task) -> list[str]:
        """Check if any declared outputs are missing.

        Args:
            task: Task to check

        Returns:
            List of output patterns that have no matching files
        """
        if not task.outputs:
            return []

        missing_patterns = []
        base_path = self.recipe.project_root / task.working_dir

        for pattern in task.outputs:
            # A pattern with zero matches means its outputs are gone.
            matches = list(base_path.glob(pattern))
            if not matches:
                missing_patterns.append(pattern)

        return missing_patterns

    def _expand_globs(self, patterns: list[str], working_dir: str) -> list[str]:
        """Expand glob patterns to actual file paths.

        Args:
            patterns: List of glob patterns
            working_dir: Working directory to resolve patterns from

        Returns:
            List of file paths (relative to working_dir); directories are
            excluded.
        """
        files = []
        base_path = self.recipe.project_root / working_dir

        for pattern in patterns:
            matches = base_path.glob(pattern)
            for match in matches:
                if match.is_file():
                    # Store paths relative to working_dir so they match the
                    # keys used in cached input_state.
                    rel_path = match.relative_to(base_path)
                    files.append(str(rel_path))

        return files

    def _update_state(self, task: Task, args_dict: dict[str, Any]) -> None:
        """Update state after task execution.

        Args:
            task: Task that was executed
            args_dict: Arguments used for execution
        """
        # Recompute the same cache key used by check_task_status.
        task_hash = hash_task(task.cmd, task.outputs, task.working_dir, task.args)
        args_hash = hash_args(args_dict) if args_dict else None
        cache_key = make_cache_key(task_hash, args_hash)

        # Snapshot current mtimes of every (existing) input file.
        all_inputs = self._get_all_inputs(task)
        input_files = self._expand_globs(all_inputs, task.working_dir)

        input_state = {}
        for file_path in input_files:
            file_path_obj = self.recipe.project_root / task.working_dir / file_path
            if file_path_obj.exists():
                input_state[file_path] = file_path_obj.stat().st_mtime

        state = TaskState(
            last_run=time.time(),
            input_state=input_state,
        )

        # Persist immediately so a later crash doesn't lose this run.
        self.state.set(cache_key, state)
        self.state.save()
tasktree/graph.py ADDED
@@ -0,0 +1,139 @@
1
+ """Dependency resolution using topological sorting."""
2
+
3
+ from graphlib import TopologicalSorter
4
+
5
+ from tasktree.parser import Recipe, Task
6
+
7
+
8
class CycleError(Exception):
    """Raised when a dependency cycle is detected."""
12
+
13
+
14
class TaskNotFoundError(Exception):
    """Raised when a task dependency doesn't exist."""
18
+
19
+
20
def resolve_execution_order(recipe: Recipe, target_task: str) -> list[str]:
    """Resolve execution order for a task and its dependencies.

    Args:
        recipe: Parsed recipe containing all tasks
        target_task: Name of the task to execute

    Returns:
        List of task names in execution order (dependencies first)

    Raises:
        TaskNotFoundError: If target task or any dependency doesn't exist
        CycleError: If a dependency cycle is detected
    """
    if target_task not in recipe.tasks:
        raise TaskNotFoundError(f"Task not found: {target_task}")

    # Map of task name -> set of direct dependency names, covering every
    # task reachable from the target.
    dependency_map: dict[str, set[str]] = {}

    def collect(name: str) -> None:
        """Depth-first walk that records each task's direct dependencies."""
        if name in dependency_map:
            return  # already visited
        current = recipe.tasks.get(name)
        if current is None:
            raise TaskNotFoundError(f"Task not found: {name}")
        dependency_map[name] = set(current.deps)
        for dep in current.deps:
            collect(dep)

    collect(target_task)

    # graphlib's CycleError subclasses ValueError, so this catch covers it.
    try:
        return list(TopologicalSorter(dependency_map).static_order())
    except ValueError as e:
        raise CycleError(f"Dependency cycle detected: {e}")
66
+
67
+
68
def get_implicit_inputs(recipe: Recipe, task: Task) -> list[str]:
    """Get implicit inputs for a task based on its dependencies.

    Tasks automatically inherit inputs from dependencies:
    1. All outputs from dependency tasks become implicit inputs
    2. All inputs from dependency tasks that don't declare outputs are
       inherited instead

    Args:
        recipe: Parsed recipe containing all tasks
        task: Task to get implicit inputs for

    Returns:
        List of glob patterns for implicit inputs
    """
    inherited: list[str] = []

    # Unknown dependency names are silently skipped.
    known_deps = (recipe.tasks[name] for name in task.deps if name in recipe.tasks)
    for dep in known_deps:
        # Prefer the dependency's outputs; fall back to its inputs when it
        # declares no outputs. "or []" covers deps with neither.
        inherited.extend(dep.outputs or dep.inputs or [])

    return inherited
97
+
98
+
99
def build_dependency_tree(recipe: Recipe, target_task: str) -> dict:
    """Build a tree structure representing dependencies for visualization.

    Note: This builds a true tree representation where shared dependencies may
    appear multiple times. Each dependency is shown in the context of its parent,
    allowing the full dependency path to be visible from any node.

    Args:
        recipe: Parsed recipe containing all tasks
        target_task: Name of the task to build tree for

    Returns:
        Nested dictionary representing the dependency tree

    Raises:
        TaskNotFoundError: If target task or any dependency doesn't exist
    """
    if target_task not in recipe.tasks:
        raise TaskNotFoundError(f"Task not found: {target_task}")

    # Names on the current recursion path, used to break cycles.
    active: set[str] = set()

    def descend(name: str) -> dict:
        """Build the subtree rooted at *name*."""
        node_task = recipe.tasks.get(name)
        if node_task is None:
            raise TaskNotFoundError(f"Task not found: {name}")

        # A back-edge to an ancestor is marked instead of recursing forever.
        if name in active:
            return {"name": name, "deps": [], "cycle": True}

        active.add(name)
        subtree = {
            "name": name,
            "deps": [descend(dep) for dep in node_task.deps],
        }
        active.discard(name)
        return subtree

    return descend(target_task)
tasktree/hasher.py ADDED
@@ -0,0 +1,74 @@
1
+ """Hashing logic for tasks and arguments."""
2
+
3
+ import hashlib
4
+ import json
5
+ from typing import Any
6
+
7
+
8
def hash_task(cmd: str, outputs: list[str], working_dir: str, args: list[str]) -> str:
    """Compute task definition hash.

    The hash includes:
    - cmd: The command to execute
    - outputs: Declared output files
    - working_dir: Execution directory
    - args: Parameter definitions (names and types)

    The hash excludes:
    - deps: Only affects scheduling order
    - inputs: Tracked separately via timestamps
    - desc: Documentation only

    Args:
        cmd: Command to execute
        outputs: List of output glob patterns
        working_dir: Working directory for execution
        args: List of argument definitions

    Returns:
        8-character hex hash string
    """
    # Canonical JSON form: sorted list fields, sorted keys, compact
    # separators — guarantees a stable digest for equivalent definitions.
    canonical = json.dumps(
        {
            "cmd": cmd,
            "outputs": sorted(outputs),
            "working_dir": working_dir,
            "args": sorted(args),
        },
        sort_keys=True,
        separators=(",", ":"),
    )
    return hashlib.sha256(canonical.encode()).hexdigest()[:8]
44
+
45
+
46
def hash_args(args_dict: dict[str, Any]) -> str:
    """Compute hash of task arguments.

    Args:
        args_dict: Dictionary of argument names to values

    Returns:
        8-character hex hash string
    """
    # Sorted keys + compact separators make the serialization independent
    # of dict insertion order.
    canonical = json.dumps(args_dict, sort_keys=True, separators=(",", ":"))
    return hashlib.sha256(canonical.encode()).hexdigest()[:8]
60
+
61
+
62
def make_cache_key(task_hash: str, args_hash: str | None = None) -> str:
    """Create cache key for task execution.

    Args:
        task_hash: Task definition hash
        args_hash: Optional arguments hash

    Returns:
        Cache key string (task_hash or task_hash__args_hash)
    """
    # A falsy args_hash (None or empty string) means no argument component.
    return f"{task_hash}__{args_hash}" if args_hash else task_hash