starbash 0.1.6__py3-none-any.whl → 0.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
starbash/app.py CHANGED
@@ -1,6 +1,8 @@
  import logging
  from importlib import resources
+ import os
  from pathlib import Path
+ import tempfile
  import typer
  import tomlkit
  from tomlkit.toml_file import TOMLFile
@@ -11,16 +13,20 @@ import itertools
  from rich.progress import track
  from rich.logging import RichHandler
  import shutil
+ from datetime import datetime
+ import rich.console
+ import copy

  import starbash
- from starbash import console
- from starbash.database import Database
- from starbash.repo.manager import Repo
- from starbash.tool import Tool
- from starbash.repo import RepoManager
+ from starbash import console, _is_test_env, to_shortdate
+ from starbash.database import Database, SessionRow, ImageRow, get_column_name
+ from repo import Repo, repo_suffix
+ from starbash.toml import toml_from_template
+ from starbash.tool import Tool, expand_context, expand_context_unsafe
+ from repo import RepoManager
  from starbash.tool import tools
  from starbash.paths import get_user_config_dir, get_user_data_dir
- from starbash.selection import Selection
+ from starbash.selection import Selection, where_tuple
  from starbash.analytics import (
      NopAnalytics,
      analytics_exception,
@@ -29,41 +35,41 @@ from starbash.analytics import (
      analytics_start_transaction,
  )

+ # Type aliases for better documentation

- def setup_logging():
+
+ def setup_logging(stderr: bool = False):
      """
      Configures basic logging.
      """
+     console = rich.console.Console(stderr=stderr)
+     handlers = (
+         [RichHandler(console=console, rich_tracebacks=True)] if not _is_test_env else []
+     )
      logging.basicConfig(
          level=starbash.log_filter_level,  # use the global log filter level
          format="%(message)s",
          datefmt="[%X]",
-         handlers=[RichHandler(rich_tracebacks=True)],
+         handlers=handlers,
      )
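
Routing logs to stderr keeps stdout clean for piped output. A minimal sketch of a caller using the new flag, based only on the API shown in this diff (the "export" command name is illustrative):

    # Hypothetical usage: logs go to stderr, stdout stays machine-readable.
    from starbash.app import Starbash

    with Starbash(cmd="export", stderr_logging=True) as app:
        for session in app.search_session():
            print(session)  # safe to pipe, e.g. `starbash export | head`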


  def get_user_config_path() -> Path:
      """Returns the path to the user config file."""
      config_dir = get_user_config_dir()
-     return config_dir / "starbash.toml"
+     return config_dir / repo_suffix


  def create_user() -> Path:
      """Create user directories if they don't exist yet."""
      path = get_user_config_path()
      if not path.exists():
-         tomlstr = (
-             resources.files("starbash")
-             .joinpath("templates/userconfig.toml")
-             .read_text()
-         )
-         toml = tomlkit.parse(tomlstr)
-         TOMLFile(path).write(toml)
+         toml_from_template("userconfig", path)
      logging.info(f"Created user config file: {path}")
      return get_user_config_dir()


- def copy_images_to_dir(images: list[dict[str, Any]], output_dir: Path) -> None:
+ def copy_images_to_dir(images: list[ImageRow], output_dir: Path) -> None:
      """Copy images to the specified output directory (using symbolic links if possible)."""

      # Export images
@@ -112,15 +118,23 @@ def copy_images_to_dir(images: list[dict[str, Any]], output_dir: Path) -> None:
      console.print(f" [red]Errors: {error_count} files[/red]")


+ def imagetyp_equals(imagetyp1: str, imagetyp2: str) -> bool:
+     """Imagetyps (BIAS, Dark, FLAT, flats) have a number of slightly different conventions.
+     Do a sloppy equality check.
+
+     Eventually handle non-English variants by using the repo's aliases table."""
+     return imagetyp1.strip().lower() == imagetyp2.strip().lower()
+
+
  class Starbash:
      """The main Starbash application class."""

-     def __init__(self, cmd: str = "unspecified"):
+     def __init__(self, cmd: str = "unspecified", stderr_logging: bool = False):
          """
          Initializes the Starbash application by loading configurations
          and setting up the repository manager.
          """
-         setup_logging()
+         setup_logging(stderr=stderr_logging)
          logging.info("Starbash starting...")

          # Load app defaults and initialize the repository manager
@@ -148,23 +162,45 @@ class Starbash:
          )
          # self.repo_manager.dump()

-         self.db = Database()
+         self._db = None  # Lazy initialization - only create when accessed
          self.session_query = None  # None means search all sessions

-         # Initialize selection state
-         data_dir = get_user_data_dir()
-         selection_file = data_dir / "selection.json"
-         self.selection = Selection(selection_file)
+         # Initialize selection state (stored in user config repo)
+         self.selection = Selection(self.user_repo)

          # FIXME, call reindex somewhere and also index whenever new repos are added
          # self.reindex_repos()

+     @property
+     def db(self) -> Database:
+         """Lazy initialization of database - only created as needed."""
+         if self._db is None:
+             self._db = Database()
+             # Ensure all repos are registered in the database
+             self.repo_db_update()
+         return self._db
+
+     def repo_db_update(self) -> None:
+         """Update the database with all managed repositories.
+
+         Iterates over all repos in the RepoManager and ensures each one
+         has a record in the repos table. This is called during lazy database
+         initialization to prepare repo_id values for image insertion.
+         """
+         if self._db is None:
+             return
+
+         for repo in self.repo_manager.repos:
+             self._db.upsert_repo(repo.url)
+             logging.debug(f"Registered repo in database: {repo.url}")
+
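
The `db` property defers creating the `Database` until something actually touches it, and `close()` only tears down what was opened. A self-contained sketch of the same lazy-initialization pattern (names and the dict stand-in are illustrative):

    class Service:
        def __init__(self) -> None:
            self._db = None  # deferred: nothing is opened yet

        @property
        def db(self) -> dict:
            if self._db is None:
                self._db = {"open": True}  # stand-in for the expensive Database()
            return self._db

        def close(self) -> None:
            if self._db is not None:  # close only what was actually created
                self._db = None

    svc = Service()
    svc.close()  # safe even though the "database" was never created
    _ = svc.db   # first access triggers creation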
      # --- Lifecycle ---
      def close(self) -> None:
          self.analytics.__exit__(None, None, None)

          analytics_shutdown()
-         self.db.close()
+         if self._db is not None:
+             self._db.close()

      # Context manager support
      def __enter__(self) -> "Starbash":
@@ -179,6 +215,7 @@ class Starbash:
          return handled

      def _add_session(self, f: str, image_doc_id: int, header: dict) -> None:
+         """We just added a new image; create or update its session entry as needed."""
          filter = header.get(Database.FILTER_KEY, "unspecified")
          image_type = header.get(Database.IMAGETYP_KEY)
          date = header.get(Database.DATE_OBS_KEY)
@@ -204,17 +241,154 @@ class Starbash:
          session = self.db.get_session(new)
          self.db.upsert_session(new, existing=session)

-     def search_session(self) -> list[dict[str, Any]] | None:
+     def guess_sessions(
+         self, ref_session: SessionRow, want_type: str
+     ) -> list[SessionRow]:
+         """Given a particular session type (e.g. FLAT or BIAS) and an
+         existing session (which is assumed to generally be a LIGHT frame based session):
+
+         Return a list of possible sessions which would be acceptable. The more desirable
+         matches are first in the list. Possibly in the future I might have a 'score' and reason
+         given for each ranking.
+
+         The following criteria MUST match to be acceptable:
+         * matches requested imagetyp.
+         * same filter as reference session (in the case want_type==FLAT only)
+         * same telescope as reference session
+
+         Quality is determined by (most important first):
+         * temperature of CCD-TEMP is closer to the reference session
+         * smaller DATE-OBS delta to the reference session
+
+         Eventually the code will check the following for 'nice to have' (but not now):
+         * TBD
+
+         Possibly eventually this code could be moved into recipes.
+
+         """
+         # Get reference image to access CCD-TEMP and DATE-OBS
+         metadata: dict = ref_session.get("metadata", {})
+         ref_temp = metadata.get("CCD-TEMP", None)
+         ref_date_str = metadata.get(Database.DATE_OBS_KEY)
+
+         # Parse reference date for time delta calculations
+         ref_date = None
+         if ref_date_str:
+             try:
+                 ref_date = datetime.fromisoformat(ref_date_str)
+             except (ValueError, TypeError):
+                 logging.warning(f"Malformed session ref date: {ref_date_str}")
+
+         # Build search conditions - MUST match criteria
+         conditions = {
+             Database.IMAGETYP_KEY: want_type,
+             Database.TELESCOP_KEY: ref_session[get_column_name(Database.TELESCOP_KEY)],
+         }
+
+         # For FLAT frames, filter must match the reference session
+         if want_type.upper() == "FLAT":
+             conditions[Database.FILTER_KEY] = ref_session[
+                 get_column_name(Database.FILTER_KEY)
+             ]
+
+         # Search for candidate sessions
+         candidates = self.db.search_session(where_tuple(conditions))
+
+         # Now score and sort the candidates
+         scored_candidates = []
+
+         for candidate in candidates:
+             score = 0.0
+
+             # Get candidate image metadata to access CCD-TEMP and DATE-OBS
+             try:
+                 candidate_image = candidate.get("metadata", {})
+
+                 # Score by CCD-TEMP difference (most important)
+                 # Lower temperature difference = better score
+                 if ref_temp is not None:
+                     candidate_temp = candidate_image.get("CCD-TEMP")
+                     if candidate_temp is not None:
+                         try:
+                             temp_diff = abs(float(ref_temp) - float(candidate_temp))
+                             # Use exponential decay: closer temps get much better scores
+                             # Perfect match (0°C diff) = 1000, 1°C diff ≈ 368, 2°C diff ≈ 135
+                             score += 1000 * (2.718 ** (-temp_diff))
+                         except (ValueError, TypeError):
+                             # If we can't parse temps, give a neutral score
+                             score += 0
+
+                 # Score by date/time proximity (secondary importance)
+                 if ref_date is not None:
+                     candidate_date_str = candidate_image.get(Database.DATE_OBS_KEY)
+                     if candidate_date_str:
+                         try:
+                             candidate_date = datetime.fromisoformat(candidate_date_str)
+                             time_delta = abs(
+                                 (ref_date - candidate_date).total_seconds()
+                             )
+                             # Closer in time = better score
+                             # Same day ≈ 100, 7 days ≈ 37, 30 days ≈ 1.4
+                             # Using a 7-day time constant (e-folding)
+                             score += 100 * (2.718 ** (-time_delta / (7 * 86400)))
+                         except (ValueError, TypeError):
+                             logging.warning(
+                                 f"Could not parse candidate date: {candidate_date_str}"
+                             )
+
+                 scored_candidates.append((score, candidate))
+
+             except (AssertionError, KeyError) as e:
+                 # If we can't get the session image, log and skip this candidate
+                 logging.warning(
+                     f"Could not score candidate session {candidate.get('id')}: {e}"
+                 )
+                 continue
+
+         # Sort by score (highest first) and return just the sessions
+         scored_candidates.sort(key=lambda x: x[0], reverse=True)
+
+         return [candidate for score, candidate in scored_candidates]
+
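
A quick sanity check of those decay weights, using math.exp rather than the literal 2.718 the code carries (the rounded values match the comments above):

    import math

    def temp_score(temp_diff_c: float) -> float:
        return 1000 * math.exp(-temp_diff_c)

    def date_score(delta_seconds: float) -> float:
        return 100 * math.exp(-delta_seconds / (7 * 86400))

    print(round(temp_score(0.0)))        # 1000: same CCD temperature
    print(round(temp_score(1.0)))        # 368: one degree away
    print(round(temp_score(2.0)))        # 135: two degrees away
    print(round(date_score(7 * 86400)))  # 37: one week apart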
+     def search_session(self) -> list[SessionRow]:
          """Search for sessions, optionally filtered by the current selection."""
-         # If selection has filters, use them; otherwise return all sessions
-         if self.selection.is_empty():
-             return self.db.search_session(None)
-         else:
-             # Get query conditions from selection
-             conditions = self.selection.get_query_conditions()
-             return self.db.search_session(conditions)
+         # Get query conditions from selection
+         conditions = self.selection.get_query_conditions()
+         return self.db.search_session(conditions)

-     def get_session_images(self, session_id: int) -> list[dict[str, Any]]:
+     def _reconstruct_image_path(self, image: ImageRow) -> ImageRow:
+         """Reconstruct absolute path from image row containing repo_url and relative path.
+
+         Args:
+             image: Image record with 'repo_url' and 'path' (relative) fields
+
+         Returns:
+             Modified image record with 'path' as absolute path
+         """
+         repo_url = image.get("repo_url")
+         relative_path = image.get("path")
+
+         if repo_url and relative_path:
+             repo = self.repo_manager.get_repo_by_url(repo_url)
+             if repo:
+                 absolute_path = repo.resolve_path(relative_path)
+                 image["path"] = str(absolute_path)
+
+         return image
+
+     def get_session_image(self, session: SessionRow) -> ImageRow:
+         """
+         Get the reference ImageRow for a session with absolute path.
+         """
+         images = self.db.search_image(
+             {Database.ID_KEY: session[get_column_name(Database.IMAGE_DOC_KEY)]}
+         )
+         assert (
+             len(images) == 1
+         ), f"Expected exactly one reference for session, found {len(images)}"
+         return self._reconstruct_image_path(images[0])
+
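
Image paths are now stored relative to their repo root and joined back to the repo's filesystem path at query time, so a repo can move on disk without reindexing. A sketch of the round trip using only pathlib (paths are illustrative):

    from pathlib import Path

    repo_root = Path("/data/astro/raw")              # the repo's filesystem root
    f = repo_root / "2024-01-05" / "light_001.fits"  # absolute path seen by the indexer
    stored = str(f.relative_to(repo_root))           # what the database records
    restored = repo_root / stored                    # what _reconstruct_image_path rebuilds
    assert restored == f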
+     def get_session_images(self, session: SessionRow) -> list[ImageRow]:
          """
          Get all images belonging to a specific session.
@@ -232,24 +406,20 @@ class Starbash:
          Raises:
              ValueError: If session_id is not found in the database
          """
-         # First get the session details
-         session = self.db.get_session_by_id(session_id)
-         if session is None:
-             raise ValueError(f"Session with id {session_id} not found")
-
          # Query images that match ALL session criteria including date range
          conditions = {
-             Database.FILTER_KEY: session[Database.FILTER_KEY],
-             Database.IMAGETYP_KEY: session[Database.IMAGETYP_KEY],
-             Database.OBJECT_KEY: session[Database.OBJECT_KEY],
-             Database.TELESCOP_KEY: session[Database.TELESCOP_KEY],
-             "date_start": session[Database.START_KEY],
-             "date_end": session[Database.END_KEY],
+             Database.FILTER_KEY: session[get_column_name(Database.FILTER_KEY)],
+             Database.IMAGETYP_KEY: session[get_column_name(Database.IMAGETYP_KEY)],
+             Database.OBJECT_KEY: session[get_column_name(Database.OBJECT_KEY)],
+             Database.TELESCOP_KEY: session[get_column_name(Database.TELESCOP_KEY)],
+             "date_start": session[get_column_name(Database.START_KEY)],
+             "date_end": session[get_column_name(Database.END_KEY)],
          }

          # Single query with all conditions
          images = self.db.search_image(conditions)
-         return images if images else []
+         # Reconstruct absolute paths for all images
+         return [self._reconstruct_image_path(img) for img in images] if images else []

      def remove_repo_ref(self, url: str) -> None:
          """
@@ -261,6 +431,8 @@ class Starbash:
          Raises:
              ValueError: If the repository URL is not found in user configuration
          """
+         self.db.remove_repo(url)
+
          # Get the repo-ref list from user config
          repo_refs = self.user_repo.config.get("repo-ref")
@@ -286,6 +458,10 @@ class Starbash:

      def reindex_repo(self, repo: Repo, force: bool = False):
          """Reindex all repositories managed by the RepoManager."""
+
+         # make sure this new repo is listed in the repos table
+         self.repo_db_update()  # not really ideal; a more optimal version would just add the new repo
+
          # FIXME, add a method to get just the repos that contain images
          if repo.is_scheme("file") and repo.kind != "recipe":
              logging.debug("Reindexing %s...", repo.url)
@@ -306,7 +482,10 @@ class Starbash:
              ):
                  # progress.console.print(f"Indexing {f}...")
                  try:
-                     found = self.db.get_image(str(f))
+                     # Convert absolute path to relative path within repo
+                     relative_path = f.relative_to(path)
+
+                     found = self.db.get_image(repo.url, str(relative_path))
                      if not found or force:
                          # Read and log the primary header (HDU 0)
                          with fits.open(str(f), memmap=False) as hdul:
@@ -322,8 +501,9 @@ class Starbash:
                                  if (not whitelist) or (key in whitelist):
                                      headers[key] = value
                          logging.debug("Headers for %s: %s", f, headers)
-                         headers["path"] = str(f)
-                         image_doc_id = self.db.upsert_image(headers)
+                         # Store relative path in database
+                         headers["path"] = str(relative_path)
+                         image_doc_id = self.db.upsert_image(headers, repo.url)

                      if not found:
                          # Update the session infos, but ONLY on first file scan
@@ -340,10 +520,6 @@ class Starbash:
          for repo in track(self.repo_manager.repos, description="Reindexing repos..."):
              self.reindex_repo(repo, force=force)

-     def test_processing(self):
-         """A crude test of image processing pipeline - FIXME move into testing"""
-         self.run_all_stages()
-
      def run_all_stages(self):
          """On the currently active session, run all processing stages"""
          logging.info("--- Running all stages ---")
@@ -361,30 +537,77 @@ class Starbash:
                  f"invalid stage definition: a stage is missing the required 'priority' key"
              ) from e

-         # 3. Get all available task definitions (the `[[stage]]` tables with tool, script, when).
-         task_definitions = self.repo_manager.merged.getall("stage")
-         all_tasks = list(itertools.chain.from_iterable(task_definitions))
-
          logging.info(
              f"Found {len(sorted_pipeline)} pipeline steps to run in order of priority."
          )

-         self.start_session()
+         self.init_context()
          # 4. Iterate through the sorted pipeline and execute the associated tasks.
          for step in sorted_pipeline:
              step_name = step.get("name")
              if not step_name:
                  raise ValueError("Invalid pipeline step found: missing 'name' key.")
+             self.run_pipeline_step(step_name)
+
+     def run_pipeline_step(self, step_name: str):
+         logging.info(f"--- Running pipeline step: '{step_name}' ---")

-             logging.info(
-                 f"--- Running pipeline step: '{step_name}' (Priority: {step['priority']}) ---"
+         # 3. Get all available task definitions (the `[[stage]]` tables with tool, script, when).
+         task_definitions = self.repo_manager.merged.getall("stage")
+         all_tasks = list(itertools.chain.from_iterable(task_definitions))
+
+         # Find all tasks that should run during this pipeline step.
+         tasks_to_run = [task for task in all_tasks if task.get("when") == step_name]
+         for task in tasks_to_run:
+             self.run_stage(task)
+
+     def run_master_stages(self):
+         """Generate any missing master frames.
+
+         Steps:
+         * set all_tasks to be all tasks for when == "setup.masters"
+         * loop over all currently unfiltered sessions
+         * for each session loop across all_tasks
+         * if task input.type == the imagetyp for this current session
+         * add_session_to_context() adds the input files to the context (from the session)
+         * run_stage(task) to generate the new master frame
+         """
+         sessions = self.search_session()
+         for session in sessions:
+             imagetyp = session[get_column_name(Database.IMAGETYP_KEY)]
+             logging.debug(
+                 f"Processing session ID {session[get_column_name(Database.ID_KEY)]} with imagetyp '{imagetyp}'"
              )

-             # Find all tasks that should run during this pipeline step.
-             tasks_to_run = [task for task in all_tasks if task.get("when") == step_name]
-             for task in tasks_to_run:
-                 self.run_stage(task)

-     def start_session(self) -> None:
+             # 3. Get all available task definitions (the `[[stage]]` tables with tool, script, when).
+             task_definitions = self.repo_manager.merged.getall("stage")
+             all_tasks = list(itertools.chain.from_iterable(task_definitions))
+
+             # Find all tasks that should run during the "setup.masters" step.
+             tasks_to_run = [
+                 task for task in all_tasks if task.get("when") == "setup.masters"
+             ]
+
+             for task in tasks_to_run:
+                 input_config = task.get("input", {})
+                 input_type = input_config.get("type")
+                 if imagetyp_equals(input_type, imagetyp):
+                     logging.info(
+                         f" Running master stage task for imagetyp '{imagetyp}'"
+                     )
+
+                     # Create a default process dir in /tmp, though more advanced 'session' based workflows will
+                     # probably override this and place it somewhere persistent.
+                     with tempfile.TemporaryDirectory(prefix="session_tmp_") as temp_dir:
+                         logging.debug(
+                             f"Created temporary session directory: {temp_dir}"
+                         )
+                         self.init_context()
+                         self.context["process_dir"] = temp_dir
+                         self.add_session_to_context(session)
+                         self.run_stage(task)
+
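
For reference, the shape of a merged task definition this dispatch reads, written as the Python dict it arrives as after the TOML tables are merged (all values here are invented for illustration, not a shipped recipe):

    task = {
        "description": "Stack bias frames into a master bias",
        "when": "setup.masters",  # matched against the pipeline step name
        "tool": "siril",          # hypothetical tool/script pairing
        "script": "stack_bias.ssf",
        "input": {"source": "repo", "type": "BIAS", "required": True},
        "output": {"dest": "repo", "type": "master"},
    }
    assert task["input"]["type"] == "BIAS"  # compared via imagetyp_equals()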
+     def init_context(self) -> None:
          """Do common session init"""

          # Context is preserved through all stages, so each stage can add new symbols to it for use by later stages
@@ -392,11 +615,152 @@ class Starbash:

          # Update the context with runtime values.
          runtime_context = {
-             "process_dir": "/workspaces/starbash/images/process",  # FIXME - create/find this more correctly per session
              "masters": "/workspaces/starbash/images/masters",  # FIXME find this the correct way
          }
          self.context.update(runtime_context)

+     def add_session_to_context(self, session: SessionRow) -> None:
+         """adds to context from the indicated session:
+         * input_files - all of the files mentioned in the session
+         * instrument - for the session
+         * date - the local timezone date of the session
+         * imagetyp - the imagetyp of the session
+         * session - the current session row (joined with a typical image) (can be used to
+           find things like telescope, temperature ...)
+         """
+         # Get images for this session
+         images = self.get_session_images(session)
+         logging.debug(f"Adding {len(images)} files as context.input_files")
+         self.context["input_files"] = [
+             img["path"] for img in images
+         ]  # Pass in the file list via the context dict
+
+         # it is okay to give them the actual session row, because we're never using it again
+         self.context["session"] = session
+
+         instrument = session.get(get_column_name(Database.TELESCOP_KEY))
+         if instrument:
+             self.context["instrument"] = instrument
+
+         imagetyp = session.get(get_column_name(Database.IMAGETYP_KEY))
+         if imagetyp:
+             self.context["imagetyp"] = imagetyp
+
+         date = session.get(get_column_name(Database.START_KEY))
+         if date:
+             self.context["date"] = to_shortdate(date)
+
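
After this call the context carries roughly the following keys; a sketch with invented values (the real SessionRow has more columns, and the to_shortdate() output format is assumed):

    context = {
        "process_dir": "/tmp/session_tmp_ab12cd",   # set by run_master_stages
        "masters": "/workspaces/starbash/images/masters",
        "input_files": ["/data/astro/raw/2024-01-05/light_001.fits"],
        "session": {"id": 42},                      # actual session row in practice
        "instrument": "EdgeHD 8",
        "imagetyp": "LIGHT",
        "date": "2024-01-05",                       # assumed short-date format
    }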
+     def add_input_files(self, stage: dict) -> None:
+         """adds to context.input_files based on the stage input config"""
+         input_config = stage.get("input")
+         input_required = False
+         if input_config:
+             # if there is an "input" dict, we assume input.required is true if unset
+             input_required = input_config.get("required", True)
+             source = input_config.get("source")
+             if source is None:
+                 raise ValueError(
+                     f"Stage '{stage.get('name')}' has invalid 'input' configuration: missing 'source'"
+                 )
+             if source == "path":
+                 # The path might contain context variables that need to be expanded.
+                 # path_pattern = expand_context(input_config["path"], context)
+                 path_pattern = input_config["path"]
+                 input_files = glob.glob(path_pattern, recursive=True)
+
+                 self.context["input_files"] = (
+                     input_files  # Pass in the file list via the context dict
+                 )
+             elif source == "repo":
+                 # We expect that higher level code has already added the correct input files
+                 # to the context
+                 if not "input_files" in self.context:
+                     raise RuntimeError(
+                         "Input config specifies 'repo' but no 'input_files' found in context"
+                     )
+             else:
+                 raise ValueError(
+                     f"Stage '{stage.get('name')}' has invalid 'input' source: {source}"
+                 )

+             # FIXME compare context.output to see if it already exists and is newer than the input files, if so skip processing
+         else:
+             # The script doesn't mention input, therefore assume it doesn't want input_files
+             if "input_files" in self.context:
+                 del self.context["input_files"]
+
+         if input_required and not "input_files" in self.context:
+             raise RuntimeError("No input files found for stage")
+
+     def add_output_path(self, stage: dict) -> None:
+         """Adds output path information to context based on the stage output config.
+
+         Sets the following context variables:
+         - context.output.root_path - base path of the destination repo
+         - context.output.base_path - full path without file extension
+         - context.output.suffix - file extension (e.g., .fits or .fit.gz)
+         - context.output.full_path - complete output file path
+         """
+         output_config = stage.get("output")
+         if not output_config:
+             # No output configuration, remove any existing output from context
+             if "output" in self.context:
+                 del self.context["output"]
+             return
+
+         dest = output_config.get("dest")
+         if not dest:
+             raise ValueError(
+                 f"Stage '{stage.get('description', 'unknown')}' has 'output' config but missing 'dest'"
+             )
+
+         if dest == "repo":
+             # Find the destination repo by type/kind
+             output_type = output_config.get("type")
+             if not output_type:
+                 raise ValueError(
+                     f"Stage '{stage.get('description', 'unknown')}' has output.dest='repo' but missing 'type'"
+                 )
+
+             # Find the repo with matching kind
+             dest_repo = self.repo_manager.get_repo_by_kind(output_type)
+             if not dest_repo:
+                 raise ValueError(
+                     f"No repository found with kind '{output_type}' for output destination"
+                 )
+
+             repo_base = dest_repo.get_path()
+             if not repo_base:
+                 raise ValueError(f"Repository '{dest_repo.url}' has no filesystem path")
+
+             repo_relative: str | None = dest_repo.get("repo.relative")
+             if not repo_relative:
+                 raise ValueError(
+                     f"Repository '{dest_repo.url}' is missing 'repo.relative' configuration"
+                 )
+
+             # we support context variables in the relative path
+             repo_relative = expand_context_unsafe(repo_relative, self.context)
+             full_path = repo_base / repo_relative
+
+             # full_path but without spaces - because Siril doesn't like that
+             full_path = Path(str(full_path).replace(" ", r"_"))
+
+             base_path = full_path.parent / full_path.stem
+
+             # Set context variables as documented in the TOML
+             self.context["output"] = {
+                 # "root_path": repo_relative, not needed I think
+                 "base_path": base_path,
+                 # "suffix": full_path.suffix, not needed I think
+                 "full_path": full_path,
+             }
+
+         else:
+             raise ValueError(
+                 f"Unsupported output destination type: {dest}. Only 'repo' is currently supported."
+             )
+
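
A sketch of the path shaping add_output_path performs, with an invented repo layout and template, and str.format standing in for expand_context_unsafe:

    from pathlib import Path

    repo_base = Path("/data/astro/masters")
    template = "{date}/master {imagetyp}.fits"            # hypothetical 'repo.relative' value
    context = {"date": "2024-01-05", "imagetyp": "BIAS"}

    full_path = repo_base / template.format(**context)    # expand context variables
    full_path = Path(str(full_path).replace(" ", "_"))    # Siril dislikes spaces in paths
    base_path = full_path.parent / full_path.stem         # same path, suffix stripped

    print(full_path)  # /data/astro/masters/2024-01-05/master_BIAS.fits
    print(base_path)  # /data/astro/masters/2024-01-05/master_BIAS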
      def run_stage(self, stage: dict) -> None:
          """
          Executes a single processing stage.
@@ -438,30 +802,29 @@ class Starbash:
          )

          # This allows recipe TOML to define their own default variables.
+         # (apply all of the changes to context that the task demands)
          stage_context = stage.get("context", {})
          self.context.update(stage_context)
+         self.add_input_files(stage)
+         self.add_output_path(stage)

-         # Assume no files for this stage
-         if "input_files" in self.context:
-             del self.context["input_files"]
+         # if the output path already exists, skip processing (comparing its age
+         # against the input files is still a FIXME)
+         output_info: dict | None = self.context.get("output")
+         if output_info:
+             output_path = output_info.get("full_path")

-         input_files = []
-         input_config = stage.get("input")
-         input_required = False
-         if input_config:
-             # if there is an "input" dict, we assume input.required is true if unset
-             input_required = input_config.get("required", True)
-             if "path" in input_config:
-                 # The path might contain context variables that need to be expanded.
-                 # path_pattern = expand_context(input_config["path"], context)
-                 path_pattern = input_config["path"]
-                 input_files = glob.glob(path_pattern, recursive=True)
+             if output_path and os.path.exists(output_path):
+                 logging.info(
+                     f"Output file already exists, skipping processing: {output_path}"
+                 )
+                 return

-             self.context["input_files"] = (
-                 input_files  # Pass in the file list via the context dict
-             )
+         tool.run_in_temp_dir(script, context=self.context)

-         if input_required and not input_files:
-             raise RuntimeError("No input files found for stage")
+         # verify context.output was created if it was specified
+         output_info: dict | None = self.context.get("output")
+         if output_info:
+             output_path = output_info.get("full_path")

-         tool.run_in_temp_dir(script, context=self.context)
+             if not output_path or not os.path.exists(output_path):
+                 raise RuntimeError(f"Expected output file not found: {output_path}")