starbash 0.1.8__py3-none-any.whl → 0.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
starbash/app.py CHANGED
@@ -1,6 +1,8 @@
  import logging
  from importlib import resources
+ import os
  from pathlib import Path
+ import tempfile
  import typer
  import tomlkit
  from tomlkit.toml_file import TOMLFile
@@ -11,16 +13,20 @@ import itertools
  from rich.progress import track
  from rich.logging import RichHandler
  import shutil
+ from datetime import datetime
+ import rich.console
+ import copy

  import starbash
- from starbash import console, _is_test_env
- from starbash.database import Database
- from repo.manager import Repo
- from starbash.tool import Tool
+ from starbash import console, _is_test_env, to_shortdate
+ from starbash.database import Database, SessionRow, ImageRow, get_column_name
+ from repo import Repo, repo_suffix
+ from starbash.toml import toml_from_template
+ from starbash.tool import Tool, expand_context, expand_context_unsafe
  from repo import RepoManager
  from starbash.tool import tools
  from starbash.paths import get_user_config_dir, get_user_data_dir
- from starbash.selection import Selection
+ from starbash.selection import Selection, where_tuple
  from starbash.analytics import (
      NopAnalytics,
      analytics_exception,
@@ -29,12 +35,17 @@ from starbash.analytics import (
      analytics_start_transaction,
  )

+ # Type aliases for better documentation

- def setup_logging():
+
+ def setup_logging(stderr: bool = False):
      """
      Configures basic logging.
      """
-     handlers = [RichHandler(rich_tracebacks=True)] if not _is_test_env else []
+     console = rich.console.Console(stderr=stderr)
+     handlers = (
+         [RichHandler(console=console, rich_tracebacks=True)] if not _is_test_env else []
+     )
      logging.basicConfig(
          level=starbash.log_filter_level,  # use the global log filter level
          format="%(message)s",
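
A note on the `stderr` change: routing the `RichHandler` through a stderr `Console` keeps log output off stdout, so command results can still be piped or captured cleanly. A minimal standalone sketch of the same wiring (not starbash code):

    import logging
    from rich.console import Console
    from rich.logging import RichHandler

    # Logs go to stderr; stdout stays free for machine-readable output.
    logging.basicConfig(
        level="INFO",
        format="%(message)s",
        handlers=[RichHandler(console=Console(stderr=True), rich_tracebacks=True)],
    )
    logging.info("this line goes to stderr")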
@@ -46,25 +57,19 @@ def setup_logging():
  def get_user_config_path() -> Path:
      """Returns the path to the user config file."""
      config_dir = get_user_config_dir()
-     return config_dir / "starbash.toml"
+     return config_dir / repo_suffix


  def create_user() -> Path:
      """Create user directories if they don't exist yet."""
      path = get_user_config_path()
      if not path.exists():
-         tomlstr = (
-             resources.files("starbash")
-             .joinpath("templates/userconfig.toml")
-             .read_text()
-         )
-         toml = tomlkit.parse(tomlstr)
-         TOMLFile(path).write(toml)
+         toml_from_template("userconfig", path)
          logging.info(f"Created user config file: {path}")
      return get_user_config_dir()


- def copy_images_to_dir(images: list[dict[str, Any]], output_dir: Path) -> None:
+ def copy_images_to_dir(images: list[ImageRow], output_dir: Path) -> None:
      """Copy images to the specified output directory (using symbolic links if possible)."""

      # Export images
@@ -113,15 +118,23 @@ def copy_images_to_dir(images: list[dict[str, Any]], output_dir: Path) -> None:
      console.print(f" [red]Errors: {error_count} files[/red]")

+ def imagetyp_equals(imagetyp1: str, imagetyp2: str) -> bool:
+     """Imagetyps (BIAS, Dark, FLAT, flats) have a number of slightly different conventions.
+     Do a sloppy equality check.
+
+     Eventually handle non-English variants by using the repo's aliases table."""
+     return imagetyp1.strip().lower() == imagetyp2.strip().lower()
+
+
  class Starbash:
      """The main Starbash application class."""

-     def __init__(self, cmd: str = "unspecified"):
+     def __init__(self, cmd: str = "unspecified", stderr_logging: bool = False):
          """
          Initializes the Starbash application by loading configurations
          and setting up the repository manager.
          """
-         setup_logging()
+         setup_logging(stderr=stderr_logging)
          logging.info("Starbash starting...")

          # Load app defaults and initialize the repository manager
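
The new sloppy comparison only normalizes case and surrounding whitespace; as its docstring notes, aliases (plural forms, non-English names) are future work. A quick illustration:

    def imagetyp_equals(a: str, b: str) -> bool:
        # same comparison as above: trim whitespace, fold case
        return a.strip().lower() == b.strip().lower()

    print(imagetyp_equals("BIAS", "  bias "))  # True  (case/whitespace ignored)
    print(imagetyp_equals("Dark", "DARK"))     # True
    print(imagetyp_equals("FLAT", "flats"))    # False (plural aliases not handled yet)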
@@ -149,23 +162,45 @@ class Starbash:
          )
          # self.repo_manager.dump()

-         self.db = Database()
+         self._db = None  # Lazy initialization - only create when accessed
          self.session_query = None  # None means search all sessions

-         # Initialize selection state
-         data_dir = get_user_data_dir()
-         selection_file = data_dir / "selection.json"
-         self.selection = Selection(selection_file)
+         # Initialize selection state (stored in user config repo)
+         self.selection = Selection(self.user_repo)

          # FIXME, call reindex somewhere and also index whenever new repos are added
          # self.reindex_repos()

+     @property
+     def db(self) -> Database:
+         """Lazy initialization of database - only created as needed."""
+         if self._db is None:
+             self._db = Database()
+             # Ensure all repos are registered in the database
+             self.repo_db_update()
+         return self._db
+
+     def repo_db_update(self) -> None:
+         """Update the database with all managed repositories.
+
+         Iterates over all repos in the RepoManager and ensures each one
+         has a record in the repos table. This is called during lazy database
+         initialization to prepare repo_id values for image insertion.
+         """
+         if self._db is None:
+             return
+
+         for repo in self.repo_manager.repos:
+             self._db.upsert_repo(repo.url)
+             logging.debug(f"Registered repo in database: {repo.url}")
+
      # --- Lifecycle ---
      def close(self) -> None:
          self.analytics.__exit__(None, None, None)

          analytics_shutdown()
-         self.db.close()
+         if self._db is not None:
+             self._db.close()

      # Context manager support
      def __enter__(self) -> "Starbash":
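
The new `db` property is the usual memoized-property idiom: defer the expensive `Database()` construction until first access, then cache it on the instance (`close()` correspondingly checks `_db` directly so teardown never triggers a construction). A minimal sketch of the pattern with a hypothetical resource, not the starbash API:

    class LazyHolder:
        """Defer building an expensive resource until it is first used."""

        def __init__(self) -> None:
            self._resource: dict | None = None  # not built yet

        @property
        def resource(self) -> dict:
            if self._resource is None:  # first access: build and cache
                self._resource = {"connected": True}
            return self._resource  # later accesses reuse the cached object

    holder = LazyHolder()
    # nothing has been built yet; construction happens on first attribute access
    assert holder.resource["connected"]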
@@ -180,6 +215,7 @@ class Starbash:
          return handled

      def _add_session(self, f: str, image_doc_id: int, header: dict) -> None:
+         """We just added a new image; create or update its session entry as needed."""
          filter = header.get(Database.FILTER_KEY, "unspecified")
          image_type = header.get(Database.IMAGETYP_KEY)
          date = header.get(Database.DATE_OBS_KEY)
@@ -205,13 +241,154 @@ class Starbash:
          session = self.db.get_session(new)
          self.db.upsert_session(new, existing=session)

-     def search_session(self) -> list[dict[str, Any]]:
+     def guess_sessions(
+         self, ref_session: SessionRow, want_type: str
+     ) -> list[SessionRow]:
+         """Given a particular session type (i.e. FLAT or BIAS etc...) and an
+         existing session (which is assumed to generally be a LIGHT frame based session):
+
+         Return a list of possible sessions which would be acceptable. The more desirable
+         matches are first in the list. Possibly in the future I might have a 'score' and reason
+         given for each ranking.
+
+         The following criteria MUST match to be acceptable:
+         * matches requested imagetyp.
+         * same filter as reference session (in the case want_type==FLAT only)
+         * same telescope as reference session
+
+         Quality is determined by (most important first):
+         * temperature of CCD-TEMP is closer to the reference session
+         * smaller DATE-OBS delta to the reference session
+
+         Eventually the code will check the following for 'nice to have' (but not now):
+         * TBD
+
+         Possibly eventually this code could be moved into recipes.
+
+         """
+         # Get reference image to access CCD-TEMP and DATE-OBS
+         metadata: dict = ref_session.get("metadata", {})
+         ref_temp = metadata.get("CCD-TEMP", None)
+         ref_date_str = metadata.get(Database.DATE_OBS_KEY)
+
+         # Parse reference date for time delta calculations
+         ref_date = None
+         if ref_date_str:
+             try:
+                 ref_date = datetime.fromisoformat(ref_date_str)
+             except (ValueError, TypeError):
+                 logging.warning(f"Malformed session ref date: {ref_date_str}")
+
+         # Build search conditions - MUST match criteria
+         conditions = {
+             Database.IMAGETYP_KEY: want_type,
+             Database.TELESCOP_KEY: ref_session[get_column_name(Database.TELESCOP_KEY)],
+         }
+
+         # For FLAT frames, filter must match the reference session
+         if want_type.upper() == "FLAT":
+             conditions[Database.FILTER_KEY] = ref_session[
+                 get_column_name(Database.FILTER_KEY)
+             ]
+
+         # Search for candidate sessions
+         candidates = self.db.search_session(where_tuple(conditions))
+
+         # Now score and sort the candidates
+         scored_candidates = []
+
+         for candidate in candidates:
+             score = 0.0
+
+             # Get candidate image metadata to access CCD-TEMP and DATE-OBS
+             try:
+                 candidate_image = candidate.get("metadata", {})
+
+                 # Score by CCD-TEMP difference (most important)
+                 # Lower temperature difference = better score
+                 if ref_temp is not None:
+                     candidate_temp = candidate_image.get("CCD-TEMP")
+                     if candidate_temp is not None:
+                         try:
+                             temp_diff = abs(float(ref_temp) - float(candidate_temp))
+                             # Use exponential decay: closer temps get much better scores
+                             # Perfect match (0°C diff) = 1000, 1°C diff ≈ 368, 2°C diff ≈ 135
+                             score += 1000 * (2.718 ** (-temp_diff))
+                         except (ValueError, TypeError):
+                             # If we can't parse temps, give a neutral score
+                             score += 0
+
+                 # Score by date/time proximity (secondary importance)
+                 if ref_date is not None:
+                     candidate_date_str = candidate_image.get(Database.DATE_OBS_KEY)
+                     if candidate_date_str:
+                         try:
+                             candidate_date = datetime.fromisoformat(candidate_date_str)
+                             time_delta = abs(
+                                 (ref_date - candidate_date).total_seconds()
+                             )
+                             # Closer in time = better score
+                             # Same day ≈ 100, 7 days ≈ 37, 30 days ≈ 9
+                             # Using 7-day half-life
+                             score += 100 * (2.718 ** (-time_delta / (7 * 86400)))
+                         except (ValueError, TypeError):
+                             logging.warning(
+                                 f"Could not parse candidate date: {candidate_date_str}"
+                             )
+
+                 scored_candidates.append((score, candidate))
+
+             except (AssertionError, KeyError) as e:
+                 # If we can't get the session image, log and skip this candidate
+                 logging.warning(
+                     f"Could not score candidate session {candidate.get('id')}: {e}"
+                 )
+                 continue
+
+         # Sort by score (highest first) and return just the sessions
+         scored_candidates.sort(key=lambda x: x[0], reverse=True)
+
+         return [candidate for score, candidate in scored_candidates]
+
+     def search_session(self) -> list[SessionRow]:
          """Search for sessions, optionally filtered by the current selection."""
          # Get query conditions from selection
          conditions = self.selection.get_query_conditions()
          return self.db.search_session(conditions)

-     def get_session_images(self, session_id: int) -> list[dict[str, Any]]:
+     def _reconstruct_image_path(self, image: ImageRow) -> ImageRow:
+         """Reconstruct absolute path from image row containing repo_url and relative path.
+
+         Args:
+             image: Image record with 'repo_url' and 'path' (relative) fields
+
+         Returns:
+             Modified image record with 'path' as absolute path
+         """
+         repo_url = image.get("repo_url")
+         relative_path = image.get("path")
+
+         if repo_url and relative_path:
+             repo = self.repo_manager.get_repo_by_url(repo_url)
+             if repo:
+                 absolute_path = repo.resolve_path(relative_path)
+                 image["path"] = str(absolute_path)
+
+         return image
+
+     def get_session_image(self, session: SessionRow) -> ImageRow:
+         """
+         Get the reference ImageRow for a session with absolute path.
+         """
+         images = self.db.search_image(
+             {Database.ID_KEY: session[get_column_name(Database.IMAGE_DOC_KEY)]}
+         )
+         assert (
+             len(images) == 1
+         ), f"Expected exactly one reference for session, found {len(images)}"
+         return self._reconstruct_image_path(images[0])
+
+     def get_session_images(self, session: SessionRow) -> list[ImageRow]:
          """
          Get all images belonging to a specific session.
@@ -229,24 +406,20 @@ class Starbash:
          Raises:
              ValueError: If session_id is not found in the database
          """
-         # First get the session details
-         session = self.db.get_session_by_id(session_id)
-         if session is None:
-             raise ValueError(f"Session with id {session_id} not found")
-
          # Query images that match ALL session criteria including date range
          conditions = {
-             Database.FILTER_KEY: session[Database.FILTER_KEY],
-             Database.IMAGETYP_KEY: session[Database.IMAGETYP_KEY],
-             Database.OBJECT_KEY: session[Database.OBJECT_KEY],
-             Database.TELESCOP_KEY: session[Database.TELESCOP_KEY],
-             "date_start": session[Database.START_KEY],
-             "date_end": session[Database.END_KEY],
+             Database.FILTER_KEY: session[get_column_name(Database.FILTER_KEY)],
+             Database.IMAGETYP_KEY: session[get_column_name(Database.IMAGETYP_KEY)],
+             Database.OBJECT_KEY: session[get_column_name(Database.OBJECT_KEY)],
+             Database.TELESCOP_KEY: session[get_column_name(Database.TELESCOP_KEY)],
+             "date_start": session[get_column_name(Database.START_KEY)],
+             "date_end": session[get_column_name(Database.END_KEY)],
          }

          # Single query with all conditions
          images = self.db.search_image(conditions)
-         return images if images else []
+         # Reconstruct absolute paths for all images
+         return [self._reconstruct_image_path(img) for img in images] if images else []

      def remove_repo_ref(self, url: str) -> None:
          """
@@ -258,6 +431,8 @@
          Raises:
              ValueError: If the repository URL is not found in user configuration
          """
+         self.db.remove_repo(url)
+
          # Get the repo-ref list from user config
          repo_refs = self.user_repo.config.get("repo-ref")
@@ -283,6 +458,10 @@

      def reindex_repo(self, repo: Repo, force: bool = False):
          """Reindex all repositories managed by the RepoManager."""
+
+         # make sure this new repo is listed in the repos table
+         self.repo_db_update()  # not really ideal; a more optimal version would just add the new repo
+
          # FIXME, add a method to get just the repos that contain images
          if repo.is_scheme("file") and repo.kind != "recipe":
              logging.debug("Reindexing %s...", repo.url)
@@ -303,7 +482,10 @@ class Starbash:
              ):
                  # progress.console.print(f"Indexing {f}...")
                  try:
-                     found = self.db.get_image(str(f))
+                     # Convert absolute path to relative path within repo
+                     relative_path = f.relative_to(path)
+
+                     found = self.db.get_image(repo.url, str(relative_path))
                      if not found or force:
                          # Read and log the primary header (HDU 0)
                          with fits.open(str(f), memmap=False) as hdul:
@@ -319,8 +501,9 @@ class Starbash:
                                  if (not whitelist) or (key in whitelist):
                                      headers[key] = value
                              logging.debug("Headers for %s: %s", f, headers)
-                             headers["path"] = str(f)
-                             image_doc_id = self.db.upsert_image(headers)
+                             # Store relative path in database
+                             headers["path"] = str(relative_path)
+                             image_doc_id = self.db.upsert_image(headers, repo.url)

                              if not found:
                                  # Update the session infos, but ONLY on first file scan
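
Storing repo-relative paths and rebuilding absolute ones on read (see `_reconstruct_image_path` above) is a plain `pathlib` round trip; a sketch with hypothetical paths, assuming `resolve_path` simply joins the repo base onto the relative part:

    from pathlib import Path

    repo_base = Path("/data/astro/repo")           # hypothetical repo root
    f = repo_base / "2024-01-05/light_0001.fits"   # absolute path found while scanning

    relative = f.relative_to(repo_base)  # stored in the DB: 2024-01-05/light_0001.fits
    absolute = repo_base / relative      # reconstructed when the image is read back
    assert absolute == f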
@@ -337,10 +520,6 @@ class Starbash:
          for repo in track(self.repo_manager.repos, description="Reindexing repos..."):
              self.reindex_repo(repo, force=force)

-     def test_processing(self):
-         """A crude test of image processing pipeline - FIXME move into testing"""
-         self.run_all_stages()
-
      def run_all_stages(self):
          """On the currently active session, run all processing stages"""
          logging.info("--- Running all stages ---")
@@ -358,30 +537,77 @@ class Starbash:
                  f"invalid stage definition: a stage is missing the required 'priority' key"
              ) from e

-         # 3. Get all available task definitions (the `[[stage]]` tables with tool, script, when).
-         task_definitions = self.repo_manager.merged.getall("stage")
-         all_tasks = list(itertools.chain.from_iterable(task_definitions))
-
          logging.info(
              f"Found {len(sorted_pipeline)} pipeline steps to run in order of priority."
          )

-         self.start_session()
+         self.init_context()
          # 4. Iterate through the sorted pipeline and execute the associated tasks.
          for step in sorted_pipeline:
              step_name = step.get("name")
              if not step_name:
                  raise ValueError("Invalid pipeline step found: missing 'name' key.")
+             self.run_pipeline_step(step_name)
+
+     def run_pipeline_step(self, step_name: str):
+         logging.info(f"--- Running pipeline step: '{step_name}' ---")

-             logging.info(
-                 f"--- Running pipeline step: '{step_name}' (Priority: {step['priority']}) ---"
+         # 3. Get all available task definitions (the `[[stage]]` tables with tool, script, when).
+         task_definitions = self.repo_manager.merged.getall("stage")
+         all_tasks = list(itertools.chain.from_iterable(task_definitions))
+
+         # Find all tasks that should run during this pipeline step.
+         tasks_to_run = [task for task in all_tasks if task.get("when") == step_name]
+         for task in tasks_to_run:
+             self.run_stage(task)
+
+     def run_master_stages(self):
+         """Generate any missing master frames
+
+         Steps:
+         * set all_tasks to be all tasks for when == "setup.masters"
+         * loop over all currently unfiltered sessions
+         * for each session loop across all_tasks
+         * if task input.type == the imagetyp for this current session
+         * add_input_to_context() add the input files to the context (from the session)
+         * run_stage(task) to generate the new master frame
+         """
+         sessions = self.search_session()
+         for session in sessions:
+             imagetyp = session[get_column_name(Database.IMAGETYP_KEY)]
+             logging.debug(
+                 f"Processing session ID {session[get_column_name(Database.ID_KEY)]} with imagetyp '{imagetyp}'"
              )
-             # Find all tasks that should run during this pipeline step.
-             tasks_to_run = [task for task in all_tasks if task.get("when") == step_name]
-             for task in tasks_to_run:
-                 self.run_stage(task)

-     def start_session(self) -> None:
+             # 3. Get all available task definitions (the `[[stage]]` tables with tool, script, when).
+             task_definitions = self.repo_manager.merged.getall("stage")
+             all_tasks = list(itertools.chain.from_iterable(task_definitions))
+
+             # Find all tasks that should run during the "setup.masters" step.
+             tasks_to_run = [
+                 task for task in all_tasks if task.get("when") == "setup.masters"
+             ]
+
+             for task in tasks_to_run:
+                 input_config = task.get("input", {})
+                 input_type = input_config.get("type")
+                 if imagetyp_equals(input_type, imagetyp):
+                     logging.info(
+                         f" Running master stage task for imagetyp '{imagetyp}'"
+                     )
+
+                     # Create a default process dir in /tmp, though more advanced 'session' based workflows will
+                     # probably override this and place it somewhere persistent.
+                     with tempfile.TemporaryDirectory(prefix="session_tmp_") as temp_dir:
+                         logging.debug(
+                             f"Created temporary session directory: {temp_dir}"
+                         )
+                         self.init_context()
+                         self.context["process_dir"] = temp_dir
+                         self.add_session_to_context(session)
+                         self.run_stage(task)
+
+     def init_context(self) -> None:
          """Do common session init"""

          # Context is preserved through all stages, so each stage can add new symbols to it for use by later stages
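
Task dispatch here is a simple tag match: every `[[stage]]` table carries a `when` key, and a pipeline step runs exactly those tasks whose `when` equals the step name (or the literal `"setup.masters"` for master frames). A toy sketch of that selection, with hypothetical task dicts:

    all_tasks = [
        {"when": "setup.masters", "tool": "siril", "script": "stack_bias"},
        {"when": "session.stack", "tool": "siril", "script": "stack_lights"},
        {"when": "setup.masters", "tool": "siril", "script": "stack_flat"},
    ]

    step_name = "setup.masters"
    tasks_to_run = [t for t in all_tasks if t.get("when") == step_name]
    # -> the two master-frame tasks, kept in declaration order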
@@ -389,11 +615,152 @@ class Starbash:

          # Update the context with runtime values.
          runtime_context = {
-             "process_dir": "/workspaces/starbash/images/process",  # FIXME - create/find this more correctly per session
              "masters": "/workspaces/starbash/images/masters",  # FIXME find this the correct way
          }
          self.context.update(runtime_context)

+     def add_session_to_context(self, session: SessionRow) -> None:
+         """adds to context from the indicated session:
+         * input_files - all of the files mentioned in the session
+         * instrument - for the session
+         * date - the local timezone date of the session
+         * imagetyp - the imagetyp of the session
+         * session - the current session row (joined with a typical image) (can be used to
+           find things like telescope, temperature ...)
+         """
+         # Get images for this session
+         images = self.get_session_images(session)
+         logging.debug(f"Adding {len(images)} files as context.input_files")
+         self.context["input_files"] = [
+             img["path"] for img in images
+         ]  # Pass in the file list via the context dict
+
+         # it is okay to give them the actual session row, because we're never using it again
+         self.context["session"] = session
+
+         instrument = session.get(get_column_name(Database.TELESCOP_KEY))
+         if instrument:
+             self.context["instrument"] = instrument
+
+         imagetyp = session.get(get_column_name(Database.IMAGETYP_KEY))
+         if imagetyp:
+             self.context["imagetyp"] = imagetyp
+
+         date = session.get(get_column_name(Database.START_KEY))
+         if date:
+             self.context["date"] = to_shortdate(date)
+
+     def add_input_files(self, stage: dict) -> None:
+         """adds to context.input_files based on the stage input config"""
+         input_config = stage.get("input")
+         input_required = False
+         if input_config:
+             # if there is an "input" dict, we assume input.required is true if unset
+             input_required = input_config.get("required", True)
+             source = input_config.get("source")
+             if source is None:
+                 raise ValueError(
+                     f"Stage '{stage.get('name')}' has invalid 'input' configuration: missing 'source'"
+                 )
+             if source == "path":
+                 # The path might contain context variables that need to be expanded.
+                 # path_pattern = expand_context(input_config["path"], context)
+                 path_pattern = input_config["path"]
+                 input_files = glob.glob(path_pattern, recursive=True)
+
+                 self.context["input_files"] = (
+                     input_files  # Pass in the file list via the context dict
+                 )
+             elif source == "repo":
+                 # We expect that higher level code has already added the correct input files
+                 # to the context
+                 if not "input_files" in self.context:
+                     raise RuntimeError(
+                         "Input config specifies 'repo' but no 'input_files' found in context"
+                     )
+             else:
+                 raise ValueError(
+                     f"Stage '{stage.get('name')}' has invalid 'input' source: {source}"
+                 )
+
+             # FIXME compare context.output to see if it already exists and is newer than the input files, if so skip processing
+         else:
+             # The script doesn't mention input, therefore assume it doesn't want input_files
+             if "input_files" in self.context:
+                 del self.context["input_files"]
+
+         if input_required and not "input_files" in self.context:
+             raise RuntimeError("No input files found for stage")
+
+     def add_output_path(self, stage: dict) -> None:
+         """Adds output path information to context based on the stage output config.
+
+         Sets the following context variables:
+         - context.output.root_path - base path of the destination repo
+         - context.output.base_path - full path without file extension
+         - context.output.suffix - file extension (e.g., .fits or .fit.gz)
+         - context.output.full_path - complete output file path
+         """
+         output_config = stage.get("output")
+         if not output_config:
+             # No output configuration, remove any existing output from context
+             if "output" in self.context:
+                 del self.context["output"]
+             return
+
+         dest = output_config.get("dest")
+         if not dest:
+             raise ValueError(
+                 f"Stage '{stage.get('description', 'unknown')}' has 'output' config but missing 'dest'"
+             )
+
+         if dest == "repo":
+             # Find the destination repo by type/kind
+             output_type = output_config.get("type")
+             if not output_type:
+                 raise ValueError(
+                     f"Stage '{stage.get('description', 'unknown')}' has output.dest='repo' but missing 'type'"
+                 )
+
+             # Find the repo with matching kind
+             dest_repo = self.repo_manager.get_repo_by_kind(output_type)
+             if not dest_repo:
+                 raise ValueError(
+                     f"No repository found with kind '{output_type}' for output destination"
+                 )
+
+             repo_base = dest_repo.get_path()
+             if not repo_base:
+                 raise ValueError(f"Repository '{dest_repo.url}' has no filesystem path")
+
+             repo_relative: str | None = dest_repo.get("repo.relative")
+             if not repo_relative:
+                 raise ValueError(
+                     f"Repository '{dest_repo.url}' is missing 'repo.relative' configuration"
+                 )
+
+             # we support context variables in the relative path
+             repo_relative = expand_context_unsafe(repo_relative, self.context)
+             full_path = repo_base / repo_relative
+
+             # base_path but without spaces - because Siril doesn't like that
+             full_path = Path(str(full_path).replace(" ", r"_"))
+
+             base_path = full_path.parent / full_path.stem
+
+             # Set context variables as documented in the TOML
+             self.context["output"] = {
+                 # "root_path": repo_relative, not needed I think
+                 "base_path": base_path,
+                 # "suffix": full_path.suffix, not needed I think
+                 "full_path": full_path,
+             }
+
+         else:
+             raise ValueError(
+                 f"Unsupported output destination type: {dest}. Only 'repo' is currently supported."
+             )
+
      def run_stage(self, stage: dict) -> None:
          """
          Executes a single processing stage.
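
The tail of `add_output_path` is plain `pathlib` manipulation; a sketch with hypothetical values showing how `full_path` and `base_path` relate (spaces become underscores because, per the comment, Siril rejects paths containing them):

    from pathlib import Path

    full_path = Path("/repo/masters/2024-01-05 master bias.fits".replace(" ", "_"))
    # -> /repo/masters/2024-01-05_master_bias.fits

    base_path = full_path.parent / full_path.stem
    # -> /repo/masters/2024-01-05_master_bias  (extension stripped)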
@@ -435,30 +802,29 @@ class Starbash:
          )

          # This allows recipe TOML to define their own default variables.
+         # (apply all of the changes to context that the task demands)
          stage_context = stage.get("context", {})
          self.context.update(stage_context)
+         self.add_input_files(stage)
+         self.add_output_path(stage)

-         # Assume no files for this stage
-         if "input_files" in self.context:
-             del self.context["input_files"]
+         # if the output path already exists and is newer than all input files, skip processing
+         output_info: dict | None = self.context.get("output")
+         if output_info:
+             output_path = output_info.get("full_path")

-         input_files = []
-         input_config = stage.get("input")
-         input_required = False
-         if input_config:
-             # if there is an "input" dict, we assume input.required is true if unset
-             input_required = input_config.get("required", True)
-             if "path" in input_config:
-                 # The path might contain context variables that need to be expanded.
-                 # path_pattern = expand_context(input_config["path"], context)
-                 path_pattern = input_config["path"]
-                 input_files = glob.glob(path_pattern, recursive=True)
+             if output_path and os.path.exists(output_path):
+                 logging.info(
+                     f"Output file already exists, skipping processing: {output_path}"
+                 )
+                 return

-             self.context["input_files"] = (
-                 input_files  # Pass in the file list via the context dict
-             )
+         tool.run_in_temp_dir(script, context=self.context)

-         if input_required and not input_files:
-             raise RuntimeError("No input files found for stage")
+         # verify context.output was created if it was specified
+         output_info: dict | None = self.context.get("output")
+         if output_info:
+             output_path = output_info.get("full_path")

-         tool.run_in_temp_dir(script, context=self.context)
+             if not output_path or not os.path.exists(output_path):
+                 raise RuntimeError(f"Expected output file not found: {output_path}")
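
The skip check above only tests that the output exists, while its comment promises "exists and is newer than all input files". A hedged sketch of the mtime comparison that comment implies (helper name hypothetical, not part of starbash):

    import os

    def output_is_fresh(output_path: str, input_files: list[str]) -> bool:
        """True if output exists and is newer than every input (mtime comparison)."""
        if not os.path.exists(output_path):
            return False
        out_mtime = os.path.getmtime(output_path)
        return all(os.path.getmtime(f) <= out_mtime for f in input_files)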