starbash 0.1.9__py3-none-any.whl → 0.1.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of starbash might be problematic. Click here for more details.

starbash/app.py CHANGED
@@ -1,3 +1,4 @@
1
+ import cmd
1
2
  import logging
2
3
  from importlib import resources
3
4
  import os
@@ -19,6 +20,7 @@ import copy
19
20
 
20
21
  import starbash
21
22
  from starbash import console, _is_test_env, to_shortdate
23
+ from starbash.aliases import Aliases
22
24
  from starbash.database import Database, SessionRow, ImageRow, get_column_name
23
25
  from repo import Repo, repo_suffix
24
26
  from starbash.toml import toml_from_template
@@ -70,7 +72,11 @@ def create_user() -> Path:
70
72
 
71
73
 
72
74
  def copy_images_to_dir(images: list[ImageRow], output_dir: Path) -> None:
73
- """Copy images to the specified output directory (using symbolic links if possible)."""
75
+ """Copy images to the specified output directory (using symbolic links if possible).
76
+
77
+ This function requires that "abspath" already be populated in each ImageRow. Normally
78
+ the caller does this by calling Starbash._add_image_abspath() on the image.
79
+ """
74
80
 
75
81
  # Export images
76
82
  console.print(f"[cyan]Exporting {len(images)} images to {output_dir}...[/cyan]")
@@ -81,7 +87,7 @@ def copy_images_to_dir(images: list[ImageRow], output_dir: Path) -> None:
81
87
 
82
88
  for image in images:
83
89
  # Get the source path from the image metadata
84
- source_path = Path(image.get("path", ""))
90
+ source_path = Path(image.get("abspath", ""))
85
91
 
86
92
  if not source_path.exists():
87
93
  console.print(f"[red]Warning: Source file not found: {source_path}[/red]")
@@ -118,14 +124,6 @@ def copy_images_to_dir(images: list[ImageRow], output_dir: Path) -> None:
118
124
  console.print(f" [red]Errors: {error_count} files[/red]")
119
125
 
120
126
 
121
- def imagetyp_equals(imagetyp1: str, imagetyp2: str) -> bool:
122
- """Imagetyps (BIAS, Dark, FLAT, flats) have a number of slightly different convetions.
123
- Do a sloppy equality check.
124
-
125
- Eventually handle non english variants by using the repos aliases table."""
126
- return imagetyp1.strip().lower() == imagetyp2.strip().lower()
127
-
128
-
129
127
  class Starbash:
130
128
  """The main Starbash application class."""
131
129
 
@@ -138,12 +136,29 @@ class Starbash:
138
136
  logging.info("Starbash starting...")
139
137
 
140
138
  # Load app defaults and initialize the repository manager
139
+ self._init_repos()
140
+ self._init_analytics(cmd)
141
+ self._init_aliases()
142
+
143
+ logging.info(
144
+ f"Repo manager initialized with {len(self.repo_manager.repos)} repos."
145
+ )
146
+ # self.repo_manager.dump()
147
+
148
+ self._db = None # Lazy initialization - only create when accessed
149
+
150
+ # Initialize selection state (stored in user config repo)
151
+ self.selection = Selection(self.user_repo)
152
+
153
+ def _init_repos(self) -> None:
154
+ """Initialize all repositories managed by the RepoManager."""
141
155
  self.repo_manager = RepoManager()
142
156
  self.repo_manager.add_repo("pkg://defaults")
143
157
 
144
158
  # Add user prefs as a repo
145
159
  self.user_repo = self.repo_manager.add_repo("file://" + str(create_user()))
146
160
 
161
+ def _init_analytics(self, cmd: str) -> None:
147
162
  self.analytics = NopAnalytics()
148
163
  if self.user_repo.get("analytics.enabled", True):
149
164
  include_user = self.user_repo.get("analytics.include_user", False)
@@ -157,19 +172,10 @@ class Starbash:
157
172
  self.analytics = analytics_start_transaction(name="App session", op=cmd)
158
173
  self.analytics.__enter__()
159
174
 
160
- logging.info(
161
- f"Repo manager initialized with {len(self.repo_manager.repos)} repos."
162
- )
163
- # self.repo_manager.dump()
164
-
165
- self._db = None # Lazy initialization - only create when accessed
166
- self.session_query = None # None means search all sessions
167
-
168
- # Initialize selection state (stored in user config repo)
169
- self.selection = Selection(self.user_repo)
170
-
171
- # FIXME, call reindex somewhere and also index whenever new repos are added
172
- # self.reindex_repos()
175
+ def _init_aliases(self) -> None:
176
+ alias_dict = self.repo_manager.get("aliases", {})
177
+ assert isinstance(alias_dict, dict), "Aliases config must be a dictionary"
178
+ self.aliases = Aliases(alias_dict)
173
179
 
174
180
  @property
175
181
  def db(self) -> Database:
@@ -214,30 +220,42 @@ class Starbash:
214
220
  self.close()
215
221
  return handled
216
222
 
217
- def _add_session(self, f: str, image_doc_id: int, header: dict) -> None:
223
+ def _add_session(self, image_doc_id: int, header: dict) -> None:
218
224
  """We just added a new image, create or update its session entry as needed."""
219
- filter = header.get(Database.FILTER_KEY, "unspecified")
220
225
  image_type = header.get(Database.IMAGETYP_KEY)
221
226
  date = header.get(Database.DATE_OBS_KEY)
222
227
  if not date or not image_type:
223
228
  logging.warning(
224
- "Image %s missing either DATE-OBS or IMAGETYP FITS header, skipping...",
225
- f,
229
+ "Image '%s' missing either DATE-OBS or IMAGETYP FITS header, skipping...",
230
+ header.get("path", "unspecified"),
226
231
  )
227
232
  else:
228
233
  exptime = header.get(Database.EXPTIME_KEY, 0)
229
- telescop = header.get(Database.TELESCOP_KEY, "unspecified")
234
+
230
235
  new = {
231
- Database.FILTER_KEY: filter,
232
- Database.START_KEY: date,
233
- Database.END_KEY: date, # FIXME not quite correct, should be longer by exptime
234
- Database.IMAGE_DOC_KEY: image_doc_id,
235
- Database.IMAGETYP_KEY: image_type,
236
- Database.NUM_IMAGES_KEY: 1,
237
- Database.EXPTIME_TOTAL_KEY: exptime,
238
- Database.OBJECT_KEY: header.get(Database.OBJECT_KEY, "unspecified"),
239
- Database.TELESCOP_KEY: telescop,
236
+ get_column_name(Database.START_KEY): date,
237
+ get_column_name(
238
+ Database.END_KEY
239
+ ): date, # FIXME not quite correct, should be longer by exptime
240
+ get_column_name(Database.IMAGE_DOC_KEY): image_doc_id,
241
+ get_column_name(Database.IMAGETYP_KEY): image_type,
242
+ get_column_name(Database.NUM_IMAGES_KEY): 1,
243
+ get_column_name(Database.EXPTIME_TOTAL_KEY): exptime,
244
+ get_column_name(Database.EXPTIME_KEY): exptime,
240
245
  }
246
+
247
+ filter = header.get(Database.FILTER_KEY)
248
+ if filter:
249
+ new[get_column_name(Database.FILTER_KEY)] = filter
250
+
251
+ telescop = header.get(Database.TELESCOP_KEY)
252
+ if telescop:
253
+ new[get_column_name(Database.TELESCOP_KEY)] = telescop
254
+
255
+ obj = header.get(Database.OBJECT_KEY)
256
+ if obj:
257
+ new[get_column_name(Database.OBJECT_KEY)] = obj
258
+
241
259
  session = self.db.get_session(new)
242
260
  self.db.upsert_session(new, existing=session)
243
261
 
@@ -267,17 +285,6 @@ class Starbash:
267
285
 
268
286
  """
269
287
  # Get reference image to access CCD-TEMP and DATE-OBS
270
- metadata: dict = ref_session.get("metadata", {})
271
- ref_temp = metadata.get("CCD-TEMP", None)
272
- ref_date_str = metadata.get(Database.DATE_OBS_KEY)
273
-
274
- # Parse reference date for time delta calculations
275
- ref_date = None
276
- if ref_date_str:
277
- try:
278
- ref_date = datetime.fromisoformat(ref_date_str)
279
- except (ValueError, TypeError):
280
- logging.warning(f"Malformed session ref date: {ref_date_str}")
281
288
 
282
289
  # Build search conditions - MUST match criteria
283
290
  conditions = {
@@ -294,6 +301,37 @@ class Starbash:
294
301
  # Search for candidate sessions
295
302
  candidates = self.db.search_session(where_tuple(conditions))
296
303
 
304
+ return self.score_candidates(candidates, ref_session)
305
+
306
+ def score_candidates(
307
+ self, candidates: list[dict[str, Any]], ref_session: SessionRow
308
+ ) -> list[SessionRow]:
309
+ """Given a list of images or sessions, try to rank that list by desirability.
310
+
311
+ Return a list of possible images/sessions which would be acceptable. The more desirable
312
+ matches are first in the list. Possibly in the future I might have a 'score' and reason
313
+ given for each ranking.
314
+
315
+ The following criteria MUST match to be acceptable:
316
+ * matches requested imagetyp.
317
+ * same filter as reference session (in the case want_type==FLAT only)
318
+ * same telescope as reference session
319
+
320
+ Quality is determined by (most important first):
321
+ * temperature of CCD-TEMP is closer to the reference session
322
+ * smaller DATE-OBS delta to the reference session
323
+
324
+ Eventually the code will check the following for 'nice to have' (but not now):
325
+ * TBD
326
+
327
+ Possibly eventually this code could be moved into recipes.
328
+
329
+ """
330
+
331
+ metadata: dict = ref_session.get("metadata", {})
332
+ ref_temp = metadata.get("CCD-TEMP", None)
333
+ ref_date_str = metadata.get(Database.DATE_OBS_KEY)
334
+
297
335
  # Now score and sort the candidates
298
336
  scored_candidates = []
299
337
 
@@ -318,23 +356,19 @@ class Starbash:
318
356
  # If we can't parse temps, give a neutral score
319
357
  score += 0
320
358
 
321
- # Score by date/time proximity (secondary importance)
322
- if ref_date is not None:
323
- candidate_date_str = candidate_image.get(Database.DATE_OBS_KEY)
324
- if candidate_date_str:
325
- try:
326
- candidate_date = datetime.fromisoformat(candidate_date_str)
327
- time_delta = abs(
328
- (ref_date - candidate_date).total_seconds()
329
- )
330
- # Closer in time = better score
331
- # Same day ≈ 100, 7 days 37, 30 days 9
332
- # Using 7-day half-life
333
- score += 100 * (2.718 ** (-time_delta / (7 * 86400)))
334
- except (ValueError, TypeError):
335
- logging.warning(
336
- f"Could not parse candidate date: {candidate_date_str}"
337
- )
359
+ # Parse reference date for time delta calculations
360
+ candidate_date_str = candidate_image.get(Database.DATE_OBS_KEY)
361
+ if ref_date_str and candidate_date_str:
362
+ try:
363
+ ref_date = datetime.fromisoformat(ref_date_str)
364
+ candidate_date = datetime.fromisoformat(candidate_date_str)
365
+ time_delta = abs((ref_date - candidate_date).total_seconds())
366
+ # Closer in time = better score
367
+ # Same day ≈ 100, 7 days ≈ 37, 30 days ≈ 9
368
+ # Using 7-day half-life
369
+ score += 100 * (2.718 ** (-time_delta / (7 * 86400)))
370
+ except (ValueError, TypeError):
371
+ logging.warning(f"Malformed date - ignoring entry")
338
372
 
339
373
  scored_candidates.append((score, candidate))
340
374
 
@@ -345,10 +379,10 @@ class Starbash:
345
379
  )
346
380
  continue
347
381
 
348
- # Sort by score (highest first) and return just the sessions
382
+ # Sort by score (highest first)
349
383
  scored_candidates.sort(key=lambda x: x[0], reverse=True)
350
384
 
351
- return [candidate for score, candidate in scored_candidates]
385
+ return [candidate for _, candidate in scored_candidates]
352
386
 
353
387
  def search_session(self) -> list[SessionRow]:
354
388
  """Search for sessions, optionally filtered by the current selection."""
@@ -356,23 +390,24 @@ class Starbash:
356
390
  conditions = self.selection.get_query_conditions()
357
391
  return self.db.search_session(conditions)
358
392
 
359
- def _reconstruct_image_path(self, image: ImageRow) -> ImageRow:
393
+ def _add_image_abspath(self, image: ImageRow) -> ImageRow:
360
394
  """Reconstruct absolute path from image row containing repo_url and relative path.
361
395
 
362
396
  Args:
363
397
  image: Image record with 'repo_url' and 'path' (relative) fields
364
398
 
365
399
  Returns:
366
- Modified image record with 'path' as absolute path
400
+ Modified image record with 'abspath' as absolute path
367
401
  """
368
- repo_url = image.get("repo_url")
369
- relative_path = image.get("path")
402
+ if not image.get("abspath"):
403
+ repo_url = image.get(Database.REPO_URL_KEY)
404
+ relative_path = image.get("path")
370
405
 
371
- if repo_url and relative_path:
372
- repo = self.repo_manager.get_repo_by_url(repo_url)
373
- if repo:
374
- absolute_path = repo.resolve_path(relative_path)
375
- image["path"] = str(absolute_path)
406
+ if repo_url and relative_path:
407
+ repo = self.repo_manager.get_repo_by_url(repo_url)
408
+ if repo:
409
+ absolute_path = repo.resolve_path(relative_path)
410
+ image["abspath"] = str(absolute_path)
376
411
 
377
412
  return image
378
413
 
@@ -380,13 +415,52 @@ class Starbash:
380
415
  """
381
416
  Get the reference ImageRow for a session with absolute path.
382
417
  """
418
+ from starbash.database import SearchCondition
419
+
383
420
  images = self.db.search_image(
384
- {Database.ID_KEY: session[get_column_name(Database.IMAGE_DOC_KEY)]}
421
+ [
422
+ SearchCondition(
423
+ "i.id", "=", session[get_column_name(Database.IMAGE_DOC_KEY)]
424
+ )
425
+ ]
385
426
  )
386
427
  assert (
387
428
  len(images) == 1
388
429
  ), f"Expected exactly one reference for session, found {len(images)}"
389
- return self._reconstruct_image_path(images[0])
430
+ return self._add_image_abspath(images[0])
431
+
432
+ def get_master_images(
433
+ self, imagetyp: str | None = None, reference_session: SessionRow | None = None
434
+ ) -> list[ImageRow]:
435
+ """Return a list of the specified master imagetyp (bias, flat etc...)
436
+ (or any type if not specified).
437
+
438
+ The first image will be the 'best'; remaining entries are progressively worse matches.
439
+
440
+ (the following is not yet implemented)
441
+ If reference_session is provided it will be used to refine the search as follows:
442
+ * The telescope must match
443
+ * The image resolutions and binnings must match
444
+ * The filter must match (for FLAT frames only)
445
+ * Preferably the master date_obs would be either before or slightly after (<24 hrs) the reference session start time
446
+ * Preferably the master date_obs should be the closest in date to the reference session start time
447
+ * The camera temperature should be as close as possible to the reference session camera temperature
448
+ """
449
+ master_repo = self.repo_manager.get_repo_by_kind("master")
450
+
451
+ if master_repo is None:
452
+ logging.warning("No master repo configured - skipping master frame load.")
453
+ return []
454
+
455
+ # Search for images in the master repo only
456
+ from starbash.database import SearchCondition
457
+
458
+ search_conditions = [SearchCondition("r.url", "=", master_repo.url)]
459
+ if imagetyp:
460
+ search_conditions.append(SearchCondition("i.imagetyp", "=", imagetyp))
461
+
462
+ images = self.db.search_image(search_conditions)
463
+ return images
390
464
 
391
465
  def get_session_images(self, session: SessionRow) -> list[ImageRow]:
392
466
  """
@@ -406,20 +480,52 @@ class Starbash:
406
480
  Raises:
407
481
  ValueError: If session_id is not found in the database
408
482
  """
409
- # Query images that match ALL session criteria including date range
410
- conditions = {
411
- Database.FILTER_KEY: session[get_column_name(Database.FILTER_KEY)],
412
- Database.IMAGETYP_KEY: session[get_column_name(Database.IMAGETYP_KEY)],
413
- Database.OBJECT_KEY: session[get_column_name(Database.OBJECT_KEY)],
414
- Database.TELESCOP_KEY: session[get_column_name(Database.TELESCOP_KEY)],
415
- "date_start": session[get_column_name(Database.START_KEY)],
416
- "date_end": session[get_column_name(Database.END_KEY)],
417
- }
483
+ from starbash.database import SearchCondition
418
484
 
419
- # Single query with all conditions
485
+ # Query images that match ALL session criteria including date range
486
+ # Note: We need to search JSON metadata for FILTER, IMAGETYP, OBJECT, TELESCOP
487
+ # since they're not indexed columns in the images table
488
+ conditions = [
489
+ SearchCondition(
490
+ "i.date_obs", ">=", session[get_column_name(Database.START_KEY)]
491
+ ),
492
+ SearchCondition(
493
+ "i.date_obs", "<=", session[get_column_name(Database.END_KEY)]
494
+ ),
495
+ SearchCondition(
496
+ "i.imagetyp", "=", session[get_column_name(Database.IMAGETYP_KEY)]
497
+ ),
498
+ ]
499
+
500
+ # we never want to return 'master' images as part of the session image paths
501
+ # (because we will be passing these tool siril or whatever to generate masters or
502
+ # some other downstream image)
503
+ master_repo = self.repo_manager.get_repo_by_kind("master")
504
+ if master_repo is not None:
505
+ conditions.append(SearchCondition("r.url", "<>", master_repo.url))
506
+
507
+ # Single query with indexed date conditions
420
508
  images = self.db.search_image(conditions)
509
+
510
+ # We no longer filter by target (object) because it might not be set anyway
511
+ filtered_images = []
512
+ for img in images:
513
+ if (
514
+ img.get(Database.FILTER_KEY)
515
+ == session[get_column_name(Database.FILTER_KEY)]
516
+ # and img.get(Database.OBJECT_KEY)
517
+ # == session[get_column_name(Database.OBJECT_KEY)]
518
+ and img.get(Database.TELESCOP_KEY)
519
+ == session[get_column_name(Database.TELESCOP_KEY)]
520
+ ):
521
+ filtered_images.append(img)
522
+
421
523
  # Reconstruct absolute paths for all images
422
- return [self._reconstruct_image_path(img) for img in images] if images else []
524
+ return (
525
+ [self._add_image_abspath(img) for img in filtered_images]
526
+ if filtered_images
527
+ else []
528
+ )
423
529
 
424
530
  def remove_repo_ref(self, url: str) -> None:
425
531
  """
@@ -447,6 +553,7 @@ class Starbash:
447
553
  # Match by converting to file:// URL format if needed
448
554
  if ref_dir == url or f"file://{ref_dir}" == url:
449
555
  repo_refs.remove(ref)
556
+
450
557
  found = True
451
558
  break
452
559
 
@@ -456,24 +563,76 @@ class Starbash:
456
563
  # Write the updated config
457
564
  self.user_repo.write_config()
458
565
 
459
- def reindex_repo(self, repo: Repo, force: bool = False):
566
+ def add_image_to_db(self, repo: Repo, f: Path, force: bool = False) -> None:
567
+ """Read FITS header from file and add/update image entry in the database."""
568
+
569
+ path = repo.get_path()
570
+ if not path:
571
+ raise ValueError(f"Repo path not found for {repo}")
572
+
573
+ whitelist = None
574
+ config = self.repo_manager.merged.get("config")
575
+ if config:
576
+ whitelist = config.get("fits-whitelist", None)
577
+
578
+ try:
579
+ # Convert absolute path to relative path within repo
580
+ relative_path = f.relative_to(path)
581
+
582
+ found = self.db.get_image(repo.url, str(relative_path))
583
+
584
+ # for debugging sometimes we want to limit scanning to a single directory or file
585
+ # debug_target = "masters-raw/2025-09-09/DARK"
586
+ debug_target = None
587
+ if debug_target:
588
+ if str(relative_path).startswith(debug_target):
589
+ logging.error("Debugging %s...", f)
590
+ found = False
591
+ else:
592
+ found = True # skip processing
593
+ force = False
594
+
595
+ if not found or force:
596
+ # Read and log the primary header (HDU 0)
597
+ with fits.open(str(f), memmap=False) as hdul:
598
+ # convert headers to dict
599
+ hdu0: Any = hdul[0]
600
+ header = hdu0.header
601
+ if type(header).__name__ == "Unknown":
602
+ raise ValueError("FITS header has Unknown type: %s", f)
603
+
604
+ items = header.items()
605
+ headers = {}
606
+ for key, value in items:
607
+ if (not whitelist) or (key in whitelist):
608
+ headers[key] = value
609
+ logging.debug("Headers for %s: %s", f, headers)
610
+ # Store relative path in database
611
+ headers["path"] = str(relative_path)
612
+ image_doc_id = self.db.upsert_image(headers, repo.url)
613
+
614
+ if not found:
615
+ # Update the session infos, but ONLY on first file scan
616
+ # (otherwise invariants will get messed up)
617
+ self._add_session(image_doc_id, header)
618
+
619
+ except Exception as e:
620
+ logging.warning("Failed to read FITS header for %s: %s", f, e)
621
+
622
+ def reindex_repo(self, repo: Repo, force: bool = False, subdir: str | None = None):
460
623
  """Reindex all repositories managed by the RepoManager."""
461
624
 
462
625
  # make sure this new repo is listed in the repos table
463
626
  self.repo_db_update() # not really ideal, a more optimal version would just add the new repo
464
627
 
465
- # FIXME, add a method to get just the repos that contain images
466
- if repo.is_scheme("file") and repo.kind != "recipe":
467
- logging.debug("Reindexing %s...", repo.url)
628
+ path = repo.get_path()
468
629
 
469
- whitelist = None
470
- config = self.repo_manager.merged.get("config")
471
- if config:
472
- whitelist = config.get("fits-whitelist", None)
630
+ if path and repo.is_scheme("file") and repo.kind != "recipe":
631
+ logging.debug("Reindexing %s...", repo.url)
473
632
 
474
- path = repo.get_path()
475
- if not path:
476
- raise ValueError(f"Repo path not found for {repo}")
633
+ if subdir:
634
+ path = path / subdir
635
+ # used to debug
477
636
 
478
637
  # Find all FITS files under this repo path
479
638
  for f in track(
@@ -481,37 +640,7 @@ class Starbash:
481
640
  description=f"Indexing {repo.url}...",
482
641
  ):
483
642
  # progress.console.print(f"Indexing {f}...")
484
- try:
485
- # Convert absolute path to relative path within repo
486
- relative_path = f.relative_to(path)
487
-
488
- found = self.db.get_image(repo.url, str(relative_path))
489
- if not found or force:
490
- # Read and log the primary header (HDU 0)
491
- with fits.open(str(f), memmap=False) as hdul:
492
- # convert headers to dict
493
- hdu0: Any = hdul[0]
494
- header = hdu0.header
495
- if type(header).__name__ == "Unknown":
496
- raise ValueError("FITS header has Unknown type: %s", f)
497
-
498
- items = header.items()
499
- headers = {}
500
- for key, value in items:
501
- if (not whitelist) or (key in whitelist):
502
- headers[key] = value
503
- logging.debug("Headers for %s: %s", f, headers)
504
- # Store relative path in database
505
- headers["path"] = str(relative_path)
506
- image_doc_id = self.db.upsert_image(headers, repo.url)
507
-
508
- if not found:
509
- # Update the session infos, but ONLY on first file scan
510
- # (otherwise invariants will get messed up)
511
- self._add_session(str(f), image_doc_id, header)
512
-
513
- except Exception as e:
514
- logging.warning("Failed to read FITS header for %s: %s", f, e)
643
+ self.add_image_to_db(repo, f, force=force)
515
644
 
516
645
  def reindex_repos(self, force: bool = False):
517
646
  """Reindex all repositories managed by the RepoManager."""
@@ -520,12 +649,14 @@ class Starbash:
520
649
  for repo in track(self.repo_manager.repos, description="Reindexing repos..."):
521
650
  self.reindex_repo(repo, force=force)
522
651
 
523
- def run_all_stages(self):
524
- """On the currently active session, run all processing stages"""
525
- logging.info("--- Running all stages ---")
652
+ def _get_stages(self, name: str) -> list[dict[str, Any]]:
653
+ """Get all pipeline stages defined in the merged configuration.
526
654
 
655
+ Returns:
656
+ List of stage definitions (dictionaries with 'name' and 'priority')
657
+ """
527
658
  # 1. Get all pipeline definitions (the `[[stages]]` tables with name and priority).
528
- pipeline_definitions = self.repo_manager.merged.getall("stages")
659
+ pipeline_definitions = self.repo_manager.merged.getall(name)
529
660
  flat_pipeline_steps = list(itertools.chain.from_iterable(pipeline_definitions))
530
661
 
531
662
  # 2. Sort the pipeline steps by their 'priority' field.
@@ -537,9 +668,17 @@ class Starbash:
537
668
  f"invalid stage definition: a stage is missing the required 'priority' key"
538
669
  ) from e
539
670
 
540
- logging.info(
671
+ logging.debug(
541
672
  f"Found {len(sorted_pipeline)} pipeline steps to run in order of priority."
542
673
  )
674
+ return sorted_pipeline
675
+
676
+ def run_all_stages(self):
677
+ """On the currently active session, run all processing stages"""
678
+ logging.info("--- Running all stages ---")
679
+
680
+ # 1. Get all pipeline definitions (the `[[stages]]` tables with name and priority).
681
+ sorted_pipeline = self._get_stages("stages")
543
682
 
544
683
  self.init_context()
545
684
  # 4. Iterate through the sorted pipeline and execute the associated tasks.
@@ -565,7 +704,7 @@ class Starbash:
565
704
  """Generate any missing master frames
566
705
 
567
706
  Steps:
568
- * set all_tasks to be all tasks for when == "setup.masters"
707
+ * set all_tasks to be all tasks for when == "setup.master.bias"
569
708
  * loop over all currently unfiltered sessions
570
709
  * for each session loop across all_tasks
571
710
  * if task input.type == the imagetyp for this current session
@@ -574,38 +713,60 @@ class Starbash:
574
713
  """
575
714
  sessions = self.search_session()
576
715
  for session in sessions:
577
- imagetyp = session[get_column_name(Database.IMAGETYP_KEY)]
578
- logging.debug(
579
- f"Processing session ID {session[get_column_name(Database.ID_KEY)]} with imagetyp '{imagetyp}'"
580
- )
581
-
582
- # 3. Get all available task definitions (the `[[stage]]` tables with tool, script, when).
583
- task_definitions = self.repo_manager.merged.getall("stage")
584
- all_tasks = list(itertools.chain.from_iterable(task_definitions))
716
+ try:
717
+ imagetyp = session[get_column_name(Database.IMAGETYP_KEY)]
718
+ logging.debug(
719
+ f"Processing session ID {session[get_column_name(Database.ID_KEY)]} with imagetyp '{imagetyp}'"
720
+ )
585
721
 
586
- # Find all tasks that should run during the "setup.masters" step.
587
- tasks_to_run = [
588
- task for task in all_tasks if task.get("when") == "setup.masters"
589
- ]
722
+ sorted_pipeline = self._get_stages("master-stages")
590
723
 
591
- for task in tasks_to_run:
592
- input_config = task.get("input", {})
593
- input_type = input_config.get("type")
594
- if imagetyp_equals(input_type, imagetyp):
595
- logging.info(
596
- f" Running master stage task for imagetyp '{imagetyp}'"
597
- )
598
-
599
- # Create a default process dir in /tmp, though more advanced 'session' based workflows will
600
- # probably override this and place it somewhere persistent.
601
- with tempfile.TemporaryDirectory(prefix="session_tmp_") as temp_dir:
602
- logging.debug(
603
- f"Created temporary session directory: {temp_dir}"
724
+ # 4. Iterate through the sorted pipeline and execute the associated tasks.
725
+ # FIXME unify the master vs normal step running code
726
+ for step in sorted_pipeline:
727
+ step_name = step.get("name")
728
+ if not step_name:
729
+ raise ValueError(
730
+ "Invalid pipeline step found: missing 'name' key."
604
731
  )
605
- self.init_context()
606
- self.context["process_dir"] = temp_dir
607
- self.add_session_to_context(session)
608
- self.run_stage(task)
732
+
733
+ # 3. Get all available task definitions (the `[[stage]]` tables with tool, script, when).
734
+ task_definitions = self.repo_manager.merged.getall("stage")
735
+ all_tasks = list(itertools.chain.from_iterable(task_definitions))
736
+
737
+ # Find all tasks that should run during this step
738
+ tasks_to_run = [
739
+ task for task in all_tasks if task.get("when") == step_name
740
+ ]
741
+
742
+ for task in tasks_to_run:
743
+ input_config = task.get("input", {})
744
+ input_type = input_config.get("type")
745
+ if not input_type:
746
+ raise ValueError(
747
+ f"Task for step '{step_name}' missing required input.type"
748
+ )
749
+ if self.aliases.equals(input_type, imagetyp):
750
+ logging.debug(
751
+ f"Running {step_name} task for imagetyp '{imagetyp}'"
752
+ )
753
+
754
+ # Create a default process dir in /tmp, though more advanced 'session' based workflows will
755
+ # probably override this and place it somewhere persistent.
756
+ with tempfile.TemporaryDirectory(
757
+ prefix="session_tmp_"
758
+ ) as temp_dir:
759
+ logging.debug(
760
+ f"Created temporary session directory: {temp_dir}"
761
+ )
762
+ self.init_context()
763
+ self.context["process_dir"] = temp_dir
764
+ self.add_session_to_context(session)
765
+ self.run_stage(task)
766
+ except RuntimeError as e:
767
+ logging.error(
768
+ f"Skipping session {session[get_column_name(Database.ID_KEY)]}: {e}"
769
+ )
609
770
 
610
771
  def init_context(self) -> None:
611
772
  """Do common session init"""
@@ -615,26 +776,19 @@ class Starbash:
615
776
 
616
777
  # Update the context with runtime values.
617
778
  runtime_context = {
618
- "masters": "/workspaces/starbash/images/masters", # FIXME find this the correct way
779
+ # "masters": "/workspaces/starbash/images/masters", # FIXME find this the correct way
619
780
  }
620
781
  self.context.update(runtime_context)
621
782
 
622
783
  def add_session_to_context(self, session: SessionRow) -> None:
623
784
  """adds to context from the indicated session:
624
- * input_files - all of the files mentioned in the session
625
785
  * instrument - for the session
626
786
  * date - the localtimezone date of the session
627
787
  * imagetyp - the imagetyp of the session
628
788
  * session - the current session row (joined with a typical image) (can be used to
629
789
  find things like telescope, temperature ...)
790
+ * session_config - a short human readable description of the session - suitable for logs or filenames
630
791
  """
631
- # Get images for this session
632
- images = self.get_session_images(session)
633
- logging.debug(f"Adding {len(images)} files as context.input_files")
634
- self.context["input_files"] = [
635
- img["path"] for img in images
636
- ] # Pass in the file list via the context dict
637
-
638
792
  # it is okay to give them the actual session row, because we're never using it again
639
793
  self.context["session"] = session
640
794
 
@@ -644,19 +798,67 @@ class Starbash:
644
798
 
645
799
  imagetyp = session.get(get_column_name(Database.IMAGETYP_KEY))
646
800
  if imagetyp:
801
+ imagetyp = self.aliases.normalize(imagetyp)
647
802
  self.context["imagetyp"] = imagetyp
648
803
 
804
+ # add a short human readable description of the session - suitable for logs or in filenames
805
+ session_config = f"{imagetyp}"
806
+
807
+ metadata = session.get("metadata", {})
808
+ filter = metadata.get(Database.FILTER_KEY)
809
+ if (imagetyp == "flat" or imagetyp == "light") and filter:
810
+ # we only care about filters in these cases
811
+ session_config += f"_{filter}"
812
+ if imagetyp == "dark":
813
+ exptime = session.get(get_column_name(Database.EXPTIME_KEY))
814
+ if exptime:
815
+ session_config += f"_{int(float(exptime))}s"
816
+
817
+ self.context["session_config"] = session_config
818
+
649
819
  date = session.get(get_column_name(Database.START_KEY))
650
820
  if date:
651
821
  self.context["date"] = to_shortdate(date)
652
822
 
823
+ def add_input_masters(self, stage: dict) -> None:
824
+ """based on input.masters add the correct master frames as context.master.<type> filepaths"""
825
+ session = self.context.get("session")
826
+ assert session is not None, "context.session should have been already set"
827
+
828
+ input_config = stage.get("input", {})
829
+ master_types: list[str] = input_config.get("masters", [])
830
+ for master_type in master_types:
831
+ masters = self.get_master_images(
832
+ imagetyp=master_type, reference_session=session
833
+ )
834
+ if not masters:
835
+ raise RuntimeError(
836
+ f"No master frames of type '{master_type}' found for stage '{stage.get('name')}'"
837
+ )
838
+
839
+ context_master = self.context.setdefault("master", {})
840
+
841
+ if len(masters) > 1:
842
+ logging.debug(
843
+ f"Multiple ({len(masters)}) master frames of type '{master_type}' found, using first. FIXME."
844
+ )
845
+
846
+ # Try to rank the images by desirability
847
+ masters = self.score_candidates(masters, session)
848
+
849
+ self._add_image_abspath(masters[0]) # make sure abspath is populated
850
+ selected_master = masters[0]["abspath"]
851
+ logging.info(f"For master '{master_type}', using: {selected_master}")
852
+
853
+ context_master[master_type] = selected_master
854
+
653
855
  def add_input_files(self, stage: dict) -> None:
654
856
  """adds to context.input_files based on the stage input config"""
655
857
  input_config = stage.get("input")
656
- input_required = False
858
+ input_required = 0
657
859
  if input_config:
658
860
  # if there is an "input" dict, we assume input.required is true if unset
659
- input_required = input_config.get("required", True)
861
+ input_required = input_config.get("required", 0)
660
862
  source = input_config.get("source")
661
863
  if source is None:
662
864
  raise ValueError(
@@ -672,12 +874,17 @@ class Starbash:
672
874
  input_files # Pass in the file list via the context dict
673
875
  )
674
876
  elif source == "repo":
675
- # We expect that higher level code has already added the correct input files
676
- # to the context
677
- if not "input_files" in self.context:
678
- raise RuntimeError(
679
- "Input config specifies 'repo' but no 'input_files' found in context"
680
- )
877
+ # Get images for this session (by pulling from repo)
878
+ session = self.context.get("session")
879
+ assert (
880
+ session is not None
881
+ ), "context.session should have been already set"
882
+
883
+ images = self.get_session_images(session)
884
+ logging.debug(f"Using {len(images)} files as input_files")
885
+ self.context["input_files"] = [
886
+ img["abspath"] for img in images
887
+ ] # Pass in the file list via the context dict
681
888
  else:
682
889
  raise ValueError(
683
890
  f"Stage '{stage.get('name')}' has invalid 'input' source: {source}"
@@ -689,8 +896,8 @@ class Starbash:
689
896
  if "input_files" in self.context:
690
897
  del self.context["input_files"]
691
898
 
692
- if input_required and not "input_files" in self.context:
693
- raise RuntimeError("No input files found for stage")
899
+ if input_required and len(self.context.get("input_files", [])) < input_required:
900
+ raise RuntimeError(f"Stage requires at least {input_required} input files")
694
901
 
695
902
  def add_output_path(self, stage: dict) -> None:
696
903
  """Adds output path information to context based on the stage output config.
@@ -700,6 +907,7 @@ class Starbash:
700
907
  - context.output.base_path - full path without file extension
701
908
  - context.output.suffix - file extension (e.g., .fits or .fit.gz)
702
909
  - context.output.full_path - complete output file path
910
+ - context.output.repo - the destination Repo (if applicable)
703
911
  """
704
912
  output_config = stage.get("output")
705
913
  if not output_config:
@@ -754,8 +962,8 @@ class Starbash:
754
962
  "base_path": base_path,
755
963
  # "suffix": full_path.suffix, not needed I think
756
964
  "full_path": full_path,
965
+ "repo": dest_repo,
757
966
  }
758
-
759
967
  else:
760
968
  raise ValueError(
761
969
  f"Unsupported output destination type: {dest}. Only 'repo' is currently supported."
@@ -777,17 +985,29 @@ class Starbash:
777
985
 
778
986
  logging.info(f"Running stage: {stage_desc}")
779
987
 
780
- tool_name = stage.get("tool")
781
- if not tool_name:
988
+ tool_dict = stage.get("tool")
989
+ if not tool_dict:
782
990
  raise ValueError(
783
991
  f"Stage '{stage.get('name')}' is missing a 'tool' definition."
784
992
  )
785
- tool: Tool | None = tools.get(tool_name)
993
+ tool_name = tool_dict.get("name")
994
+ if not tool_name:
995
+ raise ValueError(
996
+ f"Stage '{stage.get('name')}' is missing a 'tool.name' definition."
997
+ )
998
+ tool = tools.get(tool_name)
786
999
  if not tool:
787
1000
  raise ValueError(
788
1001
  f"Tool '{tool_name}' for stage '{stage.get('name')}' not found."
789
1002
  )
790
1003
  logging.debug(f" Using tool: {tool_name}")
1004
+ tool.set_defaults()
1005
+
1006
+ # Allow stage to override tool timeout if specified
1007
+ tool_timeout = tool_dict.get("timeout")
1008
+ if tool_timeout is not None:
1009
+ tool.timeout = float(tool_timeout)
1010
+ logging.debug(f"Using tool timeout: {tool.timeout} seconds")
791
1011
 
792
1012
  script_filename = stage.get("script-file", tool.default_script_file)
793
1013
  if script_filename:
@@ -806,6 +1026,7 @@ class Starbash:
806
1026
  stage_context = stage.get("context", {})
807
1027
  self.context.update(stage_context)
808
1028
  self.add_input_files(stage)
1029
+ self.add_input_masters(stage)
809
1030
  self.add_output_path(stage)
810
1031
 
811
1032
  # if the output path already exists and is newer than all input files, skip processing
@@ -828,3 +1049,5 @@ class Starbash:
828
1049
 
829
1050
  if not output_path or not os.path.exists(output_path):
830
1051
  raise RuntimeError(f"Expected output file not found: {output_path}")
1052
+ else:
1053
+ self.add_image_to_db(output_info["repo"], Path(output_path), force=True)