starbash 0.1.9__py3-none-any.whl → 0.1.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
starbash/app.py CHANGED
@@ -1,3 +1,4 @@
+ import cmd
  import logging
  from importlib import resources
  import os
@@ -19,8 +20,9 @@ import copy

  import starbash
  from starbash import console, _is_test_env, to_shortdate
+ from starbash.aliases import Aliases
  from starbash.database import Database, SessionRow, ImageRow, get_column_name
- from repo import Repo, repo_suffix
+ from repo import Repo, repo, repo_suffix
  from starbash.toml import toml_from_template
  from starbash.tool import Tool, expand_context, expand_context_unsafe
  from repo import RepoManager
@@ -70,7 +72,11 @@ def create_user() -> Path:


  def copy_images_to_dir(images: list[ImageRow], output_dir: Path) -> None:
- """Copy images to the specified output directory (using symbolic links if possible)."""
+ """Copy images to the specified output directory (using symbolic links if possible).
+
+ This function requires that "abspath" already be populated in each ImageRow. Normally
+ the caller does this by calling Starbash._add_image_abspath() on the image.
+ """

  # Export images
  console.print(f"[cyan]Exporting {len(images)} images to {output_dir}...[/cyan]")
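The new docstring establishes an ordering contract: callers must resolve "abspath" before exporting. A minimal sketch of the intended calling pattern (sb is an assumed Starbash instance; the export path is illustrative):

    from pathlib import Path

    for img in images:                 # ImageRow dicts from a session query
        sb._add_image_abspath(img)     # populates img["abspath"] from repo_url + path
    copy_images_to_dir(images, Path("/tmp/export"))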
@@ -81,7 +87,7 @@ def copy_images_to_dir(images: list[ImageRow], output_dir: Path) -> None:

  for image in images:
  # Get the source path from the image metadata
- source_path = Path(image.get("path", ""))
+ source_path = Path(image.get("abspath", ""))

  if not source_path.exists():
  console.print(f"[red]Warning: Source file not found: {source_path}[/red]")
@@ -118,12 +124,31 @@ def copy_images_to_dir(images: list[ImageRow], output_dir: Path) -> None:
  console.print(f" [red]Errors: {error_count} files[/red]")


- def imagetyp_equals(imagetyp1: str, imagetyp2: str) -> bool:
- """Imagetyps (BIAS, Dark, FLAT, flats) have a number of slightly different convetions.
- Do a sloppy equality check.
+ class ProcessingContext(tempfile.TemporaryDirectory):
+ """For processing a set of sessions for a particular target.
+
+ Keeps a shared temporary directory for intermediate files. We expose the path to that
+ directory in context["process_dir"].
+ """
+
+ def __init__(self, starbash: "Starbash"):
+ super().__init__(prefix="sbprocessing_")
+ self.sb = starbash
+ logging.debug(f"Created processing context at {self.name}")
+
+ self.sb.init_context()
+ self.sb.context["process_dir"] = self.name
+
+ def __enter__(self) -> str:
+ # TemporaryDirectory.__enter__ returns the directory path, not self
+ return super().__enter__()
+
+ def __exit__(self, exc_type, exc_value, traceback) -> None:
+ logging.debug(f"Cleaning up processing context at {self.name}")

- Eventually handle non english variants by using the repos aliases table."""
- return imagetyp1.strip().lower() == imagetyp2.strip().lower()
+ # unregister our process dir
+ self.sb.context.pop("process_dir", None)
+
+ super().__exit__(exc_type, exc_value, traceback)


  class Starbash:
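Since ProcessingContext subclasses tempfile.TemporaryDirectory, it is used as an ordinary context manager. A minimal sketch of the lifecycle (assuming a Starbash instance sb):

    with ProcessingContext(sb) as process_dir:     # process_dir is the temp dir path
        assert sb.context["process_dir"] == process_dir
        ...  # stages write intermediate files under process_dir
    # on exit the directory is deleted and context["process_dir"] is popped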
@@ -138,12 +163,29 @@ class Starbash:
  logging.info("Starbash starting...")

  # Load app defaults and initialize the repository manager
+ self._init_repos()
+ self._init_analytics(cmd)
+ self._init_aliases()
+
+ logging.info(
+ f"Repo manager initialized with {len(self.repo_manager.repos)} repos."
+ )
+ # self.repo_manager.dump()
+
+ self._db = None # Lazy initialization - only create when accessed
+
+ # Initialize selection state (stored in user config repo)
+ self.selection = Selection(self.user_repo)
+
+ def _init_repos(self) -> None:
+ """Initialize all repositories managed by the RepoManager."""
  self.repo_manager = RepoManager()
  self.repo_manager.add_repo("pkg://defaults")

  # Add user prefs as a repo
  self.user_repo = self.repo_manager.add_repo("file://" + str(create_user()))

+ def _init_analytics(self, cmd: str) -> None:
  self.analytics = NopAnalytics()
  if self.user_repo.get("analytics.enabled", True):
  include_user = self.user_repo.get("analytics.include_user", False)
@@ -157,19 +199,10 @@ class Starbash:
  self.analytics = analytics_start_transaction(name="App session", op=cmd)
  self.analytics.__enter__()

- logging.info(
- f"Repo manager initialized with {len(self.repo_manager.repos)} repos."
- )
- # self.repo_manager.dump()
-
- self._db = None # Lazy initialization - only create when accessed
- self.session_query = None # None means search all sessions
-
- # Initialize selection state (stored in user config repo)
- self.selection = Selection(self.user_repo)
-
- # FIXME, call reindex somewhere and also index whenever new repos are added
- # self.reindex_repos()
+ def _init_aliases(self) -> None:
+ alias_dict = self.repo_manager.get("aliases", {})
+ assert isinstance(alias_dict, dict), "Aliases config must be a dictionary"
+ self.aliases = Aliases(alias_dict)

  @property
  def db(self) -> Database:
@@ -214,30 +247,42 @@ class Starbash:
  self.close()
  return handled

- def _add_session(self, f: str, image_doc_id: int, header: dict) -> None:
+ def _add_session(self, image_doc_id: int, header: dict) -> None:
  """We just added a new image, create or update its session entry as needed."""
- filter = header.get(Database.FILTER_KEY, "unspecified")
  image_type = header.get(Database.IMAGETYP_KEY)
  date = header.get(Database.DATE_OBS_KEY)
  if not date or not image_type:
  logging.warning(
- "Image %s missing either DATE-OBS or IMAGETYP FITS header, skipping...",
- f,
+ "Image '%s' missing either DATE-OBS or IMAGETYP FITS header, skipping...",
+ header.get("path", "unspecified"),
  )
  else:
  exptime = header.get(Database.EXPTIME_KEY, 0)
- telescop = header.get(Database.TELESCOP_KEY, "unspecified")
+
  new = {
- Database.FILTER_KEY: filter,
- Database.START_KEY: date,
- Database.END_KEY: date, # FIXME not quite correct, should be longer by exptime
- Database.IMAGE_DOC_KEY: image_doc_id,
- Database.IMAGETYP_KEY: image_type,
- Database.NUM_IMAGES_KEY: 1,
- Database.EXPTIME_TOTAL_KEY: exptime,
- Database.OBJECT_KEY: header.get(Database.OBJECT_KEY, "unspecified"),
- Database.TELESCOP_KEY: telescop,
+ get_column_name(Database.START_KEY): date,
+ get_column_name(
+ Database.END_KEY
+ ): date, # FIXME not quite correct, should be longer by exptime
+ get_column_name(Database.IMAGE_DOC_KEY): image_doc_id,
+ get_column_name(Database.IMAGETYP_KEY): image_type,
+ get_column_name(Database.NUM_IMAGES_KEY): 1,
+ get_column_name(Database.EXPTIME_TOTAL_KEY): exptime,
+ get_column_name(Database.EXPTIME_KEY): exptime,
  }
+
+ filter = header.get(Database.FILTER_KEY)
+ if filter:
+ new[get_column_name(Database.FILTER_KEY)] = filter
+
+ telescop = header.get(Database.TELESCOP_KEY)
+ if telescop:
+ new[get_column_name(Database.TELESCOP_KEY)] = telescop
+
+ obj = header.get(Database.OBJECT_KEY)
+ if obj:
+ new[get_column_name(Database.OBJECT_KEY)] = obj
+
  session = self.db.get_session(new)
  self.db.upsert_session(new, existing=session)

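The session row is now keyed through get_column_name(), and the FILTER/TELESCOP/OBJECT columns are written only when the header actually carries them. A sketch of the resulting pattern (header values illustrative):

    header = {"DATE-OBS": "2025-09-09T21:14:00", "IMAGETYP": "LIGHT", "EXPTIME": 120}
    new = {get_column_name(Database.START_KEY): header["DATE-OBS"]}
    filt = header.get("FILTER")     # absent in this header...
    if filt:
        new[get_column_name(Database.FILTER_KEY)] = filt   # ...so no filter column is set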
@@ -267,17 +312,6 @@ class Starbash:

  """
  # Get reference image to access CCD-TEMP and DATE-OBS
- metadata: dict = ref_session.get("metadata", {})
- ref_temp = metadata.get("CCD-TEMP", None)
- ref_date_str = metadata.get(Database.DATE_OBS_KEY)
-
- # Parse reference date for time delta calculations
- ref_date = None
- if ref_date_str:
- try:
- ref_date = datetime.fromisoformat(ref_date_str)
- except (ValueError, TypeError):
- logging.warning(f"Malformed session ref date: {ref_date_str}")

  # Build search conditions - MUST match criteria
  conditions = {
@@ -294,6 +328,37 @@ class Starbash:
  # Search for candidate sessions
  candidates = self.db.search_session(where_tuple(conditions))

+ return self.score_candidates(candidates, ref_session)
+
+ def score_candidates(
+ self, candidates: list[dict[str, Any]], ref_session: SessionRow
+ ) -> list[SessionRow]:
+ """Given a list of images or sessions, try to rank that list by desirability.
+
+ Return a list of possible images/sessions which would be acceptable. The more desirable
+ matches are first in the list. Possibly in the future I might have a 'score' and reason
+ given for each ranking.
+
+ The following criteria MUST match to be acceptable:
+ * matches requested imagetyp.
+ * same filter as reference session (in the case want_type==FLAT only)
+ * same telescope as reference session
+
+ Quality is determined by (most important first):
+ * CCD-TEMP closer to that of the reference session
+ * smaller DATE-OBS delta to the reference session
+
+ Eventually the code will check the following for 'nice to have' (but not now):
+ * TBD
+
+ Possibly eventually this code could be moved into recipes.
+ """
+
+ metadata: dict = ref_session.get("metadata", {})
+ ref_temp = metadata.get("CCD-TEMP", None)
+ ref_date_str = metadata.get(Database.DATE_OBS_KEY)
+
  # Now score and sort the candidates
  scored_candidates = []

@@ -318,23 +383,19 @@ class Starbash:
  # If we can't parse temps, give a neutral score
  score += 0

- # Score by date/time proximity (secondary importance)
- if ref_date is not None:
- candidate_date_str = candidate_image.get(Database.DATE_OBS_KEY)
- if candidate_date_str:
- try:
- candidate_date = datetime.fromisoformat(candidate_date_str)
- time_delta = abs(
- (ref_date - candidate_date).total_seconds()
- )
- # Closer in time = better score
- # Same day ≈ 100, 7 days 37, 30 days 9
- # Using 7-day half-life
- score += 100 * (2.718 ** (-time_delta / (7 * 86400)))
- except (ValueError, TypeError):
- logging.warning(
- f"Could not parse candidate date: {candidate_date_str}"
- )
+ # Score by date/time proximity (secondary importance)
+ candidate_date_str = candidate_image.get(Database.DATE_OBS_KEY)
+ if ref_date_str and candidate_date_str:
+ try:
+ ref_date = datetime.fromisoformat(ref_date_str)
+ candidate_date = datetime.fromisoformat(candidate_date_str)
+ time_delta = abs((ref_date - candidate_date).total_seconds())
+ # Closer in time = better score
+ # Same day ≈ 100, 7 days ≈ 37, 30 days ≈ 1
+ # (7-day exponential time constant, not a true half-life)
+ score += 100 * (2.718 ** (-time_delta / (7 * 86400)))
+ except (ValueError, TypeError):
+ logging.warning("Malformed date - ignoring entry")

  scored_candidates.append((score, candidate))

@@ -345,10 +406,10 @@ class Starbash:
  )
  continue

- # Sort by score (highest first) and return just the sessions
+ # Sort by score (highest first)
  scored_candidates.sort(key=lambda x: x[0], reverse=True)

- return [candidate for score, candidate in scored_candidates]
+ return [candidate for _, candidate in scored_candidates]

  def search_session(self) -> list[SessionRow]:
  """Search for sessions, optionally filtered by the current selection."""
@@ -356,23 +417,24 @@ class Starbash:
  conditions = self.selection.get_query_conditions()
  return self.db.search_session(conditions)

- def _reconstruct_image_path(self, image: ImageRow) -> ImageRow:
+ def _add_image_abspath(self, image: ImageRow) -> ImageRow:
  """Reconstruct absolute path from image row containing repo_url and relative path.

  Args:
  image: Image record with 'repo_url' and 'path' (relative) fields

  Returns:
- Modified image record with 'path' as absolute path
+ Modified image record with 'abspath' as absolute path
  """
- repo_url = image.get("repo_url")
- relative_path = image.get("path")
+ if not image.get("abspath"):
+ repo_url = image.get(Database.REPO_URL_KEY)
+ relative_path = image.get("path")

- if repo_url and relative_path:
- repo = self.repo_manager.get_repo_by_url(repo_url)
- if repo:
- absolute_path = repo.resolve_path(relative_path)
- image["path"] = str(absolute_path)
+ if repo_url and relative_path:
+ repo = self.repo_manager.get_repo_by_url(repo_url)
+ if repo:
+ absolute_path = repo.resolve_path(relative_path)
+ image["abspath"] = str(absolute_path)

  return image

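A sketch of what _add_image_abspath() computes (row values illustrative; sb is an assumed Starbash instance):

    img = {"repo_url": "file:///data/lights", "path": "2025-09-09/M31_0001.fits"}
    img = sb._add_image_abspath(img)
    # img["abspath"] -> "/data/lights/2025-09-09/M31_0001.fits", via Repo.resolve_path()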
@@ -380,13 +442,52 @@ class Starbash:
  """
  Get the reference ImageRow for a session with absolute path.
  """
+ from starbash.database import SearchCondition
+
  images = self.db.search_image(
- {Database.ID_KEY: session[get_column_name(Database.IMAGE_DOC_KEY)]}
+ [
+ SearchCondition(
+ "i.id", "=", session[get_column_name(Database.IMAGE_DOC_KEY)]
+ )
+ ]
  )
  assert (
  len(images) == 1
  ), f"Expected exactly one reference for session, found {len(images)}"
- return self._reconstruct_image_path(images[0])
+ return self._add_image_abspath(images[0])
+
+ def get_master_images(
+ self, imagetyp: str | None = None, reference_session: SessionRow | None = None
+ ) -> list[ImageRow]:
+ """Return a list of the specified master imagetyp (bias, flat, etc.),
+ or any type if not specified.
+
+ The first image will be the 'best' match; remaining entries are progressively worse matches.
+
+ (the following is not yet implemented)
+ If reference_session is provided it will be used to refine the search as follows:
+ * The telescope must match
+ * The image resolutions and binnings must match
+ * The filter must match (for FLAT frames only)
+ * Preferably the master date_obs would be either before or slightly after (<24 hrs) the reference session start time
+ * Preferably the master date_obs should be the closest in date to the reference session start time
+ * The camera temperature should be as close as possible to the reference session camera temperature
+ """
+ master_repo = self.repo_manager.get_repo_by_kind("master")
+
+ if master_repo is None:
+ logging.warning("No master repo configured - skipping master frame load.")
+ return []
+
+ # Search for images in the master repo only
+ from starbash.database import SearchCondition
+
+ search_conditions = [SearchCondition("r.url", "=", master_repo.url)]
+ if imagetyp:
+ search_conditions.append(SearchCondition("i.imagetyp", "=", imagetyp))
+
+ images = self.db.search_image(search_conditions)
+ return images

  def get_session_images(self, session: SessionRow) -> list[ImageRow]:
  """
@@ -406,20 +507,52 @@ class Starbash:
  Raises:
  ValueError: If session_id is not found in the database
  """
- # Query images that match ALL session criteria including date range
- conditions = {
- Database.FILTER_KEY: session[get_column_name(Database.FILTER_KEY)],
- Database.IMAGETYP_KEY: session[get_column_name(Database.IMAGETYP_KEY)],
- Database.OBJECT_KEY: session[get_column_name(Database.OBJECT_KEY)],
- Database.TELESCOP_KEY: session[get_column_name(Database.TELESCOP_KEY)],
- "date_start": session[get_column_name(Database.START_KEY)],
- "date_end": session[get_column_name(Database.END_KEY)],
- }
+ from starbash.database import SearchCondition

- # Single query with all conditions
+ # Query images that match ALL session criteria including date range
+ # Note: We need to search JSON metadata for FILTER, IMAGETYP, OBJECT, TELESCOP
+ # since they're not indexed columns in the images table
+ conditions = [
+ SearchCondition(
+ "i.date_obs", ">=", session[get_column_name(Database.START_KEY)]
+ ),
+ SearchCondition(
+ "i.date_obs", "<=", session[get_column_name(Database.END_KEY)]
+ ),
+ SearchCondition(
+ "i.imagetyp", "=", session[get_column_name(Database.IMAGETYP_KEY)]
+ ),
+ ]
+
+ # we never want to return 'master' images as part of the session image paths
+ # (because we will be passing these to a tool such as Siril to generate masters or
+ # some other downstream image)
+ master_repo = self.repo_manager.get_repo_by_kind("master")
+ if master_repo is not None:
+ conditions.append(SearchCondition("r.url", "<>", master_repo.url))
+
+ # Single query with indexed date conditions
  images = self.db.search_image(conditions)
+
+ # We no longer filter by target (object) because it might not be set anyway
+ filtered_images = []
+ for img in images:
+ if (
+ img.get(Database.FILTER_KEY)
+ == session[get_column_name(Database.FILTER_KEY)]
+ # and img.get(Database.OBJECT_KEY)
+ # == session[get_column_name(Database.OBJECT_KEY)]
+ and img.get(Database.TELESCOP_KEY)
+ == session[get_column_name(Database.TELESCOP_KEY)]
+ ):
+ filtered_images.append(img)
+
  # Reconstruct absolute paths for all images
- return [self._reconstruct_image_path(img) for img in images] if images else []
+ return (
+ [self._add_image_abspath(img) for img in filtered_images]
+ if filtered_images
+ else []
+ )

  def remove_repo_ref(self, url: str) -> None:
  """
@@ -447,6 +580,7 @@ class Starbash:
  # Match by converting to file:// URL format if needed
  if ref_dir == url or f"file://{ref_dir}" == url:
  repo_refs.remove(ref)
+
  found = True
  break

@@ -456,24 +590,76 @@ class Starbash:
  # Write the updated config
  self.user_repo.write_config()

- def reindex_repo(self, repo: Repo, force: bool = False):
+ def add_image_to_db(self, repo: Repo, f: Path, force: bool = False) -> None:
+ """Read FITS header from file and add/update image entry in the database."""
+
+ path = repo.get_path()
+ if not path:
+ raise ValueError(f"Repo path not found for {repo}")
+
+ whitelist = None
+ config = self.repo_manager.merged.get("config")
+ if config:
+ whitelist = config.get("fits-whitelist", None)
+
+ try:
+ # Convert absolute path to relative path within repo
+ relative_path = f.relative_to(path)
+
+ found = self.db.get_image(repo.url, str(relative_path))
+
+ # for debugging sometimes we want to limit scanning to a single directory or file
+ # debug_target = "masters-raw/2025-09-09/DARK"
+ debug_target = None
+ if debug_target:
+ if str(relative_path).startswith(debug_target):
+ logging.error("Debugging %s...", f)
+ found = False
+ else:
+ found = True # skip processing
+ force = False
+
+ if not found or force:
+ # Read and log the primary header (HDU 0)
+ with fits.open(str(f), memmap=False) as hdul:
+ # convert headers to dict
+ hdu0: Any = hdul[0]
+ header = hdu0.header
+ if type(header).__name__ == "Unknown":
+ raise ValueError(f"FITS header has Unknown type: {f}")
+
+ items = header.items()
+ headers = {}
+ for key, value in items:
+ if (not whitelist) or (key in whitelist):
+ headers[key] = value
+ logging.debug("Headers for %s: %s", f, headers)
+ # Store relative path in database
+ headers["path"] = str(relative_path)
+ image_doc_id = self.db.upsert_image(headers, repo.url)
+
+ if not found:
+ # Update the session infos, but ONLY on first file scan
+ # (otherwise invariants will get messed up)
+ self._add_session(image_doc_id, header)
+
+ except Exception as e:
+ logging.warning("Failed to read FITS header for %s: %s", f, e)
+
+ def reindex_repo(self, repo: Repo, force: bool = False, subdir: str | None = None):
  """Reindex the given repository."""

  # make sure this new repo is listed in the repos table
  self.repo_db_update() # not really ideal, a more optimal version would just add the new repo

- # FIXME, add a method to get just the repos that contain images
- if repo.is_scheme("file") and repo.kind != "recipe":
- logging.debug("Reindexing %s...", repo.url)
+ path = repo.get_path()

- whitelist = None
- config = self.repo_manager.merged.get("config")
- if config:
- whitelist = config.get("fits-whitelist", None)
+ if path and repo.is_scheme("file") and repo.kind != "recipe":
+ logging.debug("Reindexing %s...", repo.url)

- path = repo.get_path()
- if not path:
- raise ValueError(f"Repo path not found for {repo}")
+ if subdir:
+ path = path / subdir
+ # used to debug

  # Find all FITS files under this repo path
  for f in track(
@@ -481,37 +667,7 @@ class Starbash:
  description=f"Indexing {repo.url}...",
  ):
  # progress.console.print(f"Indexing {f}...")
- try:
- # Convert absolute path to relative path within repo
- relative_path = f.relative_to(path)
-
- found = self.db.get_image(repo.url, str(relative_path))
- if not found or force:
- # Read and log the primary header (HDU 0)
- with fits.open(str(f), memmap=False) as hdul:
- # convert headers to dict
- hdu0: Any = hdul[0]
- header = hdu0.header
- if type(header).__name__ == "Unknown":
- raise ValueError("FITS header has Unknown type: %s", f)
-
- items = header.items()
- headers = {}
- for key, value in items:
- if (not whitelist) or (key in whitelist):
- headers[key] = value
- logging.debug("Headers for %s: %s", f, headers)
- # Store relative path in database
- headers["path"] = str(relative_path)
- image_doc_id = self.db.upsert_image(headers, repo.url)
-
- if not found:
- # Update the session infos, but ONLY on first file scan
- # (otherwise invariants will get messed up)
- self._add_session(str(f), image_doc_id, header)
-
- except Exception as e:
- logging.warning("Failed to read FITS header for %s: %s", f, e)
+ self.add_image_to_db(repo, f, force=force)

  def reindex_repos(self, force: bool = False):
  """Reindex all repositories managed by the RepoManager."""
@@ -520,12 +676,14 @@ class Starbash:
  for repo in track(self.repo_manager.repos, description="Reindexing repos..."):
  self.reindex_repo(repo, force=force)

- def run_all_stages(self):
- """On the currently active session, run all processing stages"""
- logging.info("--- Running all stages ---")
+ def _get_stages(self, name: str) -> list[dict[str, Any]]:
+ """Get all pipeline stages defined in the merged configuration.

+ Returns:
+ List of stage definitions (dictionaries with 'name' and 'priority')
+ """
  # 1. Get all pipeline definitions (the `[[stages]]` tables with name and priority).
- pipeline_definitions = self.repo_manager.merged.getall("stages")
+ pipeline_definitions = self.repo_manager.merged.getall(name)
  flat_pipeline_steps = list(itertools.chain.from_iterable(pipeline_definitions))

  # 2. Sort the pipeline steps by their 'priority' field.
@@ -537,9 +695,32 @@ class Starbash:
  f"invalid stage definition: a stage is missing the required 'priority' key"
  ) from e

- logging.info(
+ logging.debug(
  f"Found {len(sorted_pipeline)} pipeline steps to run in order of priority."
  )
+ return sorted_pipeline
+
+ def run_all_stages(self):
+ """On the currently active session, run all processing stages.
+
+ New design, not yet implemented:
+ * find all recipes
+ * for each target in the current selection:
+ * select ONE recipe for processing that target (check recipe.auto.require.* conditions)
+ * create a processing output directory (for high value final files)
+ * create a temporary processing directory (for intermediate files - shared by all stages)
+ * init session context (it will be shared for all following steps)
+ * iterate over all light frame sessions in the current selection
+ * for each session:
+ * update context input and output files
+ * run session.light stages
+ * after all sessions are processed, run final.stack stages (using the shared context and temp dir)
+ """
+ logging.info("--- Running all stages ---")
+
+ # 1. Get all pipeline definitions (the `[[stages]]` tables with name and priority).
+ sorted_pipeline = self._get_stages("stages")

  self.init_context()
  # 4. Iterate through the sorted pipeline and execute the associated tasks.
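_get_stages() flattens every [[stages]] table from the merged config and orders the steps by 'priority'. A sketch of that ordering (stage dicts illustrative):

    stages = [{"name": "stack", "priority": 20}, {"name": "calibrate", "priority": 10}]
    ordered = sorted(stages, key=lambda s: s["priority"])
    # -> calibrate (10) runs before stack (20); a step without 'priority' raises ValueError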
@@ -561,50 +742,122 @@ class Starbash:
  for task in tasks_to_run:
  self.run_stage(task)

+ def get_recipe_for_session(
+ self, session: SessionRow, step: dict[str, Any]
+ ) -> Repo | None:
+ """Try to find a recipe that can be used to process the given session for the given step name
+ (master-dark, master-bias, light, stack, etc...)
+
+ * if a recipe doesn't have a matching recipe.stage.<step_name> it is not considered
+ * As part of this checking we will look at recipe.auto.require.* conditions to see if the recipe
+ is suitable for this session.
+ * the imagetyp of this session must match step.input
+
+ Currently we return just one Repo but eventually we should support multiple matching recipes
+ and make the user pick (by throwing an exception?).
+ """
+ # Get all recipe repos - FIXME add a getall(kind) to RepoManager
+ recipe_repos = [r for r in self.repo_manager.repos if r.kind() == "recipe"]
+
+ step_name = step.get("name")
+ if not step_name:
+ raise ValueError("Invalid pipeline step found: missing 'name' key.")
+
+ input_name = step.get("input")
+ if not input_name:
+ raise ValueError("Invalid pipeline step found: missing 'input' key.")
+
+ imagetyp = session.get(get_column_name(Database.IMAGETYP_KEY))
+
+ if not imagetyp or input_name != self.aliases.normalize(imagetyp):
+ logging.debug(
+ f"Session imagetyp '{imagetyp}' does not match step input '{input_name}', skipping"
+ )
+ return None
+
+ # Get session metadata for checking requirements
+ session_metadata = session.get("metadata", {})
+
+ for repo in recipe_repos:
+ # Check if this recipe has the requested stage
+ stage_config = repo.get(f"recipe.stage.{step_name}")
+ if not stage_config:
+ logging.debug(
+ f"Recipe {repo.url} does not have stage '{step_name}', skipping"
+ )
+ continue
+
+ # Check auto.require conditions if they exist
+
+ # If requirements are specified, check if session matches
+ required_filters = repo.get("auto.require.filter", [])
+ if required_filters:
+ session_filter = self.aliases.normalize(
+ session_metadata.get(Database.FILTER_KEY)
+ )
+
+ # Session must have a filter that matches one of the required filters
+ if not session_filter or session_filter not in required_filters:
+ logging.debug(
+ f"Recipe {repo.url} requires filters {required_filters}, "
+ f"session has '{session_filter}', skipping"
+ )
+ continue
+
+ required_cameras = repo.get("auto.require.camera", [])
+ if required_cameras:
+ session_camera = self.aliases.normalize(
+ session_metadata.get("INSTRUME")
+ ) # Camera identifier
+
+ # Session must have a camera that matches one of the required cameras
+ if not session_camera or session_camera not in required_cameras:
+ logging.debug(
+ f"Recipe {repo.url} requires cameras {required_cameras}, "
+ f"session has '{session_camera}', skipping"
+ )
+ continue
+
+ # This recipe matches!
+ logging.info(f"Selected recipe {repo.url} for stage '{step_name}'")
+ return repo
+
+ # No matching recipe found
+ return None
+
  def run_master_stages(self):
  """Generate any missing master frames

  Steps:
- * set all_tasks to be all tasks for when == "setup.masters"
+ * set all_tasks to be all tasks for when == "setup.master.bias"
  * loop over all currently unfiltered sessions
  * for each session loop across all_tasks
  * if task input.type == the imagetyp for this current session
  * add_input_to_context() add the input files to the context (from the session)
  * run_stage(task) to generate the new master frame
  """
+ sorted_pipeline = self._get_stages("master-stages")
  sessions = self.search_session()
- for session in sessions:
- imagetyp = session[get_column_name(Database.IMAGETYP_KEY)]
- logging.debug(
- f"Processing session ID {session[get_column_name(Database.ID_KEY)]} with imagetyp '{imagetyp}'"
- )
-
- # 3. Get all available task definitions (the `[[stage]]` tables with tool, script, when).
- task_definitions = self.repo_manager.merged.getall("stage")
- all_tasks = list(itertools.chain.from_iterable(task_definitions))
-
- # Find all tasks that should run during the "setup.masters" step.
- tasks_to_run = [
- task for task in all_tasks if task.get("when") == "setup.masters"
- ]
-
- for task in tasks_to_run:
- input_config = task.get("input", {})
- input_type = input_config.get("type")
- if imagetyp_equals(input_type, imagetyp):
- logging.info(
- f" Running master stage task for imagetyp '{imagetyp}'"
- )
-
- # Create a default process dir in /tmp, though more advanced 'session' based workflows will
- # probably override this and place it somewhere persistent.
- with tempfile.TemporaryDirectory(prefix="session_tmp_") as temp_dir:
- logging.debug(
- f"Created temporary session directory: {temp_dir}"
- )
- self.init_context()
- self.context["process_dir"] = temp_dir
- self.add_session_to_context(session)
+ for session in track(sessions, description="Generating masters..."):
+ # 4. Iterate through the sorted pipeline and execute the associated tasks.
+ # FIXME unify the master vs normal step running code
+ for step in sorted_pipeline:
+ task = None
+ recipe = self.get_recipe_for_session(session, step)
+ if recipe:
+ task = recipe.get("recipe.stage." + step["name"])
+
+ if task:
+ input_config = task.get("input", {})
+ input_type = input_config.get("type")
+ if not input_type:
+ raise ValueError(f"Task for step '{step['name']}' is missing the required input.type")
+
+ # Create a default process dir in /tmp.
+ # FIXME - eventually we should allow hashing or somesuch to keep reusing processing
+ # dirs for particular targets?
+ with ProcessingContext(self):
+ self.set_session_in_context(session)
  self.run_stage(task)

  def init_context(self) -> None:
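The auto.require checks reduce to membership tests over normalized values. A sketch (the filter list and the normalization result are assumptions; Aliases semantics are defined elsewhere in this release):

    required_filters = ["ha", "oiii"]                  # hypothetical recipe auto.require.filter
    session_filter = sb.aliases.normalize("H-Alpha")   # assumed to normalize to "ha"
    acceptable = (not required_filters) or (session_filter in required_filters)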
@@ -615,26 +868,21 @@ class Starbash:

  # Update the context with runtime values.
  runtime_context = {
- "masters": "/workspaces/starbash/images/masters", # FIXME find this the correct way
+ # "masters": "/workspaces/starbash/images/masters", # FIXME find this the correct way
  }
  self.context.update(runtime_context)

- def add_session_to_context(self, session: SessionRow) -> None:
+ def set_session_in_context(self, session: SessionRow) -> None:
  """adds to context from the indicated session:
- * input_files - all of the files mentioned in the session
+
+ Sets the following context variables based on the provided session:
  * instrument - for the session
  * date - the localtimezone date of the session
  * imagetyp - the imagetyp of the session
  * session - the current session row (joined with a typical image) (can be used to
  find things like telescope, temperature ...)
+ * session_config - a short human readable description of the session - suitable for logs or filenames
  """
- # Get images for this session
- images = self.get_session_images(session)
- logging.debug(f"Adding {len(images)} files as context.input_files")
- self.context["input_files"] = [
- img["path"] for img in images
- ] # Pass in the file list via the context dict
-
  # it is okay to give them the actual session row, because we're never using it again
  self.context["session"] = session

@@ -644,19 +892,67 @@ class Starbash:

  imagetyp = session.get(get_column_name(Database.IMAGETYP_KEY))
  if imagetyp:
+ imagetyp = self.aliases.normalize(imagetyp)
  self.context["imagetyp"] = imagetyp

+ # add a short human readable description of the session - suitable for logs or filenames
+ session_config = f"{imagetyp}"
+
+ metadata = session.get("metadata", {})
+ filter = metadata.get(Database.FILTER_KEY)
+ if (imagetyp == "flat" or imagetyp == "light") and filter:
+ # we only care about filters in these cases
+ session_config += f"_{filter}"
+ if imagetyp == "dark":
+ exptime = session.get(get_column_name(Database.EXPTIME_KEY))
+ if exptime:
+ session_config += f"_{int(float(exptime))}s"
+
+ self.context["session_config"] = session_config
+
  date = session.get(get_column_name(Database.START_KEY))
  if date:
  self.context["date"] = to_shortdate(date)

+ def add_input_masters(self, stage: dict) -> None:
+ """Based on input.masters, add the correct master frames as context.master.<type> filepaths."""
+ session = self.context.get("session")
+ assert session is not None, "context.session should have been already set"
+
+ input_config = stage.get("input", {})
+ master_types: list[str] = input_config.get("masters", [])
+ for master_type in master_types:
+ masters = self.get_master_images(
+ imagetyp=master_type, reference_session=session
+ )
+ if not masters:
+ raise RuntimeError(
+ f"No master frames of type '{master_type}' found for stage '{stage.get('name')}'"
+ )
+
+ context_master = self.context.setdefault("master", {})
+
+ if len(masters) > 1:
+ logging.debug(
+ f"Multiple ({len(masters)}) master frames of type '{master_type}' found, using first. FIXME."
+ )
+
+ # Try to rank the images by desirability
+ masters = self.score_candidates(masters, session)
+
+ self._add_image_abspath(masters[0]) # make sure abspath is populated
+ selected_master = masters[0]["abspath"]
+ logging.info(f"For master '{master_type}', using: {selected_master}")
+
+ context_master[master_type] = selected_master
+
  def add_input_files(self, stage: dict) -> None:
  """adds to context.input_files based on the stage input config"""
  input_config = stage.get("input")
- input_required = False
+ input_required = 0
  if input_config:
  # input.required is now a minimum input file count (0 = not required)
- input_required = input_config.get("required", True)
+ input_required = input_config.get("required", 0)
  source = input_config.get("source")
  if source is None:
  raise ValueError(
@@ -672,12 +968,17 @@ class Starbash:
  input_files # Pass in the file list via the context dict
  )
  elif source == "repo":
- # We expect that higher level code has already added the correct input files
- # to the context
- if not "input_files" in self.context:
- raise RuntimeError(
- "Input config specifies 'repo' but no 'input_files' found in context"
- )
+ # Get images for this session (by pulling from repo)
+ session = self.context.get("session")
+ assert (
+ session is not None
+ ), "context.session should have been already set"
+
+ images = self.get_session_images(session)
+ logging.debug(f"Using {len(images)} files as input_files")
+ self.context["input_files"] = [
+ img["abspath"] for img in images
+ ] # Pass in the file list via the context dict
  else:
  raise ValueError(
  f"Stage '{stage.get('name')}' has invalid 'input' source: {source}"
@@ -689,8 +990,8 @@ class Starbash:
  if "input_files" in self.context:
  del self.context["input_files"]

- if input_required and not "input_files" in self.context:
- raise RuntimeError("No input files found for stage")
+ if input_required and len(self.context.get("input_files", [])) < input_required:
+ raise RuntimeError(f"Stage requires at least {input_required} input files")

  def add_output_path(self, stage: dict) -> None:
  """Adds output path information to context based on the stage output config.
@@ -700,6 +1001,7 @@ class Starbash:
  - context.output.base_path - full path without file extension
  - context.output.suffix - file extension (e.g., .fits or .fit.gz)
  - context.output.full_path - complete output file path
+ - context.output.repo - the destination Repo (if applicable)
  """
  output_config = stage.get("output")
  if not output_config:
@@ -754,8 +1056,8 @@ class Starbash:
  "base_path": base_path,
  # "suffix": full_path.suffix, not needed I think
  "full_path": full_path,
+ "repo": dest_repo,
  }
-
  else:
  raise ValueError(
  f"Unsupported output destination type: {dest}. Only 'repo' is currently supported."
@@ -777,17 +1079,29 @@ class Starbash:

  logging.info(f"Running stage: {stage_desc}")

- tool_name = stage.get("tool")
- if not tool_name:
+ tool_dict = stage.get("tool")
+ if not tool_dict:
  raise ValueError(
  f"Stage '{stage.get('name')}' is missing a 'tool' definition."
  )
- tool: Tool | None = tools.get(tool_name)
+ tool_name = tool_dict.get("name")
+ if not tool_name:
+ raise ValueError(
+ f"Stage '{stage.get('name')}' is missing a 'tool.name' definition."
+ )
+ tool = tools.get(tool_name)
  if not tool:
  raise ValueError(
  f"Tool '{tool_name}' for stage '{stage.get('name')}' not found."
  )
- logging.debug(f" Using tool: {tool_name}")
+ logging.debug(f"Using tool: {tool_name}")
+ tool.set_defaults()
+
+ # Allow stage to override tool timeout if specified
+ tool_timeout = tool_dict.get("timeout")
+ if tool_timeout is not None:
+ tool.timeout = float(tool_timeout)
+ logging.debug(f"Using tool timeout: {tool.timeout} seconds")

  script_filename = stage.get("script-file", tool.default_script_file)
  if script_filename:
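A stage's 'tool' is now a table rather than a bare name, which is what makes the per-stage timeout override possible. A sketch of the expected shape (values illustrative):

    stage = {"name": "stack", "tool": {"name": "siril", "timeout": 600}}
    tool_dict = stage["tool"]
    tool_name = tool_dict["name"]        # required; missing -> ValueError
    timeout = tool_dict.get("timeout")   # optional; applied as float seconds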
@@ -806,6 +1120,7 @@ class Starbash:
  stage_context = stage.get("context", {})
  self.context.update(stage_context)
  self.add_input_files(stage)
+ self.add_input_masters(stage)
  self.add_output_path(stage)

  # if the output path already exists and is newer than all input files, skip processing
@@ -828,3 +1143,5 @@ class Starbash:

  if not output_path or not os.path.exists(output_path):
  raise RuntimeError(f"Expected output file not found: {output_path}")
+ else:
+ self.add_image_to_db(output_info["repo"], Path(output_path), force=True)