starbash-0.1.11-py3-none-any.whl → starbash-0.1.15-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. repo/__init__.py +1 -1
  2. repo/manager.py +14 -23
  3. repo/repo.py +52 -10
  4. starbash/__init__.py +10 -3
  5. starbash/aliases.py +49 -4
  6. starbash/analytics.py +3 -2
  7. starbash/app.py +287 -565
  8. starbash/check_version.py +18 -0
  9. starbash/commands/__init__.py +2 -1
  10. starbash/commands/info.py +26 -21
  11. starbash/commands/process.py +76 -24
  12. starbash/commands/repo.py +25 -68
  13. starbash/commands/select.py +140 -148
  14. starbash/commands/user.py +88 -23
  15. starbash/database.py +41 -27
  16. starbash/defaults/starbash.toml +1 -0
  17. starbash/exception.py +21 -0
  18. starbash/main.py +29 -7
  19. starbash/paths.py +23 -9
  20. starbash/processing.py +724 -0
  21. starbash/recipes/README.md +3 -0
  22. starbash/recipes/master_bias/starbash.toml +4 -1
  23. starbash/recipes/master_dark/starbash.toml +0 -1
  24. starbash/recipes/osc.py +190 -0
  25. starbash/recipes/osc_dual_duo/starbash.toml +31 -34
  26. starbash/recipes/osc_simple/starbash.toml +82 -0
  27. starbash/recipes/osc_single_duo/starbash.toml +51 -32
  28. starbash/recipes/seestar/starbash.toml +82 -0
  29. starbash/recipes/starbash.toml +8 -9
  30. starbash/selection.py +29 -38
  31. starbash/templates/repo/master.toml +7 -3
  32. starbash/templates/repo/processed.toml +7 -2
  33. starbash/templates/userconfig.toml +9 -0
  34. starbash/toml.py +13 -13
  35. starbash/tool.py +186 -149
  36. starbash-0.1.15.dist-info/METADATA +216 -0
  37. starbash-0.1.15.dist-info/RECORD +45 -0
  38. starbash/recipes/osc_dual_duo/starbash.py +0 -147
  39. starbash-0.1.11.dist-info/METADATA +0 -147
  40. starbash-0.1.11.dist-info/RECORD +0 -40
  41. {starbash-0.1.11.dist-info → starbash-0.1.15.dist-info}/WHEEL +0 -0
  42. {starbash-0.1.11.dist-info → starbash-0.1.15.dist-info}/entry_points.txt +0 -0
  43. {starbash-0.1.11.dist-info → starbash-0.1.15.dist-info}/licenses/LICENSE +0 -0
starbash/app.py CHANGED
@@ -1,34 +1,18 @@
- import cmd
  import logging
- from importlib import resources
- import os
+ import shutil
+ from datetime import datetime
  from pathlib import Path
- import tempfile
- import typer
- import tomlkit
- from tomlkit.toml_file import TOMLFile
- import glob
  from typing import Any
+
+ import rich.console
+ import typer
  from astropy.io import fits
- import itertools
- from rich.progress import track
  from rich.logging import RichHandler
- import shutil
- from datetime import datetime
- import rich.console
- import copy
+ from rich.progress import track
 
  import starbash
- from starbash import console, _is_test_env, to_shortdate
- from starbash.aliases import Aliases
- from starbash.database import Database, SessionRow, ImageRow, get_column_name
- from repo import Repo, repo, repo_suffix
- from starbash.toml import toml_from_template
- from starbash.tool import Tool, expand_context, expand_context_unsafe
- from repo import RepoManager
- from starbash.tool import tools
- from starbash.paths import get_user_config_dir, get_user_data_dir
- from starbash.selection import Selection, where_tuple
+ from repo import Repo, RepoManager, repo_suffix
+ from starbash.aliases import Aliases, normalize_target_name
  from starbash.analytics import (
      NopAnalytics,
      analytics_exception,
@@ -36,17 +20,30 @@ from starbash.analytics import (
      analytics_shutdown,
      analytics_start_transaction,
  )
-
- # Type aliases for better documentation
+ from starbash.check_version import check_version
+ from starbash.database import (
+     Database,
+     ImageRow,
+     SearchCondition,
+     SessionRow,
+     get_column_name,
+ )
+ from starbash.paths import get_user_config_dir, get_user_config_path
+ from starbash.selection import Selection, build_search_conditions
+ from starbash.toml import toml_from_template
+ from starbash.tool import preflight_tools
 
 
- def setup_logging(stderr: bool = False):
+ def setup_logging(console: rich.console.Console):
      """
      Configures basic logging.
      """
-     console = rich.console.Console(stderr=stderr)
+     from starbash import _is_test_env  # Lazy import to avoid circular dependency
+
      handlers = (
-         [RichHandler(console=console, rich_tracebacks=True)] if not _is_test_env else []
+         [RichHandler(console=console, rich_tracebacks=True, markup=True)]
+         if not _is_test_env
+         else []
      )
      logging.basicConfig(
          level=starbash.log_filter_level,  # use the global log filter level
@@ -56,12 +53,6 @@ def setup_logging(stderr: bool = False):
      )
 
 
- def get_user_config_path() -> Path:
-     """Returns the path to the user config file."""
-     config_dir = get_user_config_dir()
-     return config_dir / repo_suffix
-
-
  def create_user() -> Path:
      """Create user directories if they don't exist yet."""
      path = get_user_config_path()
@@ -77,6 +68,7 @@ def copy_images_to_dir(images: list[ImageRow], output_dir: Path) -> None:
      This function requires that "abspath" already be populated in each ImageRow. Normally
      the caller does this by calling Starbash._add_image_abspath() on the image.
      """
+     from starbash import console  # Lazy import to avoid circular dependency
 
      # Export images
      console.print(f"[cyan]Exporting {len(images)} images to {output_dir}...[/cyan]")
@@ -115,7 +107,7 @@ def copy_images_to_dir(images: list[ImageRow], output_dir: Path) -> None:
              error_count += 1
 
      # Print summary
-     console.print(f"[green]Export complete![/green]")
+     console.print("[green]Export complete![/green]")
      if linked_count > 0:
          console.print(f"  Linked: {linked_count} files")
      if copied_count > 0:
@@ -124,33 +116,6 @@ def copy_images_to_dir(images: list[ImageRow], output_dir: Path) -> None:
          console.print(f"  [red]Errors: {error_count} files[/red]")
 
 
- class ProcessingContext(tempfile.TemporaryDirectory):
-     """For processing a set of sessions for a particular target.
-
-     Keeps a shared temporary directory for intermediate files. We expose the path to that
-     directory in context["process_dir"].
-     """
-
-     def __init__(self, starbash: "Starbash"):
-         super().__init__(prefix="sbprocessing_")
-         self.sb = starbash
-         logging.debug(f"Created processing context at {self.name}")
-
-         self.sb.init_context()
-         self.sb.context["process_dir"] = self.name
-
-     def __enter__(self) -> "ProcessingContext":
-         return super().__enter__()
-
-     def __exit__(self, exc_type, exc_value, traceback) -> None:
-         logging.debug(f"Cleaning up processing context at {self.name}")
-
-         # unregister our process dir
-         self.sb.context.pop("process_dir", None)
-
-         super().__exit__(exc_type, exc_value, traceback)
-
-
  class Starbash:
      """The main Starbash application class."""
 
@@ -158,24 +123,41 @@ class Starbash:
          """
          Initializes the Starbash application by loading configurations
          and setting up the repository manager.
+
+         Args:
+             cmd (str): The command name or identifier for the current Starbash session.
+             stderr_logging (bool): Whether to enable logging to stderr.
+             no_progress (bool): Whether to disable the (asynchronous) progress display (because it breaks typer.ask)
          """
-         setup_logging(stderr=stderr_logging)
+         from starbash import _is_test_env  # Lazy import to avoid circular dependency
+
+         # It is important to disable fancy colors and line wrapping if running under test - because
+         # those tests will be string parsing our output.
+         console = rich.console.Console(
+             force_terminal=False if _is_test_env else None,
+             width=999999 if _is_test_env else None,  # Disable line wrapping in tests
+             stderr=stderr_logging,
+         )
+
+         starbash.console = console  # Update the global console to use the progress version
+
+         setup_logging(starbash.console)
          logging.info("Starbash starting...")
 
          # Load app defaults and initialize the repository manager
          self._init_repos()
-         self._init_analytics(cmd)
+         self._init_analytics(cmd)  # after init repos so we have user prefs
+         check_version()
          self._init_aliases()
 
-         logging.info(
-             f"Repo manager initialized with {len(self.repo_manager.repos)} repos."
-         )
+         logging.info(f"Repo manager initialized with {len(self.repo_manager.repos)} repos.")
          # self.repo_manager.dump()
 
          self._db = None  # Lazy initialization - only create when accessed
 
         # Initialize selection state (stored in user config repo)
          self.selection = Selection(self.user_repo)
+         preflight_tools()
 
      def _init_repos(self) -> None:
          """Initialize all repositories managed by the RepoManager."""
@@ -189,9 +171,7 @@ class Starbash:
          self.analytics = NopAnalytics()
          if self.user_repo.get("analytics.enabled", True):
              include_user = self.user_repo.get("analytics.include_user", False)
-             user_email = (
-                 self.user_repo.get("user.email", None) if include_user else None
-             )
+             user_email = self.user_repo.get("user.email", None) if include_user else None
              if user_email is not None:
                  user_email = str(user_email)
              analytics_setup(allowed=True, user_email=user_email)
@@ -247,8 +227,9 @@ class Starbash:
          self.close()
          return handled
 
-     def _add_session(self, image_doc_id: int, header: dict) -> None:
+     def _add_session(self, header: dict) -> None:
          """We just added a new image, create or update its session entry as needed."""
+         image_doc_id: int = header[Database.ID_KEY]  # this key is required to exist
          image_type = header.get(Database.IMAGETYP_KEY)
          date = header.get(Database.DATE_OBS_KEY)
          if not date or not image_type:
@@ -286,9 +267,47 @@ class Starbash:
          session = self.db.get_session(new)
          self.db.upsert_session(new, existing=session)
 
-     def guess_sessions(
-         self, ref_session: SessionRow, want_type: str
-     ) -> list[SessionRow]:
+     def add_local_repo(self, path: str, repo_type: str | None = None) -> None:
+         """Add a local repository located at the specified path. If necessary toml config files
+         will be created at the root of the repository."""
+
+         p = Path(path)
+         console = starbash.console
+
+         repo_toml = p / repo_suffix  # the starbash.toml file at the root of the repo
+         if repo_toml.exists():
+             logging.warning("Using existing repository config file: %s", repo_toml)
+         else:
+             if repo_type:
+                 console.print(f"Creating {repo_type} repository: {p}")
+                 p.mkdir(parents=True, exist_ok=True)
+
+                 toml_from_template(
+                     f"repo/{repo_type}",
+                     p / repo_suffix,
+                     overrides={
+                         "REPO_TYPE": repo_type,
+                         "REPO_PATH": str(p),
+                     },
+                 )
+             else:
+                 # No type specified, therefore (for now) assume we are just using this as an input
+                 # repo (and it must exist)
+                 if not p.exists():
+                     console.print(f"[red]Error: Repo path does not exist: {p}[/red]")
+                     raise typer.Exit(code=1)
+
+         console.print(f"Adding repository: {p}")
+
+         repo = self.user_repo.add_repo_ref(p)
+         if repo:
+             self.reindex_repo(repo)
+
+         # we don't yet always write default config files at roots of repos, but it would be easy to add here
+         # r.write_config()
+         self.user_repo.write_config()
+
+     def guess_sessions(self, ref_session: SessionRow, want_type: str) -> list[SessionRow]:
          """Given a particular session type (i.e. FLAT or BIAS etc...) and an
          existing session (which is assumed to generally be a LIGHT frame based session):
 
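Note: a minimal usage sketch for the new add_local_repo API above. The command label and paths are hypothetical; the "master" repo type corresponds to the starbash/templates/repo/master.toml template listed in the file table:

    sb = Starbash("repo-add")  # hypothetical command label
    # Index an existing directory of images as an input repo (the path must already exist):
    sb.add_local_repo("/data/astro/2025-01-12")
    # Create a new master repo, writing its starbash.toml from the repo/master template:
    sb.add_local_repo("/data/astro/masters", repo_type="master")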
@@ -320,13 +339,11 @@ class Starbash:
          }
 
          # For FLAT frames, filter must match the reference session
-         if want_type.upper() == "FLAT":
-             conditions[Database.FILTER_KEY] = ref_session[
-                 get_column_name(Database.FILTER_KEY)
-             ]
+         if want_type.lower() == "flat":
+             conditions[Database.FILTER_KEY] = ref_session[get_column_name(Database.FILTER_KEY)]
 
          # Search for candidate sessions
-         candidates = self.db.search_session(where_tuple(conditions))
+         candidates = self.db.search_session(build_search_conditions(conditions))
 
          return self.score_candidates(candidates, ref_session)
 
@@ -395,15 +412,13 @@ class Starbash:
                      # Using 7-day half-life
                      score += 100 * (2.718 ** (-time_delta / (7 * 86400)))
                  except (ValueError, TypeError):
-                     logging.warning(f"Malformed date - ignoring entry")
+                     logging.warning("Malformed date - ignoring entry")
 
                  scored_candidates.append((score, candidate))
 
              except (AssertionError, KeyError) as e:
                  # If we can't get the session image, log and skip this candidate
-                 logging.warning(
-                     f"Could not score candidate session {candidate.get('id')}: {e}"
-                 )
+                 logging.warning(f"Could not score candidate session {candidate.get('id')}: {e}")
                  continue
 
          # Sort by score (highest first)
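Note: a quick sanity check of the falloff retained in score_candidates above (not code from the package). With e-based decay exp(-Δt/τ) and τ = 7 days, the score actually halves every τ·ln 2 ≈ 4.85 days; a literal 7-day half-life would use base 2 instead:

    import math

    tau = 7 * 86400                    # decay constant used above, in seconds
    print(tau * math.log(2) / 86400)   # ≈ 4.85 days until the score halves
    # score += 100 * (2 ** (-time_delta / (7 * 86400)))  # a true 7-day half-life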
@@ -411,10 +426,13 @@ class Starbash:
 
          return [candidate for _, candidate in scored_candidates]
 
-     def search_session(self) -> list[SessionRow]:
+     def search_session(self, conditions: list[SearchCondition] | None = None) -> list[SessionRow]:
          """Search for sessions, optionally filtered by the current selection."""
          # Get query conditions from selection
-         conditions = self.selection.get_query_conditions()
+         if conditions is None:
+             conditions = self.selection.get_query_conditions()
+
+         self.add_filter_not_masters(conditions)  # we never return processed masters as sessions
          return self.db.search_session(conditions)
 
      def _add_image_abspath(self, image: ImageRow) -> ImageRow:
@@ -445,15 +463,9 @@ class Starbash:
          from starbash.database import SearchCondition
 
          images = self.db.search_image(
-             [
-                 SearchCondition(
-                     "i.id", "=", session[get_column_name(Database.IMAGE_DOC_KEY)]
-                 )
-             ]
+             [SearchCondition("i.id", "=", session[get_column_name(Database.IMAGE_DOC_KEY)])]
          )
-         assert (
-             len(images) == 1
-         ), f"Expected exactly one reference for session, found {len(images)}"
+         assert len(images) == 1, f"Expected exactly one reference for session, found {len(images)}"
          return self._add_image_abspath(images[0])
 
      def get_master_images(
@@ -487,9 +499,34 @@ class Starbash:
              search_conditions.append(SearchCondition("i.imagetyp", "=", imagetyp))
 
          images = self.db.search_image(search_conditions)
+
+         # FIXME - move this into a general filter function
+         # For flat frames, filter images based on matching reference_session filter
+         if reference_session and imagetyp and self.aliases.normalize(imagetyp) == "flat":
+             ref_filter = self.aliases.normalize(
+                 reference_session.get(get_column_name(Database.FILTER_KEY), "None")
+             )
+             if ref_filter:
+                 # Filter images to only those with matching filter in metadata
+                 filtered_images = []
+                 for img in images:
+                     img_filter = img.get(Database.FILTER_KEY, "None")
+                     if img_filter == ref_filter:
+                         filtered_images.append(img)
+                 images = filtered_images
+
          return images
 
-     def get_session_images(self, session: SessionRow) -> list[ImageRow]:
+     def add_filter_not_masters(self, conditions: list[SearchCondition]) -> None:
+         """Add conditions to filter out master and processed repos from image searches."""
+         master_repo = self.repo_manager.get_repo_by_kind("master")
+         if master_repo is not None:
+             conditions.append(SearchCondition("r.url", "<>", master_repo.url))
+         processed_repo = self.repo_manager.get_repo_by_kind("processed")
+         if processed_repo is not None:
+             conditions.append(SearchCondition("r.url", "<>", processed_repo.url))
+
+     def get_session_images(self, session: SessionRow, processed_ok: bool = False) -> list[ImageRow]:
          """
          Get all images belonging to a specific session.
 
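Note: add_filter_not_masters mutates the passed condition list in place, appending one exclusion per special repo kind. A sketch of the result, with hypothetical repo URLs:

    conditions: list[SearchCondition] = []
    sb.add_filter_not_masters(conditions)
    # conditions is now e.g.:
    # [SearchCondition("r.url", "<>", "file:///repos/masters"),
    #  SearchCondition("r.url", "<>", "file:///repos/processed")]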
@@ -500,6 +537,9 @@ class Starbash:
          Args:
              session_id: The database ID of the session
 
+             processed_ok: If True, include images which were processed by apps (i.e. stacked or other processing)
+                 Normally image pipelines don't want to accidentally consume those files.
+
 
          Returns:
              List of image records (dictionaries with path, metadata, etc.)
@@ -513,23 +553,17 @@ class Starbash:
          # Note: We need to search JSON metadata for FILTER, IMAGETYP, OBJECT, TELESCOP
          # since they're not indexed columns in the images table
          conditions = [
-             SearchCondition(
-                 "i.date_obs", ">=", session[get_column_name(Database.START_KEY)]
-             ),
-             SearchCondition(
-                 "i.date_obs", "<=", session[get_column_name(Database.END_KEY)]
-             ),
-             SearchCondition(
-                 "i.imagetyp", "=", session[get_column_name(Database.IMAGETYP_KEY)]
-             ),
+             SearchCondition("i.date_obs", ">=", session[get_column_name(Database.START_KEY)]),
+             SearchCondition("i.date_obs", "<=", session[get_column_name(Database.END_KEY)]),
+             SearchCondition("i.imagetyp", "=", session[get_column_name(Database.IMAGETYP_KEY)]),
          ]
 
-         # we never want to return 'master' images as part of the session image paths
+         # Note: not needed here, because we filter this earlier - when building the
+         # list of candidate sessions.
+         # we never want to return 'master' or 'processed' images as part of the session image paths
          # (because we will be passing these to siril or whatever to generate masters or
          # some other downstream image)
-         master_repo = self.repo_manager.get_repo_by_kind("master")
-         if master_repo is not None:
-             conditions.append(SearchCondition("r.url", "<>", master_repo.url))
+         # self.add_filter_not_masters(conditions)
 
          # Single query with indexed date conditions
          images = self.db.search_image(conditions)
@@ -537,22 +571,24 @@ class Starbash:
          # We no longer filter by target(object) because it might not be set anyways
          filtered_images = []
          for img in images:
+             # "HISTORY" nodes are added by processing tools (Siril etc...), we never want to accidentally read those images
+             has_history = img.get("HISTORY")
+
+             # images that were stacked seem to always have a STACKCNT header set
+             is_stacked = img.get("STACKCNT")
+
              if (
-                 img.get(Database.FILTER_KEY)
-                 == session[get_column_name(Database.FILTER_KEY)]
+                 img.get(Database.FILTER_KEY) == session[get_column_name(Database.FILTER_KEY)]
                  # and img.get(Database.OBJECT_KEY)
                  # == session[get_column_name(Database.OBJECT_KEY)]
                  and img.get(Database.TELESCOP_KEY)
                  == session[get_column_name(Database.TELESCOP_KEY)]
+                 and (processed_ok or (not has_history and not is_stacked))
              ):
                  filtered_images.append(img)
 
          # Reconstruct absolute paths for all images
-         return (
-             [self._add_image_abspath(img) for img in filtered_images]
-             if filtered_images
-             else []
-         )
+         return [self._add_image_abspath(img) for img in filtered_images]
 
      def remove_repo_ref(self, url: str) -> None:
          """
@@ -570,7 +606,7 @@ class Starbash:
          repo_refs = self.user_repo.config.get("repo-ref")
 
          if not repo_refs:
-             raise ValueError(f"No repository references found in user configuration.")
+             raise ValueError("No repository references found in user configuration.")
 
          # Find and remove the matching repo-ref
          found = False
@@ -590,7 +626,7 @@ class Starbash:
          # Write the updated config
          self.user_repo.write_config()
 
-     def add_image_to_db(self, repo: Repo, f: Path, force: bool = False) -> None:
+     def add_image(self, repo: Repo, f: Path, force: bool = False) -> dict[str, Any] | None:
          """Read FITS header from file and add/update image entry in the database."""
 
          path = repo.get_path()
@@ -602,51 +638,65 @@ class Starbash:
          if config:
              whitelist = config.get("fits-whitelist", None)
 
-         try:
-             # Convert absolute path to relative path within repo
-             relative_path = f.relative_to(path)
-
-             found = self.db.get_image(repo.url, str(relative_path))
-
-             # for debugging sometimes we want to limit scanning to a single directory or file
-             # debug_target = "masters-raw/2025-09-09/DARK"
-             debug_target = None
-             if debug_target:
-                 if str(relative_path).startswith(debug_target):
-                     logging.error("Debugging %s...", f)
-                     found = False
-                 else:
-                     found = True  # skip processing
-                     force = False
-
-             if not found or force:
-                 # Read and log the primary header (HDU 0)
-                 with fits.open(str(f), memmap=False) as hdul:
-                     # convert headers to dict
-                     hdu0: Any = hdul[0]
-                     header = hdu0.header
-                     if type(header).__name__ == "Unknown":
-                         raise ValueError("FITS header has Unknown type: %s", f)
-
-                     items = header.items()
-                     headers = {}
-                     for key, value in items:
-                         if (not whitelist) or (key in whitelist):
-                             headers[key] = value
-                     logging.debug("Headers for %s: %s", f, headers)
-                     # Store relative path in database
-                     headers["path"] = str(relative_path)
-                     image_doc_id = self.db.upsert_image(headers, repo.url)
-
-                     if not found:
-                         # Update the session infos, but ONLY on first file scan
-                         # (otherwise invariants will get messed up)
-                         self._add_session(image_doc_id, header)
-
-         except Exception as e:
-             logging.warning("Failed to read FITS header for %s: %s", f, e)
-
-     def reindex_repo(self, repo: Repo, force: bool = False, subdir: str | None = None):
+         # Convert absolute path to relative path within repo
+         relative_path = f.relative_to(path)
+
+         found = self.db.get_image(repo.url, str(relative_path))
+
+         # for debugging sometimes we want to limit scanning to a single directory or file
+         # debug_target = "masters-raw/2025-09-09/DARK"
+         debug_target = None
+         if debug_target:
+             if str(relative_path).startswith(debug_target):
+                 logging.error("Debugging %s...", f)
+                 found = False
+             else:
+                 found = True  # skip processing
+                 force = False
+
+         if not found or force:
+             # Read and log the primary header (HDU 0)
+             with fits.open(str(f), memmap=False) as hdul:
+                 # convert headers to dict
+                 hdu0: Any = hdul[0]
+                 header = hdu0.header
+                 if type(header).__name__ == "Unknown":
+                     raise ValueError("FITS header has Unknown type: %s", f)
+
+                 items = header.items()
+                 headers = {}
+                 for key, value in items:
+                     if (not whitelist) or (key in whitelist):
+                         headers[key] = value
+
+                 # Some device software (old Asiair versions) fails to populate TELESCOP, in that case fall back to
+                 # CREATOR (see doc/fits/malformedasimaster.txt for an example)
+                 if Database.TELESCOP_KEY not in headers:
+                     creator = headers.get("CREATOR")
+                     if creator:
+                         headers[Database.TELESCOP_KEY] = creator
+
+                 logging.debug("Headers for %s: %s", f, headers)
+
+                 # Store relative path in database
+                 headers["path"] = str(relative_path)
+                 image_doc_id = self.db.upsert_image(headers, repo.url)
+                 headers[Database.ID_KEY] = image_doc_id
+
+                 if not found:
+                     return headers
+
+         return None
+
+     def add_image_and_session(self, repo: Repo, f: Path, force: bool = False) -> None:
+         """Read FITS header from file and add/update image entry in the database."""
+         headers = self.add_image(repo, f, force=force)
+         if headers:
+             # Update the session infos, but ONLY on first file scan
+             # (otherwise invariants will get messed up)
+             self._add_session(headers)
+
+     def reindex_repo(self, repo: Repo, subdir: str | None = None):
          """Reindex all repositories managed by the RepoManager."""
 
          # make sure this new repo is listed in the repos table
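Note: with the add_image / add_image_and_session split above, the returned header dict now carries the image's database id under Database.ID_KEY, which _add_session reads instead of taking a separate argument. A condensed sketch of the new flow (path hypothetical):

    headers = sb.add_image(repo, Path("/data/lights/img_0001.fits"))
    if headers:                   # non-None only on the first scan of this file
        sb._add_session(headers)  # looks up headers[Database.ID_KEY] itself
    # equivalent to: sb.add_image_and_session(repo, Path("/data/lights/img_0001.fits"))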
@@ -654,7 +704,8 @@ class Starbash:
 
          path = repo.get_path()
 
-         if path and repo.is_scheme("file") and repo.kind != "recipe":
+         repo_kind = repo.kind()
+         if path and repo.is_scheme("file") and repo_kind != "recipe":
              logging.debug("Reindexing %s...", repo.url)
 
              if subdir:
@@ -667,84 +718,39 @@ class Starbash:
                  description=f"Indexing {repo.url}...",
              ):
                  # progress.console.print(f"Indexing {f}...")
-                 self.add_image_to_db(repo, f, force=force)
+                 if repo_kind == "master":
+                     # for master repos we only add to the image table
+                     self.add_image(repo, f, force=True)
+                 elif repo_kind == "processed":
+                     pass  # we never add processed images to our db
+                 else:
+                     self.add_image_and_session(repo, f, force=starbash.force_regen)
 
-     def reindex_repos(self, force: bool = False):
+     def reindex_repos(self):
          """Reindex all repositories managed by the RepoManager."""
          logging.debug("Reindexing all repositories...")
 
          for repo in track(self.repo_manager.repos, description="Reindexing repos..."):
-             self.reindex_repo(repo, force=force)
+             self.reindex_repo(repo)
 
-     def _get_stages(self, name: str) -> list[dict[str, Any]]:
-         """Get all pipeline stages defined in the merged configuration.
+     def get_recipes(self) -> list[Repo]:
+         """Get all recipe repos available, sorted by priority (lower number first).
 
-         Returns:
-             List of stage definitions (dictionaries with 'name' and 'priority')
+         Recipes without a priority are placed at the end of the list.
          """
-         # 1. Get all pipeline definitions (the `[[stages]]` tables with name and priority).
-         pipeline_definitions = self.repo_manager.merged.getall(name)
-         flat_pipeline_steps = list(itertools.chain.from_iterable(pipeline_definitions))
+         recipes = [r for r in self.repo_manager.repos if r.kind() == "recipe"]
 
-         # 2. Sort the pipeline steps by their 'priority' field.
-         try:
-             sorted_pipeline = sorted(flat_pipeline_steps, key=lambda s: s["priority"])
-         except KeyError as e:
-             # Re-raise as a ValueError with a more descriptive message.
-             raise ValueError(
-                 f"invalid stage definition: a stage is missing the required 'priority' key"
-             ) from e
-
-         logging.debug(
-             f"Found {len(sorted_pipeline)} pipeline steps to run in order of priority."
-         )
-         return sorted_pipeline
-
-     def run_all_stages(self):
-         """On the currently active session, run all processing stages
-
-         New design, not yet implemented:
-         * find all recipes
-         * for each target in the current selection:
-         * select ONE recipe for processing that target (check recipe.auto.require.* conditions)
-         * create a processing output directory (for high value final files)
-         * create a temporary processing directory (for intermediate files - shared by all stages)
-         * init session context (it will be shared for all following steps)
-         * iterate over all light frame sessions in the current selection
-         * for each session:
-         * update context input and output files
-         * run session.light stages
-         * after all sessions are processed, run final.stack stages (using the shared context and temp dir)
+         # Sort recipes by priority (lower number first). If no priority specified,
+         # use float('inf') to push those to the end of the list.
+         def priority_key(r: Repo) -> float:
+             priority = r.get("recipe.priority")
+             return float(priority) if priority is not None else float("inf")
 
-         """
-         logging.info("--- Running all stages ---")
-
-         # 1. Get all pipeline definitions (the `[[stages]]` tables with name and priority).
-         sorted_pipeline = self._get_stages("stages")
-
-         self.init_context()
-         # 4. Iterate through the sorted pipeline and execute the associated tasks.
-         for step in sorted_pipeline:
-             step_name = step.get("name")
-             if not step_name:
-                 raise ValueError("Invalid pipeline step found: missing 'name' key.")
-             self.run_pipeline_step(step_name)
-
-     def run_pipeline_step(self, step_name: str):
-         logging.info(f"--- Running pipeline step: '{step_name}' ---")
-
-         # 3. Get all available task definitions (the `[[stage]]` tables with tool, script, when).
-         task_definitions = self.repo_manager.merged.getall("stage")
-         all_tasks = list(itertools.chain.from_iterable(task_definitions))
-
-         # Find all tasks that should run during this pipeline step.
-         tasks_to_run = [task for task in all_tasks if task.get("when") == step_name]
-         for task in tasks_to_run:
-             self.run_stage(task)
-
-     def get_recipe_for_session(
-         self, session: SessionRow, step: dict[str, Any]
-     ) -> Repo | None:
+         recipes.sort(key=priority_key)
+
+         return recipes
+
+     def get_recipe_for_session(self, session: SessionRow, step: dict[str, Any]) -> Repo | None:
          """Try to find a recipe that can be used to process the given session for the given step name
          (master-dark, master-bias, light, stack, etc...)
 
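Note: the ordering contract documented in get_recipes above, mirrored as a standalone sketch (the priority values are illustrative, not from the package):

    def priority_key(priority: float | None) -> float:
        return float(priority) if priority is not None else float("inf")

    print(sorted([None, 10, 5], key=priority_key))  # -> [5, 10, None]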
@@ -757,7 +763,7 @@ class Starbash:
          and make the user pick (by throwing an exception?).
          """
          # Get all recipe repos - FIXME add a getall(kind) to RepoManager
-         recipe_repos = [r for r in self.repo_manager.repos if r.kind() == "recipe"]
+         recipe_repos = self.get_recipes()
 
          step_name = step.get("name")
          if not step_name:
@@ -767,13 +773,16 @@ class Starbash:
          if not input_name:
              raise ValueError("Invalid pipeline step found: missing 'input' key.")
 
-         imagetyp = session.get(get_column_name(Database.IMAGETYP_KEY))
+         # if input type is recipe we don't check for filetype match - because we'll just use files already in
+         # the tempdir
+         if input_name != "recipe":
+             imagetyp = session.get(get_column_name(Database.IMAGETYP_KEY))
 
-         if not imagetyp or input_name != self.aliases.normalize(imagetyp):
-             logging.debug(
-                 f"Session imagetyp '{imagetyp}' does not match step input '{input_name}', skipping"
-             )
-             return None
+             if not imagetyp or input_name != self.aliases.normalize(imagetyp):
+                 logging.debug(
+                     f"Session imagetyp '{imagetyp}' does not match step input '{input_name}', skipping"
+                 )
+                 return None
 
          # Get session metadata for checking requirements
          session_metadata = session.get("metadata", {})
@@ -782,21 +791,19 @@ class Starbash:
              # Check if this recipe has the requested stage
              stage_config = repo.get(f"recipe.stage.{step_name}")
              if not stage_config:
-                 logging.debug(
-                     f"Recipe {repo.url} does not have stage '{step_name}', skipping"
-                 )
+                 logging.debug(f"Recipe {repo.url} does not have stage '{step_name}', skipping")
                  continue
 
              # Check auto.require conditions if they exist
 
              # If requirements are specified, check if session matches
-             required_filters = repo.get("auto.require.filter", [])
+             required_filters = repo.get("recipe.auto.require.filter", [])
              if required_filters:
                  session_filter = self.aliases.normalize(
-                     session_metadata.get(Database.FILTER_KEY)
+                     session_metadata.get(Database.FILTER_KEY), lenient=True
                  )
 
-                 # Session must have a filter that matches one of the required filters
+                 # Session must have AT LEAST one filter that matches one of the required filters
                  if not session_filter or session_filter not in required_filters:
                      logging.debug(
                          f"Recipe {repo.url} requires filters {required_filters}, "
@@ -804,10 +811,22 @@ class Starbash:
                      )
                      continue
 
-             required_cameras = repo.get("auto.require.camera", [])
+             required_color = repo.get("recipe.auto.require.color", False)
+             if required_color:
+                 session_bayer = session_metadata.get("BAYERPAT")
+
+                 # Session must be color (i.e. have a BAYERPAT header)
+                 if not session_bayer:
+                     logging.debug(
+                         f"Recipe {repo.url} requires a color camera, "
+                         f"but session has no BAYERPAT header, skipping"
+                     )
+                     continue
+
+             required_cameras = repo.get("recipe.auto.require.camera", [])
              if required_cameras:
                  session_camera = self.aliases.normalize(
-                     session_metadata.get("INSTRUME")
+                     session_metadata.get("INSTRUME"), lenient=True
                  )  # Camera identifier
 
                  # Session must have a camera that matches one of the required cameras
@@ -825,323 +844,26 @@ class Starbash:
          # No matching recipe found
          return None
 
-     def run_master_stages(self):
-         """Generate any missing master frames
-
-         Steps:
-         * set all_tasks to be all tasks for when == "setup.master.bias"
-         * loop over all currently unfiltered sessions
-         * for each session loop across all_tasks
-         * if task input.type == the imagetyp for this current session
-         * add_input_to_context() add the input files to the context (from the session)
-         * run_stage(task) to generate the new master frame
-         """
-         sorted_pipeline = self._get_stages("master-stages")
-         sessions = self.search_session()
-         for session in track(sessions, description="Generating masters..."):
-             # 4. Iterate through the sorted pipeline and execute the associated tasks.
-             # FIXME unify the master vs normal step running code
-             for step in sorted_pipeline:
-                 task = None
-                 recipe = self.get_recipe_for_session(session, step)
-                 if recipe:
-                     task = recipe.get("recipe.stage." + step["name"])
-
-                 if task:
-                     input_config = task.get("input", {})
-                     input_type = input_config.get("type")
-                     if not input_type:
-                         raise ValueError(f"Task for step missing required input.type")
-
-                     # Create a default process dir in /tmp.
-                     # FIXME - eventually we should allow hashing or somesuch to keep reusing processing
-                     # dirs for particular targets?
-                     with ProcessingContext(self) as temp_dir:
-                         self.set_session_in_context(session)
-                         self.run_stage(task)
-
-     def init_context(self) -> None:
-         """Do common session init"""
-
-         # Context is preserved through all stages, so each stage can add new symbols to it for use by later stages
-         self.context = {}
-
-         # Update the context with runtime values.
-         runtime_context = {
-             # "masters": "/workspaces/starbash/images/masters",  # FIXME find this the correct way
-         }
-         self.context.update(runtime_context)
-
-     def set_session_in_context(self, session: SessionRow) -> None:
-         """adds to context from the indicated session:
-
-         Sets the following context variables based on the provided session:
-         * instrument - for the session
-         * date - the localtimezone date of the session
-         * imagetyp - the imagetyp of the session
-         * session - the current session row (joined with a typical image) (can be used to
-           find things like telescope, temperature ...)
-         * session_config - a short human readable description of the session - suitable for logs or filenames
-         """
-         # it is okay to give them the actual session row, because we're never using it again
-         self.context["session"] = session
-
-         instrument = session.get(get_column_name(Database.TELESCOP_KEY))
-         if instrument:
-             self.context["instrument"] = instrument
-
-         imagetyp = session.get(get_column_name(Database.IMAGETYP_KEY))
-         if imagetyp:
-             imagetyp = self.aliases.normalize(imagetyp)
-             self.context["imagetyp"] = imagetyp
-
-         # add a short human readable description of the session - suitable for logs or in filenames
-         session_config = f"{imagetyp}"
-
-         metadata = session.get("metadata", {})
-         filter = metadata.get(Database.FILTER_KEY)
-         if (imagetyp == "flat" or imagetyp == "light") and filter:
-             # we only care about filters in these cases
-             session_config += f"_{filter}"
-         if imagetyp == "dark":
-             exptime = session.get(get_column_name(Database.EXPTIME_KEY))
-             if exptime:
-                 session_config += f"_{int(float(exptime))}s"
-
-         self.context["session_config"] = session_config
-
-         date = session.get(get_column_name(Database.START_KEY))
-         if date:
-             self.context["date"] = to_shortdate(date)
-
-     def add_input_masters(self, stage: dict) -> None:
-         """based on input.masters add the correct master frames as context.master.<type> filepaths"""
-         session = self.context.get("session")
-         assert session is not None, "context.session should have been already set"
-
-         input_config = stage.get("input", {})
-         master_types: list[str] = input_config.get("masters", [])
-         for master_type in master_types:
-             masters = self.get_master_images(
-                 imagetyp=master_type, reference_session=session
-             )
-             if not masters:
-                 raise RuntimeError(
-                     f"No master frames of type '{master_type}' found for stage '{stage.get('name')}'"
-                 )
-
-             context_master = self.context.setdefault("master", {})
-
-             if len(masters) > 1:
-                 logging.debug(
-                     f"Multiple ({len(masters)}) master frames of type '{master_type}' found, using first. FIXME."
-                 )
-
-             # Try to rank the images by desirability
-             masters = self.score_candidates(masters, session)
-
-             self._add_image_abspath(masters[0])  # make sure abspath is populated
-             selected_master = masters[0]["abspath"]
-             logging.info(f"For master '{master_type}', using: {selected_master}")
-
-             context_master[master_type] = selected_master
-
-     def add_input_files(self, stage: dict) -> None:
-         """adds to context.input_files based on the stage input config"""
-         input_config = stage.get("input")
-         input_required = 0
-         if input_config:
-             # if there is an "input" dict, we assume input.required is true if unset
-             input_required = input_config.get("required", 0)
-             source = input_config.get("source")
-             if source is None:
-                 raise ValueError(
-                     f"Stage '{stage.get('name')}' has invalid 'input' configuration: missing 'source'"
-                 )
-             if source == "path":
-                 # The path might contain context variables that need to be expanded.
-                 # path_pattern = expand_context(input_config["path"], context)
-                 path_pattern = input_config["path"]
-                 input_files = glob.glob(path_pattern, recursive=True)
-
-                 self.context["input_files"] = (
-                     input_files  # Pass in the file list via the context dict
-                 )
-             elif source == "repo":
-                 # Get images for this session (by pulling from repo)
-                 session = self.context.get("session")
-                 assert (
-                     session is not None
-                 ), "context.session should have been already set"
-
-                 images = self.get_session_images(session)
-                 logging.debug(f"Using {len(images)} files as input_files")
-                 self.context["input_files"] = [
-                     img["abspath"] for img in images
-                 ]  # Pass in the file list via the context dict
-             else:
-                 raise ValueError(
-                     f"Stage '{stage.get('name')}' has invalid 'input' source: {source}"
-                 )
-
-             # FIXME compare context.output to see if it already exists and is newer than the input files, if so skip processing
-         else:
-             # The script doesn't mention input, therefore assume it doesn't want input_files
-             if "input_files" in self.context:
-                 del self.context["input_files"]
-
-         if input_required and len(self.context.get("input_files", [])) < input_required:
-             raise RuntimeError(f"Stage requires at least {input_required} input files")
-
-     def add_output_path(self, stage: dict) -> None:
-         """Adds output path information to context based on the stage output config.
-
-         Sets the following context variables:
-         - context.output.root_path - base path of the destination repo
-         - context.output.base_path - full path without file extension
-         - context.output.suffix - file extension (e.g., .fits or .fit.gz)
-         - context.output.full_path - complete output file path
-         - context.output.repo - the destination Repo (if applicable)
-         """
-         output_config = stage.get("output")
-         if not output_config:
-             # No output configuration, remove any existing output from context
-             if "output" in self.context:
-                 del self.context["output"]
-             return
-
-         dest = output_config.get("dest")
-         if not dest:
-             raise ValueError(
-                 f"Stage '{stage.get('description', 'unknown')}' has 'output' config but missing 'dest'"
-             )
-
-         if dest == "repo":
-             # Find the destination repo by type/kind
-             output_type = output_config.get("type")
-             if not output_type:
-                 raise ValueError(
-                     f"Stage '{stage.get('description', 'unknown')}' has output.dest='repo' but missing 'type'"
-                 )
-
-             # Find the repo with matching kind
-             dest_repo = self.repo_manager.get_repo_by_kind(output_type)
-             if not dest_repo:
-                 raise ValueError(
-                     f"No repository found with kind '{output_type}' for output destination"
-                 )
-
-             repo_base = dest_repo.get_path()
-             if not repo_base:
-                 raise ValueError(f"Repository '{dest_repo.url}' has no filesystem path")
-
-             repo_relative: str | None = dest_repo.get("repo.relative")
-             if not repo_relative:
-                 raise ValueError(
-                     f"Repository '{dest_repo.url}' is missing 'repo.relative' configuration"
-                 )
-
-             # we support context variables in the relative path
-             repo_relative = expand_context_unsafe(repo_relative, self.context)
-             full_path = repo_base / repo_relative
-
-             # base_path but without spaces - because Siril doesn't like that
-             full_path = Path(str(full_path).replace(" ", r"_"))
-
-             base_path = full_path.parent / full_path.stem
-
-             # Set context variables as documented in the TOML
-             self.context["output"] = {
-                 # "root_path": repo_relative,  not needed I think
-                 "base_path": base_path,
-                 # "suffix": full_path.suffix,  not needed I think
-                 "full_path": full_path,
-                 "repo": dest_repo,
-             }
-         else:
-             raise ValueError(
-                 f"Unsupported output destination type: {dest}. Only 'repo' is currently supported."
-             )
-
-     def run_stage(self, stage: dict) -> None:
-         """
-         Executes a single processing stage.
-
-         Args:
-             stage: A dictionary representing the stage configuration, containing
-                 at least 'tool' and 'script' keys.
-         """
-         stage_desc = stage.get("description", "(missing description)")
-         stage_disabled = stage.get("disabled", False)
-         if stage_disabled:
-             logging.info(f"Skipping disabled stage: {stage_desc}")
-             return
-
-         logging.info(f"Running stage: {stage_desc}")
-
-         tool_dict = stage.get("tool")
-         if not tool_dict:
-             raise ValueError(
-                 f"Stage '{stage.get('name')}' is missing a 'tool' definition."
-             )
-         tool_name = tool_dict.get("name")
-         if not tool_name:
-             raise ValueError(
-                 f"Stage '{stage.get('name')}' is missing a 'tool.name' definition."
-             )
-         tool = tools.get(tool_name)
-         if not tool:
-             raise ValueError(
-                 f"Tool '{tool_name}' for stage '{stage.get('name')}' not found."
-             )
-         logging.debug(f"Using tool: {tool_name}")
-         tool.set_defaults()
-
-         # Allow stage to override tool timeout if specified
-         tool_timeout = tool_dict.get("timeout")
-         if tool_timeout is not None:
-             tool.timeout = float(tool_timeout)
-             logging.debug(f"Using tool timeout: {tool.timeout} seconds")
-
-         script_filename = stage.get("script-file", tool.default_script_file)
-         if script_filename:
-             source = stage.source  # type: ignore (was monkeypatched by repo)
-             script = source.read(script_filename)
-         else:
-             script = stage.get("script")
-
-         if script is None:
-             raise ValueError(
-                 f"Stage '{stage.get('name')}' is missing a 'script' or 'script-file' definition."
-             )
-
-         # This allows recipe TOML to define their own default variables.
-         # (apply all of the changes to context that the task demands)
-         stage_context = stage.get("context", {})
-         self.context.update(stage_context)
-         self.add_input_files(stage)
-         self.add_input_masters(stage)
-         self.add_output_path(stage)
-
-         # if the output path already exists and is newer than all input files, skip processing
-         output_info: dict | None = self.context.get("output")
-         if output_info:
-             output_path = output_info.get("full_path")
-
-             if output_path and os.path.exists(output_path):
-                 logging.info(
-                     f"Output file already exists, skipping processing: {output_path}"
-                 )
-                 return
-
-         tool.run_in_temp_dir(script, context=self.context)
-
-         # verify context.output was created if it was specified
-         output_info: dict | None = self.context.get("output")
-         if output_info:
-             output_path = output_info.get("full_path")
+     def filter_sessions_with_lights(self, sessions: list[SessionRow]) -> list[SessionRow]:
+         """Filter sessions to only those that contain light frames."""
+         filtered_sessions: list[SessionRow] = []
+         for s in sessions:
+             imagetyp_val = s.get(get_column_name(Database.IMAGETYP_KEY))
+             if imagetyp_val is None:
+                 continue
+             if self.aliases.normalize(str(imagetyp_val)) == "light":
+                 filtered_sessions.append(s)
+         return filtered_sessions
 
-         if not output_path or not os.path.exists(output_path):
-             raise RuntimeError(f"Expected output file not found: {output_path}")
-         else:
-             self.add_image_to_db(output_info["repo"], Path(output_path), force=True)
+     def filter_sessions_by_target(
+         self, sessions: list[SessionRow], target: str
+     ) -> list[SessionRow]:
+         """Filter sessions to only those that match the given target name."""
+         filtered_sessions: list[SessionRow] = []
+         for s in sessions:
+             obj_val = s.get(get_column_name(Database.OBJECT_KEY))
+             if obj_val is None:
+                 continue
+             if normalize_target_name(str(obj_val)) == target:
+                 filtered_sessions.append(s)
+         return filtered_sessions
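
Note: a minimal sketch of how the two new filter helpers compose with search_session; the command label and target string are hypothetical examples:

    sb = Starbash("process")
    sessions = sb.search_session()                     # current selection; master/processed repos excluded
    lights = sb.filter_sessions_with_lights(sessions)  # keep only LIGHT-frame sessions
    m42 = sb.filter_sessions_by_target(lights, normalize_target_name("M 42"))

filter_sessions_by_target compares normalize_target_name(obj_val) against the target argument verbatim, so callers should pass an already-normalized name.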