starbash 0.1.9__py3-none-any.whl → 0.1.15__py3-none-any.whl

This diff shows the content of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (44)
  1. repo/__init__.py +1 -1
  2. repo/manager.py +14 -23
  3. repo/repo.py +52 -10
  4. starbash/__init__.py +10 -3
  5. starbash/aliases.py +145 -0
  6. starbash/analytics.py +3 -2
  7. starbash/app.py +512 -473
  8. starbash/check_version.py +18 -0
  9. starbash/commands/__init__.py +2 -1
  10. starbash/commands/info.py +88 -14
  11. starbash/commands/process.py +76 -24
  12. starbash/commands/repo.py +41 -68
  13. starbash/commands/select.py +141 -142
  14. starbash/commands/user.py +88 -23
  15. starbash/database.py +219 -112
  16. starbash/defaults/starbash.toml +24 -3
  17. starbash/exception.py +21 -0
  18. starbash/main.py +29 -7
  19. starbash/paths.py +35 -5
  20. starbash/processing.py +724 -0
  21. starbash/recipes/README.md +3 -0
  22. starbash/recipes/master_bias/starbash.toml +16 -19
  23. starbash/recipes/master_dark/starbash.toml +33 -0
  24. starbash/recipes/master_flat/starbash.toml +26 -18
  25. starbash/recipes/osc.py +190 -0
  26. starbash/recipes/osc_dual_duo/starbash.toml +54 -44
  27. starbash/recipes/osc_simple/starbash.toml +82 -0
  28. starbash/recipes/osc_single_duo/starbash.toml +51 -32
  29. starbash/recipes/seestar/starbash.toml +82 -0
  30. starbash/recipes/starbash.toml +30 -9
  31. starbash/selection.py +32 -36
  32. starbash/templates/repo/master.toml +7 -3
  33. starbash/templates/repo/processed.toml +15 -0
  34. starbash/templates/userconfig.toml +9 -0
  35. starbash/toml.py +13 -13
  36. starbash/tool.py +230 -96
  37. starbash-0.1.15.dist-info/METADATA +216 -0
  38. starbash-0.1.15.dist-info/RECORD +45 -0
  39. starbash/recipes/osc_dual_duo/starbash.py +0 -151
  40. starbash-0.1.9.dist-info/METADATA +0 -145
  41. starbash-0.1.9.dist-info/RECORD +0 -37
  42. {starbash-0.1.9.dist-info → starbash-0.1.15.dist-info}/WHEEL +0 -0
  43. {starbash-0.1.9.dist-info → starbash-0.1.15.dist-info}/entry_points.txt +0 -0
  44. {starbash-0.1.9.dist-info → starbash-0.1.15.dist-info}/licenses/LICENSE +0 -0
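Much of the starbash/app.py churn below follows from starbash/database.py (+219 -112) replacing dict-based query conditions with explicit SearchCondition entries. A minimal sketch of the new call style, inferred from the call sites in this diff (the stand-in class here is hypothetical; the real definition ships in starbash/database.py):

    from typing import Any, NamedTuple

    class SearchCondition(NamedTuple):  # hypothetical stand-in for starbash.database.SearchCondition
        field: str  # e.g. "i.imagetyp" (images table) or "r.url" (repos table)
        op: str     # operators observed in the diff: "=", "<>", ">=", "<="
        value: Any

    # 0.1.9 style: conditions = {Database.IMAGETYP_KEY: "LIGHT", ...}
    # 0.1.15 style: an explicit list of (column, operator, value) conditions:
    conditions = [
        SearchCondition("i.imagetyp", "=", "LIGHT"),
        SearchCondition("i.date_obs", ">=", "2025-01-01T00:00:00"),
    ]
    # images = db.search_image(conditions)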
starbash/app.py CHANGED
@@ -1,32 +1,18 @@
  import logging
- from importlib import resources
- import os
+ import shutil
+ from datetime import datetime
  from pathlib import Path
- import tempfile
- import typer
- import tomlkit
- from tomlkit.toml_file import TOMLFile
- import glob
  from typing import Any
+
+ import rich.console
+ import typer
  from astropy.io import fits
- import itertools
- from rich.progress import track
  from rich.logging import RichHandler
- import shutil
- from datetime import datetime
- import rich.console
- import copy
+ from rich.progress import track

  import starbash
- from starbash import console, _is_test_env, to_shortdate
- from starbash.database import Database, SessionRow, ImageRow, get_column_name
- from repo import Repo, repo_suffix
- from starbash.toml import toml_from_template
- from starbash.tool import Tool, expand_context, expand_context_unsafe
- from repo import RepoManager
- from starbash.tool import tools
- from starbash.paths import get_user_config_dir, get_user_data_dir
- from starbash.selection import Selection, where_tuple
+ from repo import Repo, RepoManager, repo_suffix
+ from starbash.aliases import Aliases, normalize_target_name
  from starbash.analytics import (
      NopAnalytics,
      analytics_exception,
@@ -34,17 +20,30 @@ from starbash.analytics import (
      analytics_shutdown,
      analytics_start_transaction,
  )
-
- # Type aliases for better documentation
+ from starbash.check_version import check_version
+ from starbash.database import (
+     Database,
+     ImageRow,
+     SearchCondition,
+     SessionRow,
+     get_column_name,
+ )
+ from starbash.paths import get_user_config_dir, get_user_config_path
+ from starbash.selection import Selection, build_search_conditions
+ from starbash.toml import toml_from_template
+ from starbash.tool import preflight_tools


- def setup_logging(stderr: bool = False):
+ def setup_logging(console: rich.console.Console):
      """
      Configures basic logging.
      """
-     console = rich.console.Console(stderr=stderr)
+     from starbash import _is_test_env  # Lazy import to avoid circular dependency
+
      handlers = (
-         [RichHandler(console=console, rich_tracebacks=True)] if not _is_test_env else []
+         [RichHandler(console=console, rich_tracebacks=True, markup=True)]
+         if not _is_test_env
+         else []
      )
      logging.basicConfig(
          level=starbash.log_filter_level,  # use the global log filter level
@@ -54,12 +53,6 @@ def setup_logging(stderr: bool = False):
      )


- def get_user_config_path() -> Path:
-     """Returns the path to the user config file."""
-     config_dir = get_user_config_dir()
-     return config_dir / repo_suffix
-
-
  def create_user() -> Path:
      """Create user directories if they don't exist yet."""
      path = get_user_config_path()
@@ -70,7 +63,12 @@ def create_user() -> Path:


  def copy_images_to_dir(images: list[ImageRow], output_dir: Path) -> None:
-     """Copy images to the specified output directory (using symbolic links if possible)."""
+     """Copy images to the specified output directory (using symbolic links if possible).
+
+     This function requires that "abspath" already be populated in each ImageRow. Normally
+     the caller does this by calling Starbash._add_image_abspath() on the image.
+     """
+     from starbash import console  # Lazy import to avoid circular dependency

      # Export images
      console.print(f"[cyan]Exporting {len(images)} images to {output_dir}...[/cyan]")
@@ -81,7 +79,7 @@ def copy_images_to_dir(images: list[ImageRow], output_dir: Path) -> None:

      for image in images:
          # Get the source path from the image metadata
-         source_path = Path(image.get("path", ""))
+         source_path = Path(image.get("abspath", ""))

          if not source_path.exists():
              console.print(f"[red]Warning: Source file not found: {source_path}[/red]")
@@ -109,7 +107,7 @@ def copy_images_to_dir(images: list[ImageRow], output_dir: Path) -> None:
              error_count += 1

      # Print summary
-     console.print(f"[green]Export complete![/green]")
+     console.print("[green]Export complete![/green]")
      if linked_count > 0:
          console.print(f" Linked: {linked_count} files")
      if copied_count > 0:
@@ -118,14 +116,6 @@ def copy_images_to_dir(images: list[ImageRow], output_dir: Path) -> None:
          console.print(f" [red]Errors: {error_count} files[/red]")


- def imagetyp_equals(imagetyp1: str, imagetyp2: str) -> bool:
-     """Imagetyps (BIAS, Dark, FLAT, flats) have a number of slightly different convetions.
-     Do a sloppy equality check.
-
-     Eventually handle non english variants by using the repos aliases table."""
-     return imagetyp1.strip().lower() == imagetyp2.strip().lower()
-
-
  class Starbash:
      """The main Starbash application class."""

@@ -133,23 +123,55 @@ class Starbash:
          """
          Initializes the Starbash application by loading configurations
          and setting up the repository manager.
+
+         Args:
+             cmd (str): The command name or identifier for the current Starbash session.
+             stderr_logging (bool): Whether to enable logging to stderr.
+             no_progress (bool): Whether to disable the (asynchronous) progress display (because it breaks typer.ask)
          """
-         setup_logging(stderr=stderr_logging)
+         from starbash import _is_test_env  # Lazy import to avoid circular dependency
+
+         # It is important to disable fancy colors and line wrapping if running under test - because
+         # those tests will be string parsing our output.
+         console = rich.console.Console(
+             force_terminal=False if _is_test_env else None,
+             width=999999 if _is_test_env else None,  # Disable line wrapping in tests
+             stderr=stderr_logging,
+         )
+
+         starbash.console = console  # Update the global console to use the progress version
+
+         setup_logging(starbash.console)
          logging.info("Starbash starting...")

          # Load app defaults and initialize the repository manager
+         self._init_repos()
+         self._init_analytics(cmd)  # after init repos so we have user prefs
+         check_version()
+         self._init_aliases()
+
+         logging.info(f"Repo manager initialized with {len(self.repo_manager.repos)} repos.")
+         # self.repo_manager.dump()
+
+         self._db = None  # Lazy initialization - only create when accessed
+
+         # Initialize selection state (stored in user config repo)
+         self.selection = Selection(self.user_repo)
+         preflight_tools()
+
+     def _init_repos(self) -> None:
+         """Initialize all repositories managed by the RepoManager."""
          self.repo_manager = RepoManager()
          self.repo_manager.add_repo("pkg://defaults")

          # Add user prefs as a repo
          self.user_repo = self.repo_manager.add_repo("file://" + str(create_user()))

+     def _init_analytics(self, cmd: str) -> None:
          self.analytics = NopAnalytics()
          if self.user_repo.get("analytics.enabled", True):
              include_user = self.user_repo.get("analytics.include_user", False)
-             user_email = (
-                 self.user_repo.get("user.email", None) if include_user else None
-             )
+             user_email = self.user_repo.get("user.email", None) if include_user else None
              if user_email is not None:
                  user_email = str(user_email)
              analytics_setup(allowed=True, user_email=user_email)
@@ -157,19 +179,10 @@ class Starbash:
              self.analytics = analytics_start_transaction(name="App session", op=cmd)
              self.analytics.__enter__()

-         logging.info(
-             f"Repo manager initialized with {len(self.repo_manager.repos)} repos."
-         )
-         # self.repo_manager.dump()
-
-         self._db = None  # Lazy initialization - only create when accessed
-         self.session_query = None  # None means search all sessions
-
-         # Initialize selection state (stored in user config repo)
-         self.selection = Selection(self.user_repo)
-
-         # FIXME, call reindex somewhere and also index whenever new repos are added
-         # self.reindex_repos()
+     def _init_aliases(self) -> None:
+         alias_dict = self.repo_manager.get("aliases", {})
+         assert isinstance(alias_dict, dict), "Aliases config must be a dictionary"
+         self.aliases = Aliases(alias_dict)

      @property
      def db(self) -> Database:
@@ -214,36 +227,87 @@ class Starbash:
          self.close()
          return handled

-     def _add_session(self, f: str, image_doc_id: int, header: dict) -> None:
+     def _add_session(self, header: dict) -> None:
          """We just added a new image, create or update its session entry as needed."""
-         filter = header.get(Database.FILTER_KEY, "unspecified")
+         image_doc_id: int = header[Database.ID_KEY]  # this key is required to exist
          image_type = header.get(Database.IMAGETYP_KEY)
          date = header.get(Database.DATE_OBS_KEY)
          if not date or not image_type:
              logging.warning(
-                 "Image %s missing either DATE-OBS or IMAGETYP FITS header, skipping...",
-                 f,
+                 "Image '%s' missing either DATE-OBS or IMAGETYP FITS header, skipping...",
+                 header.get("path", "unspecified"),
              )
          else:
              exptime = header.get(Database.EXPTIME_KEY, 0)
-             telescop = header.get(Database.TELESCOP_KEY, "unspecified")
+
              new = {
-                 Database.FILTER_KEY: filter,
-                 Database.START_KEY: date,
-                 Database.END_KEY: date,  # FIXME not quite correct, should be longer by exptime
-                 Database.IMAGE_DOC_KEY: image_doc_id,
-                 Database.IMAGETYP_KEY: image_type,
-                 Database.NUM_IMAGES_KEY: 1,
-                 Database.EXPTIME_TOTAL_KEY: exptime,
-                 Database.OBJECT_KEY: header.get(Database.OBJECT_KEY, "unspecified"),
-                 Database.TELESCOP_KEY: telescop,
+                 get_column_name(Database.START_KEY): date,
+                 get_column_name(
+                     Database.END_KEY
+                 ): date,  # FIXME not quite correct, should be longer by exptime
+                 get_column_name(Database.IMAGE_DOC_KEY): image_doc_id,
+                 get_column_name(Database.IMAGETYP_KEY): image_type,
+                 get_column_name(Database.NUM_IMAGES_KEY): 1,
+                 get_column_name(Database.EXPTIME_TOTAL_KEY): exptime,
+                 get_column_name(Database.EXPTIME_KEY): exptime,
              }
+
+             filter = header.get(Database.FILTER_KEY)
+             if filter:
+                 new[get_column_name(Database.FILTER_KEY)] = filter
+
+             telescop = header.get(Database.TELESCOP_KEY)
+             if telescop:
+                 new[get_column_name(Database.TELESCOP_KEY)] = telescop
+
+             obj = header.get(Database.OBJECT_KEY)
+             if obj:
+                 new[get_column_name(Database.OBJECT_KEY)] = obj
+
              session = self.db.get_session(new)
              self.db.upsert_session(new, existing=session)

-     def guess_sessions(
-         self, ref_session: SessionRow, want_type: str
-     ) -> list[SessionRow]:
+     def add_local_repo(self, path: str, repo_type: str | None = None) -> None:
+         """Add a local repository located at the specified path. If necessary, toml config
+         files will be created at the root of the repository."""
+
+         p = Path(path)
+         console = starbash.console
+
+         repo_toml = p / repo_suffix  # the starbash.toml file at the root of the repo
+         if repo_toml.exists():
+             logging.warning("Using existing repository config file: %s", repo_toml)
+         else:
+             if repo_type:
+                 console.print(f"Creating {repo_type} repository: {p}")
+                 p.mkdir(parents=True, exist_ok=True)
+
+                 toml_from_template(
+                     f"repo/{repo_type}",
+                     p / repo_suffix,
+                     overrides={
+                         "REPO_TYPE": repo_type,
+                         "REPO_PATH": str(p),
+                     },
+                 )
+             else:
+                 # No type specified, therefore (for now) assume we are just using this as an input
+                 # repo (and it must exist)
+                 if not p.exists():
+                     console.print(f"[red]Error: Repo path does not exist: {p}[/red]")
+                     raise typer.Exit(code=1)
+
+         console.print(f"Adding repository: {p}")
+
+         repo = self.user_repo.add_repo_ref(p)
+         if repo:
+             self.reindex_repo(repo)
+
+         # we don't yet always write default config files at roots of repos, but it would be easy to add here
+         # r.write_config()
+         self.user_repo.write_config()
+
+     def guess_sessions(self, ref_session: SessionRow, want_type: str) -> list[SessionRow]:
          """Given a particular session type (i.e. FLAT or BIAS etc...) and an
          existing session (which is assumed to generally be a LIGHT frame based session):

@@ -267,17 +331,6 @@ class Starbash:

          """
          # Get reference image to access CCD-TEMP and DATE-OBS
-         metadata: dict = ref_session.get("metadata", {})
-         ref_temp = metadata.get("CCD-TEMP", None)
-         ref_date_str = metadata.get(Database.DATE_OBS_KEY)
-
-         # Parse reference date for time delta calculations
-         ref_date = None
-         if ref_date_str:
-             try:
-                 ref_date = datetime.fromisoformat(ref_date_str)
-             except (ValueError, TypeError):
-                 logging.warning(f"Malformed session ref date: {ref_date_str}")

          # Build search conditions - MUST match criteria
          conditions = {
@@ -286,13 +339,42 @@ class Starbash:
          }

          # For FLAT frames, filter must match the reference session
-         if want_type.upper() == "FLAT":
-             conditions[Database.FILTER_KEY] = ref_session[
-                 get_column_name(Database.FILTER_KEY)
-             ]
+         if want_type.lower() == "flat":
+             conditions[Database.FILTER_KEY] = ref_session[get_column_name(Database.FILTER_KEY)]

          # Search for candidate sessions
-         candidates = self.db.search_session(where_tuple(conditions))
+         candidates = self.db.search_session(build_search_conditions(conditions))
+
+         return self.score_candidates(candidates, ref_session)
+
+     def score_candidates(
+         self, candidates: list[dict[str, Any]], ref_session: SessionRow
+     ) -> list[SessionRow]:
+         """Given a list of images or sessions, try to rank that list by desirability.
+
+         Return a list of possible images/sessions which would be acceptable. The more desirable
+         matches are first in the list. Possibly in the future I might have a 'score' and reason
+         given for each ranking.
+
+         The following criteria MUST match to be acceptable:
+         * matches requested imagetyp.
+         * same filter as reference session (in the case want_type==FLAT only)
+         * same telescope as reference session
+
+         Quality is determined by (most important first):
+         * temperature of CCD-TEMP is closer to the reference session
+         * smaller DATE-OBS delta to the reference session
+
+         Eventually the code will check the following for 'nice to have' (but not now):
+         * TBD
+
+         Possibly eventually this code could be moved into recipes.
+
+         """
+
+         metadata: dict = ref_session.get("metadata", {})
+         ref_temp = metadata.get("CCD-TEMP", None)
+         ref_date_str = metadata.get(Database.DATE_OBS_KEY)

          # Now score and sort the candidates
          scored_candidates = []
@@ -318,61 +400,59 @@ class Starbash:
                      # If we can't parse temps, give a neutral score
                      score += 0

-                 # Score by date/time proximity (secondary importance)
-                 if ref_date is not None:
-                     candidate_date_str = candidate_image.get(Database.DATE_OBS_KEY)
-                     if candidate_date_str:
-                         try:
-                             candidate_date = datetime.fromisoformat(candidate_date_str)
-                             time_delta = abs(
-                                 (ref_date - candidate_date).total_seconds()
-                             )
-                             # Closer in time = better score
-                             # Same day ≈ 100, 7 days 37, 30 days 9
-                             # Using 7-day half-life
-                             score += 100 * (2.718 ** (-time_delta / (7 * 86400)))
-                         except (ValueError, TypeError):
-                             logging.warning(
-                                 f"Could not parse candidate date: {candidate_date_str}"
-                             )
+                 # Parse reference date for time delta calculations
+                 candidate_date_str = candidate_image.get(Database.DATE_OBS_KEY)
+                 if ref_date_str and candidate_date_str:
+                     try:
+                         ref_date = datetime.fromisoformat(ref_date_str)
+                         candidate_date = datetime.fromisoformat(candidate_date_str)
+                         time_delta = abs((ref_date - candidate_date).total_seconds())
+                         # Closer in time = better score
+                         # Same day ≈ 100, 7 days ≈ 37, 30 days ≈ 1
+                         # Using a 7-day e-folding time
+                         score += 100 * (2.718 ** (-time_delta / (7 * 86400)))
+                     except (ValueError, TypeError):
+                         logging.warning("Malformed date - ignoring entry")

                  scored_candidates.append((score, candidate))

              except (AssertionError, KeyError) as e:
                  # If we can't get the session image, log and skip this candidate
-                 logging.warning(
-                     f"Could not score candidate session {candidate.get('id')}: {e}"
-                 )
+                 logging.warning(f"Could not score candidate session {candidate.get('id')}: {e}")
                  continue

-         # Sort by score (highest first) and return just the sessions
+         # Sort by score (highest first)
          scored_candidates.sort(key=lambda x: x[0], reverse=True)

-         return [candidate for score, candidate in scored_candidates]
+         return [candidate for _, candidate in scored_candidates]

-     def search_session(self) -> list[SessionRow]:
+     def search_session(self, conditions: list[SearchCondition] | None = None) -> list[SessionRow]:
          """Search for sessions, optionally filtered by the current selection."""
          # Get query conditions from selection
-         conditions = self.selection.get_query_conditions()
+         if conditions is None:
+             conditions = self.selection.get_query_conditions()
+
+         self.add_filter_not_masters(conditions)  # we never return processed masters as sessions
          return self.db.search_session(conditions)

-     def _reconstruct_image_path(self, image: ImageRow) -> ImageRow:
+     def _add_image_abspath(self, image: ImageRow) -> ImageRow:
          """Reconstruct absolute path from image row containing repo_url and relative path.

          Args:
              image: Image record with 'repo_url' and 'path' (relative) fields

          Returns:
-             Modified image record with 'path' as absolute path
+             Modified image record with 'abspath' as absolute path
          """
-         repo_url = image.get("repo_url")
-         relative_path = image.get("path")
+         if not image.get("abspath"):
+             repo_url = image.get(Database.REPO_URL_KEY)
+             relative_path = image.get("path")

-         if repo_url and relative_path:
-             repo = self.repo_manager.get_repo_by_url(repo_url)
-             if repo:
-                 absolute_path = repo.resolve_path(relative_path)
-                 image["path"] = str(absolute_path)
+             if repo_url and relative_path:
+                 repo = self.repo_manager.get_repo_by_url(repo_url)
+                 if repo:
+                     absolute_path = repo.resolve_path(relative_path)
+                     image["abspath"] = str(absolute_path)

          return image

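The date-proximity term above decays exponentially with a 7-day e-folding time (the literal 2.718 approximates e). A standalone sanity check of the constants quoted in the comment, illustrative only:

    import math

    def date_score(delta_seconds: float) -> float:
        # Mirrors the formula in the diff: 100 * e^(-delta / 7 days)
        return 100 * math.exp(-delta_seconds / (7 * 86400))

    for days in (0, 7, 30):
        print(f"{days} days -> {date_score(days * 86400):.1f}")
    # 0 days -> 100.0
    # 7 days -> 36.8
    # 30 days -> 1.4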
@@ -380,15 +460,73 @@ class Starbash:
          """
          Get the reference ImageRow for a session with absolute path.
          """
+         from starbash.database import SearchCondition
+
          images = self.db.search_image(
-             {Database.ID_KEY: session[get_column_name(Database.IMAGE_DOC_KEY)]}
+             [SearchCondition("i.id", "=", session[get_column_name(Database.IMAGE_DOC_KEY)])]
          )
-         assert (
-             len(images) == 1
-         ), f"Expected exactly one reference for session, found {len(images)}"
-         return self._reconstruct_image_path(images[0])
+         assert len(images) == 1, f"Expected exactly one reference for session, found {len(images)}"
+         return self._add_image_abspath(images[0])
+
+     def get_master_images(
+         self, imagetyp: str | None = None, reference_session: SessionRow | None = None
+     ) -> list[ImageRow]:
+         """Return a list of the specified master imagetyp (bias, flat etc...)
+         (or any type if not specified).
+
+         The first image will be the 'best'; remaining entries are progressively worse matches.
+
+         (the following is not yet implemented)
+         If reference_session is provided it will be used to refine the search as follows:
+         * The telescope must match
+         * The image resolutions and binnings must match
+         * The filter must match (for FLAT frames only)
+         * Preferably the master date_obs would be either before or slightly after (<24 hrs) the reference session start time
+         * Preferably the master date_obs should be the closest in date to the reference session start time
+         * The camera temperature should be as close as possible to the reference session camera temperature
+         """
+         master_repo = self.repo_manager.get_repo_by_kind("master")
+
+         if master_repo is None:
+             logging.warning("No master repo configured - skipping master frame load.")
+             return []
+
+         # Search for images in the master repo only
+         from starbash.database import SearchCondition
+
+         search_conditions = [SearchCondition("r.url", "=", master_repo.url)]
+         if imagetyp:
+             search_conditions.append(SearchCondition("i.imagetyp", "=", imagetyp))
+
+         images = self.db.search_image(search_conditions)

-     def get_session_images(self, session: SessionRow) -> list[ImageRow]:
+         # FIXME - move this into a general filter function
+         # For flat frames, filter images based on matching reference_session filter
+         if reference_session and imagetyp and self.aliases.normalize(imagetyp) == "flat":
+             ref_filter = self.aliases.normalize(
+                 reference_session.get(get_column_name(Database.FILTER_KEY), "None")
+             )
+             if ref_filter:
+                 # Filter images to only those with matching filter in metadata
+                 filtered_images = []
+                 for img in images:
+                     img_filter = img.get(Database.FILTER_KEY, "None")
+                     if img_filter == ref_filter:
+                         filtered_images.append(img)
+                 images = filtered_images
+
+         return images
+
+     def add_filter_not_masters(self, conditions: list[SearchCondition]) -> None:
+         """Add conditions to filter out master and processed repos from image searches."""
+         master_repo = self.repo_manager.get_repo_by_kind("master")
+         if master_repo is not None:
+             conditions.append(SearchCondition("r.url", "<>", master_repo.url))
+         processed_repo = self.repo_manager.get_repo_by_kind("processed")
+         if processed_repo is not None:
+             conditions.append(SearchCondition("r.url", "<>", processed_repo.url))
+
+     def get_session_images(self, session: SessionRow, processed_ok: bool = False) -> list[ImageRow]:
          """
          Get all images belonging to a specific session.

@@ -399,6 +537,9 @@ class Starbash:
          Args:
              session_id: The database ID of the session

+             processed_ok: If True, include images which were processed by apps (i.e. stacked or other processing)
+                 Normally image pipelines don't want to accidentally consume those files.
+
          Returns:
              List of image records (dictionaries with path, metadata, etc.)
              Returns empty list if session not found or has no images.
@@ -406,20 +547,48 @@ class Starbash:
          Raises:
              ValueError: If session_id is not found in the database
          """
-         # Query images that match ALL session criteria including date range
-         conditions = {
-             Database.FILTER_KEY: session[get_column_name(Database.FILTER_KEY)],
-             Database.IMAGETYP_KEY: session[get_column_name(Database.IMAGETYP_KEY)],
-             Database.OBJECT_KEY: session[get_column_name(Database.OBJECT_KEY)],
-             Database.TELESCOP_KEY: session[get_column_name(Database.TELESCOP_KEY)],
-             "date_start": session[get_column_name(Database.START_KEY)],
-             "date_end": session[get_column_name(Database.END_KEY)],
-         }
+         from starbash.database import SearchCondition

-         # Single query with all conditions
+         # Query images that match ALL session criteria including date range
+         # Note: We need to search JSON metadata for FILTER, IMAGETYP, OBJECT, TELESCOP
+         # since they're not indexed columns in the images table
+         conditions = [
+             SearchCondition("i.date_obs", ">=", session[get_column_name(Database.START_KEY)]),
+             SearchCondition("i.date_obs", "<=", session[get_column_name(Database.END_KEY)]),
+             SearchCondition("i.imagetyp", "=", session[get_column_name(Database.IMAGETYP_KEY)]),
+         ]
+
+         # Note: not needed here, because we filter this earlier - when building the
+         # list of candidate sessions.
+         # We never want to return 'master' or 'processed' images as part of the session image paths
+         # (because we will be passing these to tools (Siril or whatever) to generate masters or
+         # some other downstream image)
+         # self.add_filter_not_masters(conditions)
+
+         # Single query with indexed date conditions
          images = self.db.search_image(conditions)
+
+         # We no longer filter by target (object) because it might not be set anyway
+         filtered_images = []
+         for img in images:
+             # "HISTORY" nodes are added by processing tools (Siril etc...); we never want to accidentally read those images
+             has_history = img.get("HISTORY")
+
+             # images that were stacked seem to always have a STACKCNT header set
+             is_stacked = img.get("STACKCNT")
+
+             if (
+                 img.get(Database.FILTER_KEY) == session[get_column_name(Database.FILTER_KEY)]
+                 # and img.get(Database.OBJECT_KEY)
+                 # == session[get_column_name(Database.OBJECT_KEY)]
+                 and img.get(Database.TELESCOP_KEY)
+                 == session[get_column_name(Database.TELESCOP_KEY)]
+                 and (processed_ok or (not has_history and not is_stacked))
+             ):
+                 filtered_images.append(img)
+
          # Reconstruct absolute paths for all images
-         return [self._reconstruct_image_path(img) for img in images] if images else []
+         return [self._add_image_abspath(img) for img in filtered_images]

      def remove_repo_ref(self, url: str) -> None:
          """
@@ -437,7 +606,7 @@ class Starbash:
          repo_refs = self.user_repo.config.get("repo-ref")

          if not repo_refs:
-             raise ValueError(f"No repository references found in user configuration.")
+             raise ValueError("No repository references found in user configuration.")

          # Find and remove the matching repo-ref
          found = False
@@ -447,6 +616,7 @@ class Starbash:
              # Match by converting to file:// URL format if needed
              if ref_dir == url or f"file://{ref_dir}" == url:
                  repo_refs.remove(ref)
+
                  found = True
                  break

@@ -456,24 +626,91 @@ class Starbash:
          # Write the updated config
          self.user_repo.write_config()

-     def reindex_repo(self, repo: Repo, force: bool = False):
+     def add_image(self, repo: Repo, f: Path, force: bool = False) -> dict[str, Any] | None:
+         """Read FITS header from file and add/update image entry in the database."""
+
+         path = repo.get_path()
+         if not path:
+             raise ValueError(f"Repo path not found for {repo}")
+
+         whitelist = None
+         config = self.repo_manager.merged.get("config")
+         if config:
+             whitelist = config.get("fits-whitelist", None)
+
+         # Convert absolute path to relative path within repo
+         relative_path = f.relative_to(path)
+
+         found = self.db.get_image(repo.url, str(relative_path))
+
+         # for debugging sometimes we want to limit scanning to a single directory or file
+         # debug_target = "masters-raw/2025-09-09/DARK"
+         debug_target = None
+         if debug_target:
+             if str(relative_path).startswith(debug_target):
+                 logging.error("Debugging %s...", f)
+                 found = False
+             else:
+                 found = True  # skip processing
+                 force = False
+
+         if not found or force:
+             # Read and log the primary header (HDU 0)
+             with fits.open(str(f), memmap=False) as hdul:
+                 # convert headers to dict
+                 hdu0: Any = hdul[0]
+                 header = hdu0.header
+                 if type(header).__name__ == "Unknown":
+                     raise ValueError("FITS header has Unknown type: %s", f)
+
+                 items = header.items()
+                 headers = {}
+                 for key, value in items:
+                     if (not whitelist) or (key in whitelist):
+                         headers[key] = value
+
+                 # Some device software (old Asiair versions) fails to populate TELESCOP, in that case fall back to
+                 # CREATOR (see doc/fits/malformedasimaster.txt for an example)
+                 if Database.TELESCOP_KEY not in headers:
+                     creator = headers.get("CREATOR")
+                     if creator:
+                         headers[Database.TELESCOP_KEY] = creator
+
+                 logging.debug("Headers for %s: %s", f, headers)
+
+                 # Store relative path in database
+                 headers["path"] = str(relative_path)
+                 image_doc_id = self.db.upsert_image(headers, repo.url)
+                 headers[Database.ID_KEY] = image_doc_id
+
+                 if not found:
+                     return headers
+
+         return None
+
+     def add_image_and_session(self, repo: Repo, f: Path, force: bool = False) -> None:
+         """Read FITS header from file and add/update image entry in the database."""
+         headers = self.add_image(repo, f, force=force)
+         if headers:
+             # Update the session infos, but ONLY on first file scan
+             # (otherwise invariants will get messed up)
+             self._add_session(headers)
+
+     def reindex_repo(self, repo: Repo, subdir: str | None = None):
          """Reindex all repositories managed by the RepoManager."""

          # make sure this new repo is listed in the repos table
          self.repo_db_update()  # not really ideal, a more optimal version would just add the new repo

-         # FIXME, add a method to get just the repos that contain images
-         if repo.is_scheme("file") and repo.kind != "recipe":
-             logging.debug("Reindexing %s...", repo.url)
+         path = repo.get_path()

-             whitelist = None
-             config = self.repo_manager.merged.get("config")
-             if config:
-                 whitelist = config.get("fits-whitelist", None)
+         repo_kind = repo.kind()
+         if path and repo.is_scheme("file") and repo_kind != "recipe":
+             logging.debug("Reindexing %s...", repo.url)

-             path = repo.get_path()
-             if not path:
-                 raise ValueError(f"Repo path not found for {repo}")
+             if subdir:
+                 path = path / subdir
+                 # used to debug

              # Find all FITS files under this repo path
              for f in track(
@@ -481,350 +718,152 @@ class Starbash:
                  description=f"Indexing {repo.url}...",
              ):
                  # progress.console.print(f"Indexing {f}...")
-                 try:
-                     # Convert absolute path to relative path within repo
-                     relative_path = f.relative_to(path)
-
-                     found = self.db.get_image(repo.url, str(relative_path))
-                     if not found or force:
-                         # Read and log the primary header (HDU 0)
-                         with fits.open(str(f), memmap=False) as hdul:
-                             # convert headers to dict
-                             hdu0: Any = hdul[0]
-                             header = hdu0.header
-                             if type(header).__name__ == "Unknown":
-                                 raise ValueError("FITS header has Unknown type: %s", f)
-
-                             items = header.items()
-                             headers = {}
-                             for key, value in items:
-                                 if (not whitelist) or (key in whitelist):
-                                     headers[key] = value
-                             logging.debug("Headers for %s: %s", f, headers)
-                             # Store relative path in database
-                             headers["path"] = str(relative_path)
-                             image_doc_id = self.db.upsert_image(headers, repo.url)
-
-                             if not found:
-                                 # Update the session infos, but ONLY on first file scan
-                                 # (otherwise invariants will get messed up)
-                                 self._add_session(str(f), image_doc_id, header)
-
-                 except Exception as e:
-                     logging.warning("Failed to read FITS header for %s: %s", f, e)
-
-     def reindex_repos(self, force: bool = False):
+                 if repo_kind == "master":
+                     # for master repos we only add to the image table
+                     self.add_image(repo, f, force=True)
+                 elif repo_kind == "processed":
+                     pass  # we never add processed images to our db
+                 else:
+                     self.add_image_and_session(repo, f, force=starbash.force_regen)
+
+     def reindex_repos(self):
          """Reindex all repositories managed by the RepoManager."""
          logging.debug("Reindexing all repositories...")

          for repo in track(self.repo_manager.repos, description="Reindexing repos..."):
-             self.reindex_repo(repo, force=force)
-
-     def run_all_stages(self):
-         """On the currently active session, run all processing stages"""
-         logging.info("--- Running all stages ---")
+             self.reindex_repo(repo)

-         # 1. Get all pipeline definitions (the `[[stages]]` tables with name and priority).
-         pipeline_definitions = self.repo_manager.merged.getall("stages")
-         flat_pipeline_steps = list(itertools.chain.from_iterable(pipeline_definitions))
+     def get_recipes(self) -> list[Repo]:
+         """Get all recipe repos available, sorted by priority (lower number first).

-         # 2. Sort the pipeline steps by their 'priority' field.
-         try:
-             sorted_pipeline = sorted(flat_pipeline_steps, key=lambda s: s["priority"])
-         except KeyError as e:
-             # Re-raise as a ValueError with a more descriptive message.
-             raise ValueError(
-                 f"invalid stage definition: a stage is missing the required 'priority' key"
-             ) from e
-
-         logging.info(
-             f"Found {len(sorted_pipeline)} pipeline steps to run in order of priority."
-         )
-
-         self.init_context()
-         # 4. Iterate through the sorted pipeline and execute the associated tasks.
-         for step in sorted_pipeline:
-             step_name = step.get("name")
-             if not step_name:
-                 raise ValueError("Invalid pipeline step found: missing 'name' key.")
-             self.run_pipeline_step(step_name)
-
-     def run_pipeline_step(self, step_name: str):
-         logging.info(f"--- Running pipeline step: '{step_name}' ---")
-
-         # 3. Get all available task definitions (the `[[stage]]` tables with tool, script, when).
-         task_definitions = self.repo_manager.merged.getall("stage")
-         all_tasks = list(itertools.chain.from_iterable(task_definitions))
-
-         # Find all tasks that should run during this pipeline step.
-         tasks_to_run = [task for task in all_tasks if task.get("when") == step_name]
-         for task in tasks_to_run:
-             self.run_stage(task)
-
-     def run_master_stages(self):
-         """Generate any missing master frames
-
-         Steps:
-         * set all_tasks to be all tasks for when == "setup.masters"
-         * loop over all currently unfiltered sessions
-         * for each session loop across all_tasks
-         * if task input.type == the imagetyp for this current session
-         * add_input_to_context() add the input files to the context (from the session)
-         * run_stage(task) to generate the new master frame
+         Recipes without a priority are placed at the end of the list.
          """
-         sessions = self.search_session()
-         for session in sessions:
-             imagetyp = session[get_column_name(Database.IMAGETYP_KEY)]
-             logging.debug(
-                 f"Processing session ID {session[get_column_name(Database.ID_KEY)]} with imagetyp '{imagetyp}'"
-             )
+         recipes = [r for r in self.repo_manager.repos if r.kind() == "recipe"]

-             # 3. Get all available task definitions (the `[[stage]]` tables with tool, script, when).
-             task_definitions = self.repo_manager.merged.getall("stage")
-             all_tasks = list(itertools.chain.from_iterable(task_definitions))
-
-             # Find all tasks that should run during the "setup.masters" step.
-             tasks_to_run = [
-                 task for task in all_tasks if task.get("when") == "setup.masters"
-             ]
-
-             for task in tasks_to_run:
-                 input_config = task.get("input", {})
-                 input_type = input_config.get("type")
-                 if imagetyp_equals(input_type, imagetyp):
-                     logging.info(
-                         f" Running master stage task for imagetyp '{imagetyp}'"
-                     )
+         # Sort recipes by priority (lower number first). If no priority specified,
+         # use float('inf') to push those to the end of the list.
+         def priority_key(r: Repo) -> float:
+             priority = r.get("recipe.priority")
+             return float(priority) if priority is not None else float("inf")

-                     # Create a default process dir in /tmp, though more advanced 'session' based workflows will
-                     # probably override this and place it somewhere persistent.
-                     with tempfile.TemporaryDirectory(prefix="session_tmp_") as temp_dir:
-                         logging.debug(
-                             f"Created temporary session directory: {temp_dir}"
-                         )
-                         self.init_context()
-                         self.context["process_dir"] = temp_dir
-                         self.add_session_to_context(session)
-                         self.run_stage(task)
-
-     def init_context(self) -> None:
-         """Do common session init"""
-
-         # Context is preserved through all stages, so each stage can add new symbols to it for use by later stages
-         self.context = {}
-
-         # Update the context with runtime values.
-         runtime_context = {
-             "masters": "/workspaces/starbash/images/masters",  # FIXME find this the correct way
-         }
-         self.context.update(runtime_context)
-
-     def add_session_to_context(self, session: SessionRow) -> None:
-         """adds to context from the indicated session:
-         * input_files - all of the files mentioned in the session
-         * instrument - for the session
-         * date - the localtimezone date of the session
-         * imagetyp - the imagetyp of the session
-         * session - the current session row (joined with a typical image) (can be used to
-           find things like telescope, temperature ...)
-         """
-         # Get images for this session
-         images = self.get_session_images(session)
-         logging.debug(f"Adding {len(images)} files as context.input_files")
-         self.context["input_files"] = [
-             img["path"] for img in images
-         ]  # Pass in the file list via the context dict
-
-         # it is okay to give them the actual session row, because we're never using it again
-         self.context["session"] = session
+         recipes.sort(key=priority_key)

-         instrument = session.get(get_column_name(Database.TELESCOP_KEY))
-         if instrument:
-             self.context["instrument"] = instrument
+         return recipes

-         imagetyp = session.get(get_column_name(Database.IMAGETYP_KEY))
-         if imagetyp:
-             self.context["imagetyp"] = imagetyp
-
-         date = session.get(get_column_name(Database.START_KEY))
-         if date:
-             self.context["date"] = to_shortdate(date)
-
-     def add_input_files(self, stage: dict) -> None:
-         """adds to context.input_files based on the stage input config"""
-         input_config = stage.get("input")
-         input_required = False
-         if input_config:
-             # if there is an "input" dict, we assume input.required is true if unset
-             input_required = input_config.get("required", True)
-             source = input_config.get("source")
-             if source is None:
-                 raise ValueError(
-                     f"Stage '{stage.get('name')}' has invalid 'input' configuration: missing 'source'"
-                 )
-             if source == "path":
-                 # The path might contain context variables that need to be expanded.
-                 # path_pattern = expand_context(input_config["path"], context)
-                 path_pattern = input_config["path"]
-                 input_files = glob.glob(path_pattern, recursive=True)
-
-                 self.context["input_files"] = (
-                     input_files  # Pass in the file list via the context dict
-                 )
-             elif source == "repo":
-                 # We expect that higher level code has already added the correct input files
-                 # to the context
-                 if not "input_files" in self.context:
-                     raise RuntimeError(
-                         "Input config specifies 'repo' but no 'input_files' found in context"
-                     )
-             else:
-                 raise ValueError(
-                     f"Stage '{stage.get('name')}' has invalid 'input' source: {source}"
-                 )
-
-             # FIXME compare context.output to see if it already exists and is newer than the input files, if so skip processing
-         else:
-             # The script doesn't mention input, therefore assume it doesn't want input_files
-             if "input_files" in self.context:
-                 del self.context["input_files"]
+     def get_recipe_for_session(self, session: SessionRow, step: dict[str, Any]) -> Repo | None:
+         """Try to find a recipe that can be used to process the given session for the given step name
+         (master-dark, master-bias, light, stack, etc...)

-         if input_required and not "input_files" in self.context:
-             raise RuntimeError("No input files found for stage")
+         * if a recipe doesn't have a matching recipe.stage.<step_name> it is not considered
+         * As part of this checking we will look at recipe.auto.require.* conditions to see if the recipe
+           is suitable for this session.
+         * the imagetyp of this session matches step.input

-     def add_output_path(self, stage: dict) -> None:
-         """Adds output path information to context based on the stage output config.
-
-         Sets the following context variables:
-         - context.output.root_path - base path of the destination repo
-         - context.output.base_path - full path without file extension
-         - context.output.suffix - file extension (e.g., .fits or .fit.gz)
-         - context.output.full_path - complete output file path
+         Currently we return just one Repo but eventually we should support multiple matching recipes
+         and make the user pick (by throwing an exception?).
          """
-         output_config = stage.get("output")
-         if not output_config:
-             # No output configuration, remove any existing output from context
-             if "output" in self.context:
-                 del self.context["output"]
-             return
+         # Get all recipe repos - FIXME add a getall(kind) to RepoManager
+         recipe_repos = self.get_recipes()

-         dest = output_config.get("dest")
-         if not dest:
-             raise ValueError(
-                 f"Stage '{stage.get('description', 'unknown')}' has 'output' config but missing 'dest'"
-             )
-
-         if dest == "repo":
-             # Find the destination repo by type/kind
-             output_type = output_config.get("type")
-             if not output_type:
-                 raise ValueError(
-                     f"Stage '{stage.get('description', 'unknown')}' has output.dest='repo' but missing 'type'"
-                 )
+         step_name = step.get("name")
+         if not step_name:
+             raise ValueError("Invalid pipeline step found: missing 'name' key.")

-             # Find the repo with matching kind
-             dest_repo = self.repo_manager.get_repo_by_kind(output_type)
-             if not dest_repo:
-                 raise ValueError(
-                     f"No repository found with kind '{output_type}' for output destination"
-                 )
+         input_name = step.get("input")
+         if not input_name:
+             raise ValueError("Invalid pipeline step found: missing 'input' key.")

-             repo_base = dest_repo.get_path()
-             if not repo_base:
-                 raise ValueError(f"Repository '{dest_repo.url}' has no filesystem path")
+         # if input type is recipe we don't check for filetype match - because we'll just use files already in
+         # the tempdir
+         if input_name != "recipe":
+             imagetyp = session.get(get_column_name(Database.IMAGETYP_KEY))

-             repo_relative: str | None = dest_repo.get("repo.relative")
-             if not repo_relative:
-                 raise ValueError(
-                     f"Repository '{dest_repo.url}' is missing 'repo.relative' configuration"
+             if not imagetyp or input_name != self.aliases.normalize(imagetyp):
+                 logging.debug(
+                     f"Session imagetyp '{imagetyp}' does not match step input '{input_name}', skipping"
                  )
+                 return None

-             # we support context variables in the relative path
-             repo_relative = expand_context_unsafe(repo_relative, self.context)
-             full_path = repo_base / repo_relative
-
-             # base_path but without spaces - because Siril doesn't like that
-             full_path = Path(str(full_path).replace(" ", r"_"))
-
-             base_path = full_path.parent / full_path.stem
-
-             # Set context variables as documented in the TOML
-             self.context["output"] = {
-                 # "root_path": repo_relative, not needed I think
-                 "base_path": base_path,
-                 # "suffix": full_path.suffix, not needed I think
-                 "full_path": full_path,
-             }
+         # Get session metadata for checking requirements
+         session_metadata = session.get("metadata", {})

-         else:
-             raise ValueError(
-                 f"Unsupported output destination type: {dest}. Only 'repo' is currently supported."
-             )
-
-     def run_stage(self, stage: dict) -> None:
-         """
-         Executes a single processing stage.
+         for repo in recipe_repos:
+             # Check if this recipe has the requested stage
+             stage_config = repo.get(f"recipe.stage.{step_name}")
+             if not stage_config:
+                 logging.debug(f"Recipe {repo.url} does not have stage '{step_name}', skipping")
+                 continue

-         Args:
-             stage: A dictionary representing the stage configuration, containing
-                 at least 'tool' and 'script' keys.
-         """
-         stage_desc = stage.get("description", "(missing description)")
-         stage_disabled = stage.get("disabled", False)
-         if stage_disabled:
-             logging.info(f"Skipping disabled stage: {stage_desc}")
-             return
+             # Check auto.require conditions if they exist

-         logging.info(f"Running stage: {stage_desc}")
+             # If requirements are specified, check if session matches
+             required_filters = repo.get("recipe.auto.require.filter", [])
+             if required_filters:
+                 session_filter = self.aliases.normalize(
+                     session_metadata.get(Database.FILTER_KEY), lenient=True
+                 )

-         tool_name = stage.get("tool")
-         if not tool_name:
-             raise ValueError(
-                 f"Stage '{stage.get('name')}' is missing a 'tool' definition."
-             )
-         tool: Tool | None = tools.get(tool_name)
-         if not tool:
-             raise ValueError(
-                 f"Tool '{tool_name}' for stage '{stage.get('name')}' not found."
-             )
-         logging.debug(f" Using tool: {tool_name}")
+                 # Session must have AT LEAST one filter that matches one of the required filters
+                 if not session_filter or session_filter not in required_filters:
+                     logging.debug(
+                         f"Recipe {repo.url} requires filters {required_filters}, "
+                         f"session has '{session_filter}', skipping"
+                     )
+                     continue

-         script_filename = stage.get("script-file", tool.default_script_file)
-         if script_filename:
-             source = stage.source  # type: ignore (was monkeypatched by repo)
-             script = source.read(script_filename)
-         else:
-             script = stage.get("script")
+             required_color = repo.get("recipe.auto.require.color", False)
+             if required_color:
+                 session_bayer = session_metadata.get("BAYERPAT")

-         if script is None:
-             raise ValueError(
-                 f"Stage '{stage.get('name')}' is missing a 'script' or 'script-file' definition."
-             )
+                 # Session must be color (i.e. have a BAYERPAT header)
+                 if not session_bayer:
+                     logging.debug(
+                         f"Recipe {repo.url} requires a color camera, "
+                         f"but session has no BAYERPAT header, skipping"
+                     )
+                     continue
+
+             required_cameras = repo.get("recipe.auto.require.camera", [])
+             if required_cameras:
+                 session_camera = self.aliases.normalize(
+                     session_metadata.get("INSTRUME"), lenient=True
+                 )  # Camera identifier
+
+                 # Session must have a camera that matches one of the required cameras
+                 if not session_camera or session_camera not in required_cameras:
+                     logging.debug(
+                         f"Recipe {repo.url} requires cameras {required_cameras}, "
+                         f"session has '{session_camera}', skipping"
+                     )
+                     continue

-         # This allows recipe TOML to define their own default variables.
-         # (apply all of the changes to context that the task demands)
-         stage_context = stage.get("context", {})
-         self.context.update(stage_context)
-         self.add_input_files(stage)
-         self.add_output_path(stage)
-
-         # if the output path already exists and is newer than all input files, skip processing
-         output_info: dict | None = self.context.get("output")
-         if output_info:
-             output_path = output_info.get("full_path")
-
-             if output_path and os.path.exists(output_path):
-                 logging.info(
-                     f"Output file already exists, skipping processing: {output_path}"
-                 )
-                 return
+             # This recipe matches!
+             logging.info(f"Selected recipe {repo.url} for stage '{step_name}' ")
+             return repo

-         tool.run_in_temp_dir(script, context=self.context)
+         # No matching recipe found
+         return None

-         # verify context.output was created if it was specified
-         output_info: dict | None = self.context.get("output")
-         if output_info:
-             output_path = output_info.get("full_path")
+     def filter_sessions_with_lights(self, sessions: list[SessionRow]) -> list[SessionRow]:
+         """Filter sessions to only those that contain light frames."""
+         filtered_sessions: list[SessionRow] = []
+         for s in sessions:
+             imagetyp_val = s.get(get_column_name(Database.IMAGETYP_KEY))
+             if imagetyp_val is None:
+                 continue
+             if self.aliases.normalize(str(imagetyp_val)) == "light":
+                 filtered_sessions.append(s)
+         return filtered_sessions

-         if not output_path or not os.path.exists(output_path):
-             raise RuntimeError(f"Expected output file not found: {output_path}")
+     def filter_sessions_by_target(
+         self, sessions: list[SessionRow], target: str
+     ) -> list[SessionRow]:
+         """Filter sessions to only those that match the given target name."""
+         filtered_sessions: list[SessionRow] = []
+         for s in sessions:
+             obj_val = s.get(get_column_name(Database.OBJECT_KEY))
+             if obj_val is None:
+                 continue
+             if normalize_target_name(str(obj_val)) == target:
+                 filtered_sessions.append(s)
+         return filtered_sessions
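The normalize() calls throughout the new code are backed by the Aliases helper added in starbash/aliases.py (+145 lines), whose implementation is not shown in this diff. The usage sketch below is therefore an assumption pieced together from the call sites (Aliases(alias_dict), normalize(value), normalize(value, lenient=True)):

    # Hypothetical alias table shape; the real table is merged from repo config under "aliases"
    alias_dict = {
        "light": ["LIGHT", "Light Frame"],  # assumed: canonical name -> observed FITS variants
        "flat": ["FLAT", "flats"],
    }
    aliases = Aliases(alias_dict)  # as constructed in Starbash._init_aliases()

    # Call sites compare normalized values against canonical names such as "light" or "flat":
    aliases.normalize("Light Frame")        # -> "light" (assumed behavior)
    aliases.normalize(None, lenient=True)   # lenient=True appears where the header may be missing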