starbash 0.1.9__py3-none-any.whl → 0.1.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of starbash might be problematic. Click here for more details.

starbash/app.py CHANGED
@@ -1,3 +1,4 @@
1
+ import cmd
1
2
  import logging
2
3
  from importlib import resources
3
4
  import os
@@ -19,6 +20,7 @@ import copy
19
20
 
20
21
  import starbash
21
22
  from starbash import console, _is_test_env, to_shortdate
23
+ from starbash.aliases import Aliases
22
24
  from starbash.database import Database, SessionRow, ImageRow, get_column_name
23
25
  from repo import Repo, repo_suffix
24
26
  from starbash.toml import toml_from_template
@@ -70,7 +72,11 @@ def create_user() -> Path:
70
72
 
71
73
 
72
74
  def copy_images_to_dir(images: list[ImageRow], output_dir: Path) -> None:
73
- """Copy images to the specified output directory (using symbolic links if possible)."""
75
+ """Copy images to the specified output directory (using symbolic links if possible).
76
+
77
+ This function requires that "abspath" already be populated in each ImageRow. Normally
78
+ the caller does this by calling Starbash._add_image_abspath() on the image.
79
+ """
74
80
 
75
81
  # Export images
76
82
  console.print(f"[cyan]Exporting {len(images)} images to {output_dir}...[/cyan]")
@@ -81,7 +87,7 @@ def copy_images_to_dir(images: list[ImageRow], output_dir: Path) -> None:
81
87
 
82
88
  for image in images:
83
89
  # Get the source path from the image metadata
84
- source_path = Path(image.get("path", ""))
90
+ source_path = Path(image.get("abspath", ""))
85
91
 
86
92
  if not source_path.exists():
87
93
  console.print(f"[red]Warning: Source file not found: {source_path}[/red]")
@@ -118,14 +124,6 @@ def copy_images_to_dir(images: list[ImageRow], output_dir: Path) -> None:
118
124
  console.print(f" [red]Errors: {error_count} files[/red]")
119
125
 
120
126
 
121
- def imagetyp_equals(imagetyp1: str, imagetyp2: str) -> bool:
122
- """Imagetyps (BIAS, Dark, FLAT, flats) have a number of slightly different convetions.
123
- Do a sloppy equality check.
124
-
125
- Eventually handle non english variants by using the repos aliases table."""
126
- return imagetyp1.strip().lower() == imagetyp2.strip().lower()
127
-
128
-
129
127
  class Starbash:
130
128
  """The main Starbash application class."""
131
129
 
@@ -138,12 +136,29 @@ class Starbash:
138
136
  logging.info("Starbash starting...")
139
137
 
140
138
  # Load app defaults and initialize the repository manager
139
+ self._init_repos()
140
+ self._init_analytics(cmd)
141
+ self._init_aliases()
142
+
143
+ logging.info(
144
+ f"Repo manager initialized with {len(self.repo_manager.repos)} repos."
145
+ )
146
+ # self.repo_manager.dump()
147
+
148
+ self._db = None # Lazy initialization - only create when accessed
149
+
150
+ # Initialize selection state (stored in user config repo)
151
+ self.selection = Selection(self.user_repo)
152
+
153
+ def _init_repos(self) -> None:
154
+ """Initialize all repositories managed by the RepoManager."""
141
155
  self.repo_manager = RepoManager()
142
156
  self.repo_manager.add_repo("pkg://defaults")
143
157
 
144
158
  # Add user prefs as a repo
145
159
  self.user_repo = self.repo_manager.add_repo("file://" + str(create_user()))
146
160
 
161
+ def _init_analytics(self, cmd: str) -> None:
147
162
  self.analytics = NopAnalytics()
148
163
  if self.user_repo.get("analytics.enabled", True):
149
164
  include_user = self.user_repo.get("analytics.include_user", False)
@@ -157,19 +172,10 @@ class Starbash:
157
172
  self.analytics = analytics_start_transaction(name="App session", op=cmd)
158
173
  self.analytics.__enter__()
159
174
 
160
- logging.info(
161
- f"Repo manager initialized with {len(self.repo_manager.repos)} repos."
162
- )
163
- # self.repo_manager.dump()
164
-
165
- self._db = None # Lazy initialization - only create when accessed
166
- self.session_query = None # None means search all sessions
167
-
168
- # Initialize selection state (stored in user config repo)
169
- self.selection = Selection(self.user_repo)
170
-
171
- # FIXME, call reindex somewhere and also index whenever new repos are added
172
- # self.reindex_repos()
175
+ def _init_aliases(self) -> None:
176
+ alias_dict = self.repo_manager.get("aliases", {})
177
+ assert isinstance(alias_dict, dict), "Aliases config must be a dictionary"
178
+ self.aliases = Aliases(alias_dict)
173
179
 
174
180
  @property
175
181
  def db(self) -> Database:
@@ -214,30 +220,42 @@ class Starbash:
214
220
  self.close()
215
221
  return handled
216
222
 
217
- def _add_session(self, f: str, image_doc_id: int, header: dict) -> None:
223
+ def _add_session(self, image_doc_id: int, header: dict) -> None:
218
224
  """We just added a new image, create or update its session entry as needed."""
219
- filter = header.get(Database.FILTER_KEY, "unspecified")
220
225
  image_type = header.get(Database.IMAGETYP_KEY)
221
226
  date = header.get(Database.DATE_OBS_KEY)
222
227
  if not date or not image_type:
223
228
  logging.warning(
224
- "Image %s missing either DATE-OBS or IMAGETYP FITS header, skipping...",
225
- f,
229
+ "Image '%s' missing either DATE-OBS or IMAGETYP FITS header, skipping...",
230
+ header.get("path", "unspecified"),
226
231
  )
227
232
  else:
228
233
  exptime = header.get(Database.EXPTIME_KEY, 0)
229
- telescop = header.get(Database.TELESCOP_KEY, "unspecified")
234
+
230
235
  new = {
231
- Database.FILTER_KEY: filter,
232
- Database.START_KEY: date,
233
- Database.END_KEY: date, # FIXME not quite correct, should be longer by exptime
234
- Database.IMAGE_DOC_KEY: image_doc_id,
235
- Database.IMAGETYP_KEY: image_type,
236
- Database.NUM_IMAGES_KEY: 1,
237
- Database.EXPTIME_TOTAL_KEY: exptime,
238
- Database.OBJECT_KEY: header.get(Database.OBJECT_KEY, "unspecified"),
239
- Database.TELESCOP_KEY: telescop,
236
+ get_column_name(Database.START_KEY): date,
237
+ get_column_name(
238
+ Database.END_KEY
239
+ ): date, # FIXME not quite correct, should be longer by exptime
240
+ get_column_name(Database.IMAGE_DOC_KEY): image_doc_id,
241
+ get_column_name(Database.IMAGETYP_KEY): image_type,
242
+ get_column_name(Database.NUM_IMAGES_KEY): 1,
243
+ get_column_name(Database.EXPTIME_TOTAL_KEY): exptime,
244
+ get_column_name(Database.EXPTIME_KEY): exptime,
240
245
  }
246
+
247
+ filter = header.get(Database.FILTER_KEY)
248
+ if filter:
249
+ new[get_column_name(Database.FILTER_KEY)] = filter
250
+
251
+ telescop = header.get(Database.TELESCOP_KEY)
252
+ if telescop:
253
+ new[get_column_name(Database.TELESCOP_KEY)] = telescop
254
+
255
+ obj = header.get(Database.OBJECT_KEY)
256
+ if obj:
257
+ new[get_column_name(Database.OBJECT_KEY)] = obj
258
+
241
259
  session = self.db.get_session(new)
242
260
  self.db.upsert_session(new, existing=session)
243
261
 
@@ -267,17 +285,6 @@ class Starbash:
267
285
 
268
286
  """
269
287
  # Get reference image to access CCD-TEMP and DATE-OBS
270
- metadata: dict = ref_session.get("metadata", {})
271
- ref_temp = metadata.get("CCD-TEMP", None)
272
- ref_date_str = metadata.get(Database.DATE_OBS_KEY)
273
-
274
- # Parse reference date for time delta calculations
275
- ref_date = None
276
- if ref_date_str:
277
- try:
278
- ref_date = datetime.fromisoformat(ref_date_str)
279
- except (ValueError, TypeError):
280
- logging.warning(f"Malformed session ref date: {ref_date_str}")
281
288
 
282
289
  # Build search conditions - MUST match criteria
283
290
  conditions = {
@@ -294,6 +301,37 @@ class Starbash:
294
301
  # Search for candidate sessions
295
302
  candidates = self.db.search_session(where_tuple(conditions))
296
303
 
304
+ return self.score_candidates(candidates, ref_session)
305
+
306
+ def score_candidates(
307
+ self, candidates: list[dict[str, Any]], ref_session: SessionRow
308
+ ) -> list[SessionRow]:
309
+ """Given a list of images or sessions, try to rank that list by desirability.
310
+
311
+ Return a list of possible images/sessions which would be acceptable. The more desirable
312
+ matches are first in the list. Possibly in the future I might have a 'score' and reason
313
+ given for each ranking.
314
+
315
+ The following criteria MUST match to be acceptable:
316
+ * matches requested imagetyp.
317
+ * same filter as reference session (in the case want_type==FLAT only)
318
+ * same telescope as reference session
319
+
320
+ Quality is determined by (most important first):
321
+ * temperature of CCD-TEMP is closer to the reference session
322
+ * smaller DATE-OBS delta to the reference session
323
+
324
+ Eventually the code will check the following for 'nice to have' (but not now):
325
+ * TBD
326
+
327
+ Possibly eventually this code could be moved into recipes.
328
+
329
+ """
330
+
331
+ metadata: dict = ref_session.get("metadata", {})
332
+ ref_temp = metadata.get("CCD-TEMP", None)
333
+ ref_date_str = metadata.get(Database.DATE_OBS_KEY)
334
+
297
335
  # Now score and sort the candidates
298
336
  scored_candidates = []
299
337
 
@@ -318,23 +356,19 @@ class Starbash:
318
356
  # If we can't parse temps, give a neutral score
319
357
  score += 0
320
358
 
321
- # Score by date/time proximity (secondary importance)
322
- if ref_date is not None:
323
- candidate_date_str = candidate_image.get(Database.DATE_OBS_KEY)
324
- if candidate_date_str:
325
- try:
326
- candidate_date = datetime.fromisoformat(candidate_date_str)
327
- time_delta = abs(
328
- (ref_date - candidate_date).total_seconds()
329
- )
330
- # Closer in time = better score
331
- # Same day ≈ 100, 7 days 37, 30 days 9
332
- # Using 7-day half-life
333
- score += 100 * (2.718 ** (-time_delta / (7 * 86400)))
334
- except (ValueError, TypeError):
335
- logging.warning(
336
- f"Could not parse candidate date: {candidate_date_str}"
337
- )
359
+ # Parse reference date for time delta calculations
360
+ candidate_date_str = candidate_image.get(Database.DATE_OBS_KEY)
361
+ if ref_date_str and candidate_date_str:
362
+ try:
363
+ ref_date = datetime.fromisoformat(ref_date_str)
364
+ candidate_date = datetime.fromisoformat(candidate_date_str)
365
+ time_delta = abs((ref_date - candidate_date).total_seconds())
366
+ # Closer in time = better score
367
+ # Same day ≈ 100, 7 days ≈ 37, 30 days ≈ 9
368
+ # Using 7-day half-life
369
+ score += 100 * (2.718 ** (-time_delta / (7 * 86400)))
370
+ except (ValueError, TypeError):
371
+ logging.warning(f"Malformed date - ignoring entry")
338
372
 
339
373
  scored_candidates.append((score, candidate))
340
374
 
@@ -345,10 +379,10 @@ class Starbash:
345
379
  )
346
380
  continue
347
381
 
348
- # Sort by score (highest first) and return just the sessions
382
+ # Sort by score (highest first)
349
383
  scored_candidates.sort(key=lambda x: x[0], reverse=True)
350
384
 
351
- return [candidate for score, candidate in scored_candidates]
385
+ return [candidate for _, candidate in scored_candidates]
352
386
 
353
387
  def search_session(self) -> list[SessionRow]:
354
388
  """Search for sessions, optionally filtered by the current selection."""
@@ -356,23 +390,24 @@ class Starbash:
356
390
  conditions = self.selection.get_query_conditions()
357
391
  return self.db.search_session(conditions)
358
392
 
359
- def _reconstruct_image_path(self, image: ImageRow) -> ImageRow:
393
+ def _add_image_abspath(self, image: ImageRow) -> ImageRow:
360
394
  """Reconstruct absolute path from image row containing repo_url and relative path.
361
395
 
362
396
  Args:
363
397
  image: Image record with 'repo_url' and 'path' (relative) fields
364
398
 
365
399
  Returns:
366
- Modified image record with 'path' as absolute path
400
+ Modified image record with 'abspath' as absolute path
367
401
  """
368
- repo_url = image.get("repo_url")
369
- relative_path = image.get("path")
402
+ if not image.get("abspath"):
403
+ repo_url = image.get(Database.REPO_URL_KEY)
404
+ relative_path = image.get("path")
370
405
 
371
- if repo_url and relative_path:
372
- repo = self.repo_manager.get_repo_by_url(repo_url)
373
- if repo:
374
- absolute_path = repo.resolve_path(relative_path)
375
- image["path"] = str(absolute_path)
406
+ if repo_url and relative_path:
407
+ repo = self.repo_manager.get_repo_by_url(repo_url)
408
+ if repo:
409
+ absolute_path = repo.resolve_path(relative_path)
410
+ image["abspath"] = str(absolute_path)
376
411
 
377
412
  return image
378
413
 
@@ -380,13 +415,52 @@ class Starbash:
380
415
  """
381
416
  Get the reference ImageRow for a session with absolute path.
382
417
  """
418
+ from starbash.database import SearchCondition
419
+
383
420
  images = self.db.search_image(
384
- {Database.ID_KEY: session[get_column_name(Database.IMAGE_DOC_KEY)]}
421
+ [
422
+ SearchCondition(
423
+ "i.id", "=", session[get_column_name(Database.IMAGE_DOC_KEY)]
424
+ )
425
+ ]
385
426
  )
386
427
  assert (
387
428
  len(images) == 1
388
429
  ), f"Expected exactly one reference for session, found {len(images)}"
389
- return self._reconstruct_image_path(images[0])
430
+ return self._add_image_abspath(images[0])
431
+
432
+ def get_master_images(
433
+ self, imagetyp: str | None = None, reference_session: SessionRow | None = None
434
+ ) -> list[ImageRow]:
435
+ """Return a list of the specified master imagetyp (bias, flat etc...)
436
+ (or any type if not specified).
437
+
438
+ The first image will be the 'best'; remaining entries are progressively worse matches.
439
+
440
+ (the following is not yet implemented)
441
+ If reference_session is provided it will be used to refine the search as follows:
442
+ * The telescope must match
443
+ * The image resolutions and binnings must match
444
+ * The filter must match (for FLAT frames only)
445
+ * Preferably the master date_obs would be either before or slightly after (<24 hrs) the reference session start time
446
+ * Preferably the master date_obs should be the closest in date to the reference session start time
447
+ * The camera temperature should be as close as possible to the reference session camera temperature
448
+ """
449
+ master_repo = self.repo_manager.get_repo_by_kind("master")
450
+
451
+ if master_repo is None:
452
+ logging.warning("No master repo configured - skipping master frame load.")
453
+ return []
454
+
455
+ # Search for images in the master repo only
456
+ from starbash.database import SearchCondition
457
+
458
+ search_conditions = [SearchCondition("r.url", "=", master_repo.url)]
459
+ if imagetyp:
460
+ search_conditions.append(SearchCondition("i.imagetyp", "=", imagetyp))
461
+
462
+ images = self.db.search_image(search_conditions)
463
+ return images
390
464
 
391
465
  def get_session_images(self, session: SessionRow) -> list[ImageRow]:
392
466
  """
@@ -406,20 +480,52 @@ class Starbash:
406
480
  Raises:
407
481
  ValueError: If session_id is not found in the database
408
482
  """
409
- # Query images that match ALL session criteria including date range
410
- conditions = {
411
- Database.FILTER_KEY: session[get_column_name(Database.FILTER_KEY)],
412
- Database.IMAGETYP_KEY: session[get_column_name(Database.IMAGETYP_KEY)],
413
- Database.OBJECT_KEY: session[get_column_name(Database.OBJECT_KEY)],
414
- Database.TELESCOP_KEY: session[get_column_name(Database.TELESCOP_KEY)],
415
- "date_start": session[get_column_name(Database.START_KEY)],
416
- "date_end": session[get_column_name(Database.END_KEY)],
417
- }
483
+ from starbash.database import SearchCondition
418
484
 
419
- # Single query with all conditions
485
+ # Query images that match ALL session criteria including date range
486
+ # Note: We need to search JSON metadata for FILTER, IMAGETYP, OBJECT, TELESCOP
487
+ # since they're not indexed columns in the images table
488
+ conditions = [
489
+ SearchCondition(
490
+ "i.date_obs", ">=", session[get_column_name(Database.START_KEY)]
491
+ ),
492
+ SearchCondition(
493
+ "i.date_obs", "<=", session[get_column_name(Database.END_KEY)]
494
+ ),
495
+ SearchCondition(
496
+ "i.imagetyp", "=", session[get_column_name(Database.IMAGETYP_KEY)]
497
+ ),
498
+ ]
499
+
500
+ # we never want to return 'master' images as part of the session image paths
501
+ # (because we will be passing these tool siril or whatever to generate masters or
502
+ # some other downstream image)
503
+ master_repo = self.repo_manager.get_repo_by_kind("master")
504
+ if master_repo is not None:
505
+ conditions.append(SearchCondition("r.url", "<>", master_repo.url))
506
+
507
+ # Single query with indexed date conditions
420
508
  images = self.db.search_image(conditions)
509
+
510
+ # We no longer filter by target (object) because it might not be set anyway
511
+ filtered_images = []
512
+ for img in images:
513
+ if (
514
+ img.get(Database.FILTER_KEY)
515
+ == session[get_column_name(Database.FILTER_KEY)]
516
+ # and img.get(Database.OBJECT_KEY)
517
+ # == session[get_column_name(Database.OBJECT_KEY)]
518
+ and img.get(Database.TELESCOP_KEY)
519
+ == session[get_column_name(Database.TELESCOP_KEY)]
520
+ ):
521
+ filtered_images.append(img)
522
+
421
523
  # Reconstruct absolute paths for all images
422
- return [self._reconstruct_image_path(img) for img in images] if images else []
524
+ return (
525
+ [self._add_image_abspath(img) for img in filtered_images]
526
+ if filtered_images
527
+ else []
528
+ )
423
529
 
424
530
  def remove_repo_ref(self, url: str) -> None:
425
531
  """
@@ -447,6 +553,7 @@ class Starbash:
447
553
  # Match by converting to file:// URL format if needed
448
554
  if ref_dir == url or f"file://{ref_dir}" == url:
449
555
  repo_refs.remove(ref)
556
+
450
557
  found = True
451
558
  break
452
559
 
@@ -456,24 +563,76 @@ class Starbash:
456
563
  # Write the updated config
457
564
  self.user_repo.write_config()
458
565
 
459
- def reindex_repo(self, repo: Repo, force: bool = False):
566
+ def add_image_to_db(self, repo: Repo, f: Path, force: bool = False) -> None:
567
+ """Read FITS header from file and add/update image entry in the database."""
568
+
569
+ path = repo.get_path()
570
+ if not path:
571
+ raise ValueError(f"Repo path not found for {repo}")
572
+
573
+ whitelist = None
574
+ config = self.repo_manager.merged.get("config")
575
+ if config:
576
+ whitelist = config.get("fits-whitelist", None)
577
+
578
+ try:
579
+ # Convert absolute path to relative path within repo
580
+ relative_path = f.relative_to(path)
581
+
582
+ found = self.db.get_image(repo.url, str(relative_path))
583
+
584
+ # for debugging sometimes we want to limit scanning to a single directory or file
585
+ # debug_target = "masters-raw/2025-09-09/DARK"
586
+ debug_target = None
587
+ if debug_target:
588
+ if str(relative_path).startswith(debug_target):
589
+ logging.error("Debugging %s...", f)
590
+ found = False
591
+ else:
592
+ found = True # skip processing
593
+ force = False
594
+
595
+ if not found or force:
596
+ # Read and log the primary header (HDU 0)
597
+ with fits.open(str(f), memmap=False) as hdul:
598
+ # convert headers to dict
599
+ hdu0: Any = hdul[0]
600
+ header = hdu0.header
601
+ if type(header).__name__ == "Unknown":
602
+ raise ValueError("FITS header has Unknown type: %s", f)
603
+
604
+ items = header.items()
605
+ headers = {}
606
+ for key, value in items:
607
+ if (not whitelist) or (key in whitelist):
608
+ headers[key] = value
609
+ logging.debug("Headers for %s: %s", f, headers)
610
+ # Store relative path in database
611
+ headers["path"] = str(relative_path)
612
+ image_doc_id = self.db.upsert_image(headers, repo.url)
613
+
614
+ if not found:
615
+ # Update the session infos, but ONLY on first file scan
616
+ # (otherwise invariants will get messed up)
617
+ self._add_session(image_doc_id, header)
618
+
619
+ except Exception as e:
620
+ logging.warning("Failed to read FITS header for %s: %s", f, e)
621
+
622
+ def reindex_repo(self, repo: Repo, force: bool = False, subdir: str | None = None):
460
623
  """Reindex all repositories managed by the RepoManager."""
461
624
 
462
625
  # make sure this new repo is listed in the repos table
463
626
  self.repo_db_update() # not really ideal, a more optimal version would just add the new repo
464
627
 
465
- # FIXME, add a method to get just the repos that contain images
466
- if repo.is_scheme("file") and repo.kind != "recipe":
467
- logging.debug("Reindexing %s...", repo.url)
628
+ path = repo.get_path()
468
629
 
469
- whitelist = None
470
- config = self.repo_manager.merged.get("config")
471
- if config:
472
- whitelist = config.get("fits-whitelist", None)
630
+ if path and repo.is_scheme("file") and repo.kind != "recipe":
631
+ logging.debug("Reindexing %s...", repo.url)
473
632
 
474
- path = repo.get_path()
475
- if not path:
476
- raise ValueError(f"Repo path not found for {repo}")
633
+ if subdir:
634
+ path = path / subdir
635
+ # used to debug
477
636
 
478
637
  # Find all FITS files under this repo path
479
638
  for f in track(
@@ -481,37 +640,7 @@ class Starbash:
481
640
  description=f"Indexing {repo.url}...",
482
641
  ):
483
642
  # progress.console.print(f"Indexing {f}...")
484
- try:
485
- # Convert absolute path to relative path within repo
486
- relative_path = f.relative_to(path)
487
-
488
- found = self.db.get_image(repo.url, str(relative_path))
489
- if not found or force:
490
- # Read and log the primary header (HDU 0)
491
- with fits.open(str(f), memmap=False) as hdul:
492
- # convert headers to dict
493
- hdu0: Any = hdul[0]
494
- header = hdu0.header
495
- if type(header).__name__ == "Unknown":
496
- raise ValueError("FITS header has Unknown type: %s", f)
497
-
498
- items = header.items()
499
- headers = {}
500
- for key, value in items:
501
- if (not whitelist) or (key in whitelist):
502
- headers[key] = value
503
- logging.debug("Headers for %s: %s", f, headers)
504
- # Store relative path in database
505
- headers["path"] = str(relative_path)
506
- image_doc_id = self.db.upsert_image(headers, repo.url)
507
-
508
- if not found:
509
- # Update the session infos, but ONLY on first file scan
510
- # (otherwise invariants will get messed up)
511
- self._add_session(str(f), image_doc_id, header)
512
-
513
- except Exception as e:
514
- logging.warning("Failed to read FITS header for %s: %s", f, e)
643
+ self.add_image_to_db(repo, f, force=force)
515
644
 
516
645
  def reindex_repos(self, force: bool = False):
517
646
  """Reindex all repositories managed by the RepoManager."""
@@ -520,12 +649,14 @@ class Starbash:
520
649
  for repo in track(self.repo_manager.repos, description="Reindexing repos..."):
521
650
  self.reindex_repo(repo, force=force)
522
651
 
523
- def run_all_stages(self):
524
- """On the currently active session, run all processing stages"""
525
- logging.info("--- Running all stages ---")
652
+ def _get_stages(self, name: str) -> list[dict[str, Any]]:
653
+ """Get all pipeline stages defined in the merged configuration.
526
654
 
655
+ Returns:
656
+ List of stage definitions (dictionaries with 'name' and 'priority')
657
+ """
527
658
  # 1. Get all pipeline definitions (the `[[stages]]` tables with name and priority).
528
- pipeline_definitions = self.repo_manager.merged.getall("stages")
659
+ pipeline_definitions = self.repo_manager.merged.getall(name)
529
660
  flat_pipeline_steps = list(itertools.chain.from_iterable(pipeline_definitions))
530
661
 
531
662
  # 2. Sort the pipeline steps by their 'priority' field.
@@ -537,9 +668,17 @@ class Starbash:
537
668
  f"invalid stage definition: a stage is missing the required 'priority' key"
538
669
  ) from e
539
670
 
540
- logging.info(
671
+ logging.debug(
541
672
  f"Found {len(sorted_pipeline)} pipeline steps to run in order of priority."
542
673
  )
674
+ return sorted_pipeline
675
+
676
+ def run_all_stages(self):
677
+ """On the currently active session, run all processing stages"""
678
+ logging.info("--- Running all stages ---")
679
+
680
+ # 1. Get all pipeline definitions (the `[[stages]]` tables with name and priority).
681
+ sorted_pipeline = self._get_stages("stages")
543
682
 
544
683
  self.init_context()
545
684
  # 4. Iterate through the sorted pipeline and execute the associated tasks.
@@ -565,7 +704,7 @@ class Starbash:
565
704
  """Generate any missing master frames
566
705
 
567
706
  Steps:
568
- * set all_tasks to be all tasks for when == "setup.masters"
707
+ * set all_tasks to be all tasks for when == "setup.master.bias"
569
708
  * loop over all currently unfiltered sessions
570
709
  * for each session loop across all_tasks
571
710
  * if task input.type == the imagetyp for this current session
@@ -574,38 +713,60 @@ class Starbash:
574
713
  """
575
714
  sessions = self.search_session()
576
715
  for session in sessions:
577
- imagetyp = session[get_column_name(Database.IMAGETYP_KEY)]
578
- logging.debug(
579
- f"Processing session ID {session[get_column_name(Database.ID_KEY)]} with imagetyp '{imagetyp}'"
580
- )
581
-
582
- # 3. Get all available task definitions (the `[[stage]]` tables with tool, script, when).
583
- task_definitions = self.repo_manager.merged.getall("stage")
584
- all_tasks = list(itertools.chain.from_iterable(task_definitions))
716
+ try:
717
+ imagetyp = session[get_column_name(Database.IMAGETYP_KEY)]
718
+ logging.debug(
719
+ f"Processing session ID {session[get_column_name(Database.ID_KEY)]} with imagetyp '{imagetyp}'"
720
+ )
585
721
 
586
- # Find all tasks that should run during the "setup.masters" step.
587
- tasks_to_run = [
588
- task for task in all_tasks if task.get("when") == "setup.masters"
589
- ]
722
+ sorted_pipeline = self._get_stages("master-stages")
590
723
 
591
- for task in tasks_to_run:
592
- input_config = task.get("input", {})
593
- input_type = input_config.get("type")
594
- if imagetyp_equals(input_type, imagetyp):
595
- logging.info(
596
- f" Running master stage task for imagetyp '{imagetyp}'"
597
- )
598
-
599
- # Create a default process dir in /tmp, though more advanced 'session' based workflows will
600
- # probably override this and place it somewhere persistent.
601
- with tempfile.TemporaryDirectory(prefix="session_tmp_") as temp_dir:
602
- logging.debug(
603
- f"Created temporary session directory: {temp_dir}"
724
+ # 4. Iterate through the sorted pipeline and execute the associated tasks.
725
+ # FIXME unify the master vs normal step running code
726
+ for step in sorted_pipeline:
727
+ step_name = step.get("name")
728
+ if not step_name:
729
+ raise ValueError(
730
+ "Invalid pipeline step found: missing 'name' key."
604
731
  )
605
- self.init_context()
606
- self.context["process_dir"] = temp_dir
607
- self.add_session_to_context(session)
608
- self.run_stage(task)
732
+
733
+ # 3. Get all available task definitions (the `[[stage]]` tables with tool, script, when).
734
+ task_definitions = self.repo_manager.merged.getall("stage")
735
+ all_tasks = list(itertools.chain.from_iterable(task_definitions))
736
+
737
+ # Find all tasks that should run during this step
738
+ tasks_to_run = [
739
+ task for task in all_tasks if task.get("when") == step_name
740
+ ]
741
+
742
+ for task in tasks_to_run:
743
+ input_config = task.get("input", {})
744
+ input_type = input_config.get("type")
745
+ if not input_type:
746
+ raise ValueError(
747
+ f"Task for step '{step_name}' missing required input.type"
748
+ )
749
+ if self.aliases.equals(input_type, imagetyp):
750
+ logging.debug(
751
+ f"Running {step_name} task for imagetyp '{imagetyp}'"
752
+ )
753
+
754
+ # Create a default process dir in /tmp, though more advanced 'session' based workflows will
755
+ # probably override this and place it somewhere persistent.
756
+ with tempfile.TemporaryDirectory(
757
+ prefix="session_tmp_"
758
+ ) as temp_dir:
759
+ logging.debug(
760
+ f"Created temporary session directory: {temp_dir}"
761
+ )
762
+ self.init_context()
763
+ self.context["process_dir"] = temp_dir
764
+ self.add_session_to_context(session)
765
+ self.run_stage(task)
766
+ except RuntimeError as e:
767
+ logging.error(
768
+ f"Skipping session {session[get_column_name(Database.ID_KEY)]}: {e}"
769
+ )
609
770
 
610
771
  def init_context(self) -> None:
611
772
  """Do common session init"""
@@ -615,26 +776,19 @@ class Starbash:
615
776
 
616
777
  # Update the context with runtime values.
617
778
  runtime_context = {
618
- "masters": "/workspaces/starbash/images/masters", # FIXME find this the correct way
779
+ # "masters": "/workspaces/starbash/images/masters", # FIXME find this the correct way
619
780
  }
620
781
  self.context.update(runtime_context)
621
782
 
622
783
  def add_session_to_context(self, session: SessionRow) -> None:
623
784
  """adds to context from the indicated session:
624
- * input_files - all of the files mentioned in the session
625
785
  * instrument - for the session
626
786
  * date - the localtimezone date of the session
627
787
  * imagetyp - the imagetyp of the session
628
788
  * session - the current session row (joined with a typical image) (can be used to
629
789
  find things like telescope, temperature ...)
790
+ * session_config - a short human readable description of the session - suitable for logs or filenames
630
791
  """
631
- # Get images for this session
632
- images = self.get_session_images(session)
633
- logging.debug(f"Adding {len(images)} files as context.input_files")
634
- self.context["input_files"] = [
635
- img["path"] for img in images
636
- ] # Pass in the file list via the context dict
637
-
638
792
  # it is okay to give them the actual session row, because we're never using it again
639
793
  self.context["session"] = session
640
794
 
@@ -644,19 +798,67 @@ class Starbash:
644
798
 
645
799
  imagetyp = session.get(get_column_name(Database.IMAGETYP_KEY))
646
800
  if imagetyp:
801
+ imagetyp = self.aliases.normalize(imagetyp)
647
802
  self.context["imagetyp"] = imagetyp
648
803
 
804
+ # add a short human readable description of the session - suitable for logs or in filenames
805
+ session_config = f"{imagetyp}"
806
+
807
+ metadata = session.get("metadata", {})
808
+ filter = metadata.get(Database.FILTER_KEY)
809
+ if (imagetyp == "flat" or imagetyp == "light") and filter:
810
+ # we only care about filters in these cases
811
+ session_config += f"_{filter}"
812
+ if imagetyp == "dark":
813
+ exptime = session.get(get_column_name(Database.EXPTIME_KEY))
814
+ if exptime:
815
+ session_config += f"_{int(float(exptime))}s"
816
+
817
+ self.context["session_config"] = session_config
818
+
649
819
  date = session.get(get_column_name(Database.START_KEY))
650
820
  if date:
651
821
  self.context["date"] = to_shortdate(date)
652
822
 
823
+ def add_input_masters(self, stage: dict) -> None:
824
+ """based on input.masters add the correct master frames as context.master.<type> filepaths"""
825
+ session = self.context.get("session")
826
+ assert session is not None, "context.session should have been already set"
827
+
828
+ input_config = stage.get("input", {})
829
+ master_types: list[str] = input_config.get("masters", [])
830
+ for master_type in master_types:
831
+ masters = self.get_master_images(
832
+ imagetyp=master_type, reference_session=session
833
+ )
834
+ if not masters:
835
+ raise RuntimeError(
836
+ f"No master frames of type '{master_type}' found for stage '{stage.get('name')}'"
837
+ )
838
+
839
+ context_master = self.context.setdefault("master", {})
840
+
841
+ if len(masters) > 1:
842
+ logging.debug(
843
+ f"Multiple ({len(masters)}) master frames of type '{master_type}' found, using first. FIXME."
844
+ )
845
+
846
+ # Try to rank the images by desirability
847
+ masters = self.score_candidates(masters, session)
848
+
849
+ self._add_image_abspath(masters[0]) # make sure abspath is populated
850
+ selected_master = masters[0]["abspath"]
851
+ logging.info(f"For master '{master_type}', using: {selected_master}")
852
+
853
+ context_master[master_type] = selected_master
854
+
653
855
  def add_input_files(self, stage: dict) -> None:
654
856
  """adds to context.input_files based on the stage input config"""
655
857
  input_config = stage.get("input")
656
- input_required = False
858
+ input_required = 0
657
859
  if input_config:
658
860
  # if there is an "input" dict, we assume input.required is true if unset
659
- input_required = input_config.get("required", True)
861
+ input_required = input_config.get("required", 0)
660
862
  source = input_config.get("source")
661
863
  if source is None:
662
864
  raise ValueError(
@@ -672,12 +874,17 @@ class Starbash:
672
874
  input_files # Pass in the file list via the context dict
673
875
  )
674
876
  elif source == "repo":
675
- # We expect that higher level code has already added the correct input files
676
- # to the context
677
- if not "input_files" in self.context:
678
- raise RuntimeError(
679
- "Input config specifies 'repo' but no 'input_files' found in context"
680
- )
877
+ # Get images for this session (by pulling from repo)
878
+ session = self.context.get("session")
879
+ assert (
880
+ session is not None
881
+ ), "context.session should have been already set"
882
+
883
+ images = self.get_session_images(session)
884
+ logging.debug(f"Using {len(images)} files as input_files")
885
+ self.context["input_files"] = [
886
+ img["abspath"] for img in images
887
+ ] # Pass in the file list via the context dict
681
888
  else:
682
889
  raise ValueError(
683
890
  f"Stage '{stage.get('name')}' has invalid 'input' source: {source}"
@@ -689,8 +896,8 @@ class Starbash:
689
896
  if "input_files" in self.context:
690
897
  del self.context["input_files"]
691
898
 
692
- if input_required and not "input_files" in self.context:
693
- raise RuntimeError("No input files found for stage")
899
+ if input_required and len(self.context.get("input_files", [])) < input_required:
900
+ raise RuntimeError(f"Stage requires at least {input_required} input files")
694
901
 
695
902
  def add_output_path(self, stage: dict) -> None:
696
903
  """Adds output path information to context based on the stage output config.
@@ -700,6 +907,7 @@ class Starbash:
700
907
  - context.output.base_path - full path without file extension
701
908
  - context.output.suffix - file extension (e.g., .fits or .fit.gz)
702
909
  - context.output.full_path - complete output file path
910
+ - context.output.repo - the destination Repo (if applicable)
703
911
  """
704
912
  output_config = stage.get("output")
705
913
  if not output_config:
@@ -754,8 +962,8 @@ class Starbash:
754
962
  "base_path": base_path,
755
963
  # "suffix": full_path.suffix, not needed I think
756
964
  "full_path": full_path,
965
+ "repo": dest_repo,
757
966
  }
758
-
759
967
  else:
760
968
  raise ValueError(
761
969
  f"Unsupported output destination type: {dest}. Only 'repo' is currently supported."
@@ -777,17 +985,29 @@ class Starbash:
777
985
 
778
986
  logging.info(f"Running stage: {stage_desc}")
779
987
 
780
- tool_name = stage.get("tool")
781
- if not tool_name:
988
+ tool_dict = stage.get("tool")
989
+ if not tool_dict:
782
990
  raise ValueError(
783
991
  f"Stage '{stage.get('name')}' is missing a 'tool' definition."
784
992
  )
785
- tool: Tool | None = tools.get(tool_name)
993
+ tool_name = tool_dict.get("name")
994
+ if not tool_name:
995
+ raise ValueError(
996
+ f"Stage '{stage.get('name')}' is missing a 'tool.name' definition."
997
+ )
998
+ tool = tools.get(tool_name)
786
999
  if not tool:
787
1000
  raise ValueError(
788
1001
  f"Tool '{tool_name}' for stage '{stage.get('name')}' not found."
789
1002
  )
790
1003
  logging.debug(f" Using tool: {tool_name}")
1004
+ tool.set_defaults()
1005
+
1006
+ # Allow stage to override tool timeout if specified
1007
+ tool_timeout = tool_dict.get("timeout")
1008
+ if tool_timeout is not None:
1009
+ tool.timeout = float(tool_timeout)
1010
+ logging.debug(f"Using tool timeout: {tool.timeout} seconds")
791
1011
 
792
1012
  script_filename = stage.get("script-file", tool.default_script_file)
793
1013
  if script_filename:
@@ -806,6 +1026,7 @@ class Starbash:
806
1026
  stage_context = stage.get("context", {})
807
1027
  self.context.update(stage_context)
808
1028
  self.add_input_files(stage)
1029
+ self.add_input_masters(stage)
809
1030
  self.add_output_path(stage)
810
1031
 
811
1032
  # if the output path already exists and is newer than all input files, skip processing
@@ -828,3 +1049,5 @@ class Starbash:
828
1049
 
829
1050
  if not output_path or not os.path.exists(output_path):
830
1051
  raise RuntimeError(f"Expected output file not found: {output_path}")
1052
+ else:
1053
+ self.add_image_to_db(output_info["repo"], Path(output_path), force=True)