starbash 0.1.8__py3-none-any.whl → 0.1.10__py3-none-any.whl

This diff shows the content of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release.

starbash/app.py CHANGED
@@ -1,6 +1,9 @@
+ import cmd
  import logging
  from importlib import resources
+ import os
  from pathlib import Path
+ import tempfile
  import typer
  import tomlkit
  from tomlkit.toml_file import TOMLFile
@@ -11,16 +14,21 @@ import itertools
  from rich.progress import track
  from rich.logging import RichHandler
  import shutil
+ from datetime import datetime
+ import rich.console
+ import copy

  import starbash
- from starbash import console, _is_test_env
- from starbash.database import Database
- from repo.manager import Repo
- from starbash.tool import Tool
+ from starbash import console, _is_test_env, to_shortdate
+ from starbash.aliases import Aliases
+ from starbash.database import Database, SessionRow, ImageRow, get_column_name
+ from repo import Repo, repo_suffix
+ from starbash.toml import toml_from_template
+ from starbash.tool import Tool, expand_context, expand_context_unsafe
  from repo import RepoManager
  from starbash.tool import tools
  from starbash.paths import get_user_config_dir, get_user_data_dir
- from starbash.selection import Selection
+ from starbash.selection import Selection, where_tuple
  from starbash.analytics import (
      NopAnalytics,
      analytics_exception,
@@ -29,12 +37,17 @@ from starbash.analytics import (
      analytics_start_transaction,
  )

+ # Type aliases for better documentation

- def setup_logging():
+
+ def setup_logging(stderr: bool = False):
      """
      Configures basic logging.
      """
-     handlers = [RichHandler(rich_tracebacks=True)] if not _is_test_env else []
+     console = rich.console.Console(stderr=stderr)
+     handlers = (
+         [RichHandler(console=console, rich_tracebacks=True)] if not _is_test_env else []
+     )
      logging.basicConfig(
          level=starbash.log_filter_level,  # use the global log filter level
          format="%(message)s",
@@ -46,26 +59,24 @@ def setup_logging():
  def get_user_config_path() -> Path:
      """Returns the path to the user config file."""
      config_dir = get_user_config_dir()
-     return config_dir / "starbash.toml"
+     return config_dir / repo_suffix


  def create_user() -> Path:
      """Create user directories if they don't exist yet."""
      path = get_user_config_path()
      if not path.exists():
-         tomlstr = (
-             resources.files("starbash")
-             .joinpath("templates/userconfig.toml")
-             .read_text()
-         )
-         toml = tomlkit.parse(tomlstr)
-         TOMLFile(path).write(toml)
+         toml_from_template("userconfig", path)
      logging.info(f"Created user config file: {path}")
      return get_user_config_dir()


- def copy_images_to_dir(images: list[dict[str, Any]], output_dir: Path) -> None:
-     """Copy images to the specified output directory (using symbolic links if possible)."""
+ def copy_images_to_dir(images: list[ImageRow], output_dir: Path) -> None:
+     """Copy images to the specified output directory (using symbolic links if possible).
+
+     This function requires that "abspath" already be populated in each ImageRow. Normally
+     the caller does this by calling Starbash._add_image_abspath() on the image.
+     """

      # Export images
      console.print(f"[cyan]Exporting {len(images)} images to {output_dir}...[/cyan]")
@@ -76,7 +87,7 @@ def copy_images_to_dir(images: list[dict[str, Any]], output_dir: Path) -> None:

      for image in images:
          # Get the source path from the image metadata
-         source_path = Path(image.get("path", ""))
+         source_path = Path(image.get("abspath", ""))

          if not source_path.exists():
              console.print(f"[red]Warning: Source file not found: {source_path}[/red]")
@@ -116,21 +127,38 @@ def copy_images_to_dir(images: list[dict[str, Any]], output_dir: Path) -> None:
  class Starbash:
      """The main Starbash application class."""

-     def __init__(self, cmd: str = "unspecified"):
+     def __init__(self, cmd: str = "unspecified", stderr_logging: bool = False):
          """
          Initializes the Starbash application by loading configurations
          and setting up the repository manager.
          """
-         setup_logging()
+         setup_logging(stderr=stderr_logging)
          logging.info("Starbash starting...")

          # Load app defaults and initialize the repository manager
+         self._init_repos()
+         self._init_analytics(cmd)
+         self._init_aliases()
+
+         logging.info(
+             f"Repo manager initialized with {len(self.repo_manager.repos)} repos."
+         )
+         # self.repo_manager.dump()
+
+         self._db = None  # Lazy initialization - only create when accessed
+
+         # Initialize selection state (stored in user config repo)
+         self.selection = Selection(self.user_repo)
+
+     def _init_repos(self) -> None:
+         """Initialize all repositories managed by the RepoManager."""
          self.repo_manager = RepoManager()
          self.repo_manager.add_repo("pkg://defaults")

          # Add user prefs as a repo
          self.user_repo = self.repo_manager.add_repo("file://" + str(create_user()))

+     def _init_analytics(self, cmd: str) -> None:
          self.analytics = NopAnalytics()
          if self.user_repo.get("analytics.enabled", True):
              include_user = self.user_repo.get("analytics.include_user", False)
@@ -144,28 +172,41 @@ class Starbash:
              self.analytics = analytics_start_transaction(name="App session", op=cmd)
              self.analytics.__enter__()

-         logging.info(
-             f"Repo manager initialized with {len(self.repo_manager.repos)} repos."
-         )
-         # self.repo_manager.dump()
-
-         self.db = Database()
-         self.session_query = None  # None means search all sessions
-
-         # Initialize selection state
-         data_dir = get_user_data_dir()
-         selection_file = data_dir / "selection.json"
-         self.selection = Selection(selection_file)
+     def _init_aliases(self) -> None:
+         alias_dict = self.repo_manager.get("aliases", {})
+         assert isinstance(alias_dict, dict), "Aliases config must be a dictionary"
+         self.aliases = Aliases(alias_dict)
+
+     @property
+     def db(self) -> Database:
+         """Lazy initialization of database - only created as needed."""
+         if self._db is None:
+             self._db = Database()
+             # Ensure all repos are registered in the database
+             self.repo_db_update()
+         return self._db
+
+     def repo_db_update(self) -> None:
+         """Update the database with all managed repositories.
+
+         Iterates over all repos in the RepoManager and ensures each one
+         has a record in the repos table. This is called during lazy database
+         initialization to prepare repo_id values for image insertion.
+         """
+         if self._db is None:
+             return

-         # FIXME, call reindex somewhere and also index whenever new repos are added
-         # self.reindex_repos()
+         for repo in self.repo_manager.repos:
+             self._db.upsert_repo(repo.url)
+             logging.debug(f"Registered repo in database: {repo.url}")

      # --- Lifecycle ---
      def close(self) -> None:
          self.analytics.__exit__(None, None, None)

          analytics_shutdown()
-         self.db.close()
+         if self._db is not None:
+             self._db.close()

      # Context manager support
      def __enter__(self) -> "Starbash":
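
Note: with the new lazy `db` property above, no database handle is opened until first use, so `close()` must guard on `self._db`. A minimal standalone sketch of the same pattern (hypothetical class, with sqlite3 standing in for starbash's Database):

    import sqlite3

    class LazyDbApp:
        def __init__(self) -> None:
            self._db: sqlite3.Connection | None = None  # not opened yet

        @property
        def db(self) -> sqlite3.Connection:
            # First access creates the handle; later accesses reuse it.
            if self._db is None:
                self._db = sqlite3.connect(":memory:")
            return self._db

        def close(self) -> None:
            # Only close what was actually opened.
            if self._db is not None:
                self._db.close()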
@@ -179,39 +220,249 @@ class Starbash:
          self.close()
          return handled

-     def _add_session(self, f: str, image_doc_id: int, header: dict) -> None:
-         filter = header.get(Database.FILTER_KEY, "unspecified")
+     def _add_session(self, image_doc_id: int, header: dict) -> None:
+         """We just added a new image, create or update its session entry as needed."""
          image_type = header.get(Database.IMAGETYP_KEY)
          date = header.get(Database.DATE_OBS_KEY)
          if not date or not image_type:
              logging.warning(
-                 "Image %s missing either DATE-OBS or IMAGETYP FITS header, skipping...",
-                 f,
+                 "Image '%s' missing either DATE-OBS or IMAGETYP FITS header, skipping...",
+                 header.get("path", "unspecified"),
              )
          else:
              exptime = header.get(Database.EXPTIME_KEY, 0)
-             telescop = header.get(Database.TELESCOP_KEY, "unspecified")
+
              new = {
-                 Database.FILTER_KEY: filter,
-                 Database.START_KEY: date,
-                 Database.END_KEY: date,  # FIXME not quite correct, should be longer by exptime
-                 Database.IMAGE_DOC_KEY: image_doc_id,
-                 Database.IMAGETYP_KEY: image_type,
-                 Database.NUM_IMAGES_KEY: 1,
-                 Database.EXPTIME_TOTAL_KEY: exptime,
-                 Database.OBJECT_KEY: header.get(Database.OBJECT_KEY, "unspecified"),
-                 Database.TELESCOP_KEY: telescop,
+                 get_column_name(Database.START_KEY): date,
+                 get_column_name(
+                     Database.END_KEY
+                 ): date,  # FIXME not quite correct, should be longer by exptime
+                 get_column_name(Database.IMAGE_DOC_KEY): image_doc_id,
+                 get_column_name(Database.IMAGETYP_KEY): image_type,
+                 get_column_name(Database.NUM_IMAGES_KEY): 1,
+                 get_column_name(Database.EXPTIME_TOTAL_KEY): exptime,
+                 get_column_name(Database.EXPTIME_KEY): exptime,
              }
+
+             filter = header.get(Database.FILTER_KEY)
+             if filter:
+                 new[get_column_name(Database.FILTER_KEY)] = filter
+
+             telescop = header.get(Database.TELESCOP_KEY)
+             if telescop:
+                 new[get_column_name(Database.TELESCOP_KEY)] = telescop
+
+             obj = header.get(Database.OBJECT_KEY)
+             if obj:
+                 new[get_column_name(Database.OBJECT_KEY)] = obj
+
              session = self.db.get_session(new)
              self.db.upsert_session(new, existing=session)

-     def search_session(self) -> list[dict[str, Any]]:
+     def guess_sessions(
+         self, ref_session: SessionRow, want_type: str
+     ) -> list[SessionRow]:
+         """Given a particular session type (i.e. FLAT or BIAS etc...) and an
+         existing session (which is assumed to generally be a LIGHT frame based session):
+
+         Return a list of possible sessions which would be acceptable. The more desirable
+         matches are first in the list. Possibly in the future I might have a 'score' and reason
+         given for each ranking.
+
+         The following criteria MUST match to be acceptable:
+         * matches requested imagetyp.
+         * same filter as reference session (in the case want_type==FLAT only)
+         * same telescope as reference session
+
+         Quality is determined by (most important first):
+         * temperature of CCD-TEMP is closer to the reference session
+         * smaller DATE-OBS delta to the reference session
+
+         Eventually the code will check the following for 'nice to have' (but not now):
+         * TBD
+
+         Possibly eventually this code could be moved into recipes.
+
+         """
+         # Get reference image to access CCD-TEMP and DATE-OBS
+
+         # Build search conditions - MUST match criteria
+         conditions = {
+             Database.IMAGETYP_KEY: want_type,
+             Database.TELESCOP_KEY: ref_session[get_column_name(Database.TELESCOP_KEY)],
+         }
+
+         # For FLAT frames, filter must match the reference session
+         if want_type.upper() == "FLAT":
+             conditions[Database.FILTER_KEY] = ref_session[
+                 get_column_name(Database.FILTER_KEY)
+             ]
+
+         # Search for candidate sessions
+         candidates = self.db.search_session(where_tuple(conditions))
+
+         return self.score_candidates(candidates, ref_session)
+
+     def score_candidates(
+         self, candidates: list[dict[str, Any]], ref_session: SessionRow
+     ) -> list[SessionRow]:
+         """Given a list of images or sessions, try to rank that list by desirability.
+
+         Return a list of possible images/sessions which would be acceptable. The more desirable
+         matches are first in the list. Possibly in the future I might have a 'score' and reason
+         given for each ranking.
+
+         The following criteria MUST match to be acceptable:
+         * matches requested imagetyp.
+         * same filter as reference session (in the case want_type==FLAT only)
+         * same telescope as reference session
+
+         Quality is determined by (most important first):
+         * temperature of CCD-TEMP is closer to the reference session
+         * smaller DATE-OBS delta to the reference session
+
+         Eventually the code will check the following for 'nice to have' (but not now):
+         * TBD
+
+         Possibly eventually this code could be moved into recipes.
+
+         """
+
+         metadata: dict = ref_session.get("metadata", {})
+         ref_temp = metadata.get("CCD-TEMP", None)
+         ref_date_str = metadata.get(Database.DATE_OBS_KEY)
+
+         # Now score and sort the candidates
+         scored_candidates = []
+
+         for candidate in candidates:
+             score = 0.0
+
+             # Get candidate image metadata to access CCD-TEMP and DATE-OBS
+             try:
+                 candidate_image = candidate.get("metadata", {})
+
+                 # Score by CCD-TEMP difference (most important)
+                 # Lower temperature difference = better score
+                 if ref_temp is not None:
+                     candidate_temp = candidate_image.get("CCD-TEMP")
+                     if candidate_temp is not None:
+                         try:
+                             temp_diff = abs(float(ref_temp) - float(candidate_temp))
+                             # Use exponential decay: closer temps get much better scores
+                             # Perfect match (0°C diff) = 1000, 1°C diff ≈ 368, 2°C diff ≈ 135
+                             score += 1000 * (2.718 ** (-temp_diff))
+                         except (ValueError, TypeError):
+                             # If we can't parse temps, give a neutral score
+                             score += 0
+
+                 # Parse reference date for time delta calculations
+                 candidate_date_str = candidate_image.get(Database.DATE_OBS_KEY)
+                 if ref_date_str and candidate_date_str:
+                     try:
+                         ref_date = datetime.fromisoformat(ref_date_str)
+                         candidate_date = datetime.fromisoformat(candidate_date_str)
+                         time_delta = abs((ref_date - candidate_date).total_seconds())
+                         # Closer in time = better score
+                         # Same day ≈ 100, 7 days ≈ 37, 30 days ≈ 9
+                         # Using 7-day half-life
+                         score += 100 * (2.718 ** (-time_delta / (7 * 86400)))
+                     except (ValueError, TypeError):
+                         logging.warning(f"Malformed date - ignoring entry")
+
+                 scored_candidates.append((score, candidate))
+
+             except (AssertionError, KeyError) as e:
+                 # If we can't get the session image, log and skip this candidate
+                 logging.warning(
+                     f"Could not score candidate session {candidate.get('id')}: {e}"
+                 )
+                 continue
+
+         # Sort by score (highest first)
+         scored_candidates.sort(key=lambda x: x[0], reverse=True)
+
+         return [candidate for _, candidate in scored_candidates]
+
+     def search_session(self) -> list[SessionRow]:
          """Search for sessions, optionally filtered by the current selection."""
          # Get query conditions from selection
          conditions = self.selection.get_query_conditions()
          return self.db.search_session(conditions)

-     def get_session_images(self, session_id: int) -> list[dict[str, Any]]:
+     def _add_image_abspath(self, image: ImageRow) -> ImageRow:
+         """Reconstruct absolute path from image row containing repo_url and relative path.
+
+         Args:
+             image: Image record with 'repo_url' and 'path' (relative) fields
+
+         Returns:
+             Modified image record with 'abspath' as absolute path
+         """
+         if not image.get("abspath"):
+             repo_url = image.get(Database.REPO_URL_KEY)
+             relative_path = image.get("path")
+
+             if repo_url and relative_path:
+                 repo = self.repo_manager.get_repo_by_url(repo_url)
+                 if repo:
+                     absolute_path = repo.resolve_path(relative_path)
+                     image["abspath"] = str(absolute_path)
+
+         return image
+
+     def get_session_image(self, session: SessionRow) -> ImageRow:
+         """
+         Get the reference ImageRow for a session with absolute path.
+         """
+         from starbash.database import SearchCondition
+
+         images = self.db.search_image(
+             [
+                 SearchCondition(
+                     "i.id", "=", session[get_column_name(Database.IMAGE_DOC_KEY)]
+                 )
+             ]
+         )
+         assert (
+             len(images) == 1
+         ), f"Expected exactly one reference for session, found {len(images)}"
+         return self._add_image_abspath(images[0])
+
+     def get_master_images(
+         self, imagetyp: str | None = None, reference_session: SessionRow | None = None
+     ) -> list[ImageRow]:
+         """Return a list of the specified master imagetyp (bias, flat etc...)
+         (or any type if not specified).
+
+         The first image will be the 'best'; remaining entries are progressively worse matches.
+
+         (the following is not yet implemented)
+         If reference_session is provided it will be used to refine the search as follows:
+         * The telescope must match
+         * The image resolutions and binnings must match
+         * The filter must match (for FLAT frames only)
+         * Preferably the master date_obs would be either before or slightly after (<24 hrs) the reference session start time
+         * Preferably the master date_obs should be the closest in date to the reference session start time
+         * The camera temperature should be as close as possible to the reference session camera temperature
+         """
+         master_repo = self.repo_manager.get_repo_by_kind("master")
+
+         if master_repo is None:
+             logging.warning("No master repo configured - skipping master frame load.")
+             return []
+
+         # Search for images in the master repo only
+         from starbash.database import SearchCondition
+
+         search_conditions = [SearchCondition("r.url", "=", master_repo.url)]
+         if imagetyp:
+             search_conditions.append(SearchCondition("i.imagetyp", "=", imagetyp))
+
+         images = self.db.search_image(search_conditions)
+         return images
+
+     def get_session_images(self, session: SessionRow) -> list[ImageRow]:
          """
          Get all images belonging to a specific session.

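Note: the two decay terms in score_candidates weight temperature far above recency, so a month-old master at the exact sensor temperature still outranks a same-night frame that is 1 °C off. A hedged sketch of the arithmetic (math.exp standing in for the literal 2.718 constant used above):

    import math

    def candidate_score(temp_diff_c: float, time_delta_s: float) -> float:
        temp_score = 1000 * math.exp(-temp_diff_c)  # 0 C -> 1000, 1 C -> ~368
        date_score = 100 * math.exp(-time_delta_s / (7 * 86400))  # ~7-day decay
        return temp_score + date_score

    print(candidate_score(0.0, 30 * 86400))  # ~1001: exact temp, month old
    print(candidate_score(1.0, 3600))        # ~467: 1 C off, same night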
@@ -229,24 +480,52 @@ class Starbash:
          Raises:
              ValueError: If session_id is not found in the database
          """
-         # First get the session details
-         session = self.db.get_session_by_id(session_id)
-         if session is None:
-             raise ValueError(f"Session with id {session_id} not found")
+         from starbash.database import SearchCondition

          # Query images that match ALL session criteria including date range
-         conditions = {
-             Database.FILTER_KEY: session[Database.FILTER_KEY],
-             Database.IMAGETYP_KEY: session[Database.IMAGETYP_KEY],
-             Database.OBJECT_KEY: session[Database.OBJECT_KEY],
-             Database.TELESCOP_KEY: session[Database.TELESCOP_KEY],
-             "date_start": session[Database.START_KEY],
-             "date_end": session[Database.END_KEY],
-         }
-
-         # Single query with all conditions
+         # Note: We need to search JSON metadata for FILTER, IMAGETYP, OBJECT, TELESCOP
+         # since they're not indexed columns in the images table
+         conditions = [
+             SearchCondition(
+                 "i.date_obs", ">=", session[get_column_name(Database.START_KEY)]
+             ),
+             SearchCondition(
+                 "i.date_obs", "<=", session[get_column_name(Database.END_KEY)]
+             ),
+             SearchCondition(
+                 "i.imagetyp", "=", session[get_column_name(Database.IMAGETYP_KEY)]
+             ),
+         ]
+
+         # we never want to return 'master' images as part of the session image paths
+         # (because we will be passing these to tools like Siril to generate masters or
+         # some other downstream image)
+         master_repo = self.repo_manager.get_repo_by_kind("master")
+         if master_repo is not None:
+             conditions.append(SearchCondition("r.url", "<>", master_repo.url))
+
+         # Single query with indexed date conditions
          images = self.db.search_image(conditions)
-         return images if images else []
+
+         # We no longer filter by target (object) because it might not be set anyway
+         filtered_images = []
+         for img in images:
+             if (
+                 img.get(Database.FILTER_KEY)
+                 == session[get_column_name(Database.FILTER_KEY)]
+                 # and img.get(Database.OBJECT_KEY)
+                 # == session[get_column_name(Database.OBJECT_KEY)]
+                 and img.get(Database.TELESCOP_KEY)
+                 == session[get_column_name(Database.TELESCOP_KEY)]
+             ):
+                 filtered_images.append(img)
+
+         # Reconstruct absolute paths for all images
+         return (
+             [self._add_image_abspath(img) for img in filtered_images]
+             if filtered_images
+             else []
+         )

      def remove_repo_ref(self, url: str) -> None:
          """
@@ -258,6 +537,8 @@ class Starbash:
          Raises:
              ValueError: If the repository URL is not found in user configuration
          """
+         self.db.remove_repo(url)
+
          # Get the repo-ref list from user config
          repo_refs = self.user_repo.config.get("repo-ref")

@@ -272,6 +553,7 @@ class Starbash:
              # Match by converting to file:// URL format if needed
              if ref_dir == url or f"file://{ref_dir}" == url:
                  repo_refs.remove(ref)
+
                  found = True
                  break

@@ -281,20 +563,76 @@ class Starbash:
          # Write the updated config
          self.user_repo.write_config()

-     def reindex_repo(self, repo: Repo, force: bool = False):
+     def add_image_to_db(self, repo: Repo, f: Path, force: bool = False) -> None:
+         """Read FITS header from file and add/update image entry in the database."""
+
+         path = repo.get_path()
+         if not path:
+             raise ValueError(f"Repo path not found for {repo}")
+
+         whitelist = None
+         config = self.repo_manager.merged.get("config")
+         if config:
+             whitelist = config.get("fits-whitelist", None)
+
+         try:
+             # Convert absolute path to relative path within repo
+             relative_path = f.relative_to(path)
+
+             found = self.db.get_image(repo.url, str(relative_path))
+
+             # for debugging sometimes we want to limit scanning to a single directory or file
+             # debug_target = "masters-raw/2025-09-09/DARK"
+             debug_target = None
+             if debug_target:
+                 if str(relative_path).startswith(debug_target):
+                     logging.error("Debugging %s...", f)
+                     found = False
+                 else:
+                     found = True  # skip processing
+                     force = False
+
+             if not found or force:
+                 # Read and log the primary header (HDU 0)
+                 with fits.open(str(f), memmap=False) as hdul:
+                     # convert headers to dict
+                     hdu0: Any = hdul[0]
+                     header = hdu0.header
+                     if type(header).__name__ == "Unknown":
+                         raise ValueError("FITS header has Unknown type: %s", f)
+
+                     items = header.items()
+                     headers = {}
+                     for key, value in items:
+                         if (not whitelist) or (key in whitelist):
+                             headers[key] = value
+                     logging.debug("Headers for %s: %s", f, headers)
+                     # Store relative path in database
+                     headers["path"] = str(relative_path)
+                     image_doc_id = self.db.upsert_image(headers, repo.url)
+
+                     if not found:
+                         # Update the session infos, but ONLY on first file scan
+                         # (otherwise invariants will get messed up)
+                         self._add_session(image_doc_id, header)
+
+         except Exception as e:
+             logging.warning("Failed to read FITS header for %s: %s", f, e)
+
+     def reindex_repo(self, repo: Repo, force: bool = False, subdir: str | None = None):
          """Reindex all repositories managed by the RepoManager."""
-         # FIXME, add a method to get just the repos that contain images
-         if repo.is_scheme("file") and repo.kind != "recipe":
-             logging.debug("Reindexing %s...", repo.url)

-             whitelist = None
-             config = self.repo_manager.merged.get("config")
-             if config:
-                 whitelist = config.get("fits-whitelist", None)
+         # make sure this new repo is listed in the repos table
+         self.repo_db_update()  # not really ideal, a more optimal version would just add the new repo
+
+         path = repo.get_path()

-             path = repo.get_path()
-             if not path:
-                 raise ValueError(f"Repo path not found for {repo}")
+         if path and repo.is_scheme("file") and repo.kind != "recipe":
+             logging.debug("Reindexing %s...", repo.url)
+
+             if subdir:
+                 path = path / subdir
+                 # used to debug

              # Find all FITS files under this repo path
              for f in track(
@@ -302,33 +640,7 @@ class Starbash:
                  description=f"Indexing {repo.url}...",
              ):
                  # progress.console.print(f"Indexing {f}...")
-                 try:
-                     found = self.db.get_image(str(f))
-                     if not found or force:
-                         # Read and log the primary header (HDU 0)
-                         with fits.open(str(f), memmap=False) as hdul:
-                             # convert headers to dict
-                             hdu0: Any = hdul[0]
-                             header = hdu0.header
-                             if type(header).__name__ == "Unknown":
-                                 raise ValueError("FITS header has Unknown type: %s", f)
-
-                             items = header.items()
-                             headers = {}
-                             for key, value in items:
-                                 if (not whitelist) or (key in whitelist):
-                                     headers[key] = value
-                             logging.debug("Headers for %s: %s", f, headers)
-                             headers["path"] = str(f)
-                             image_doc_id = self.db.upsert_image(headers)
-
-                             if not found:
-                                 # Update the session infos, but ONLY on first file scan
-                                 # (otherwise invariants will get messed up)
-                                 self._add_session(str(f), image_doc_id, header)
-
-                 except Exception as e:
-                     logging.warning("Failed to read FITS header for %s: %s", f, e)
+                 self.add_image_to_db(repo, f, force=force)

      def reindex_repos(self, force: bool = False):
          """Reindex all repositories managed by the RepoManager."""
@@ -337,16 +649,14 @@ class Starbash:
          for repo in track(self.repo_manager.repos, description="Reindexing repos..."):
              self.reindex_repo(repo, force=force)

-     def test_processing(self):
-         """A crude test of image processing pipeline - FIXME move into testing"""
-         self.run_all_stages()
-
-     def run_all_stages(self):
-         """On the currently active session, run all processing stages"""
-         logging.info("--- Running all stages ---")
+     def _get_stages(self, name: str) -> list[dict[str, Any]]:
+         """Get all pipeline stages defined in the merged configuration.

+         Returns:
+             List of stage definitions (dictionaries with 'name' and 'priority')
+         """
          # 1. Get all pipeline definitions (the `[[stages]]` tables with name and priority).
-         pipeline_definitions = self.repo_manager.merged.getall("stages")
+         pipeline_definitions = self.repo_manager.merged.getall(name)
          flat_pipeline_steps = list(itertools.chain.from_iterable(pipeline_definitions))

          # 2. Sort the pipeline steps by their 'priority' field.
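
Note: _get_stages collects every matching `[[stages]]`-style table across the merged repos, flattens them, then sorts by 'priority'. A small sketch with hypothetical stage dicts:

    import itertools

    pipeline_definitions = [
        [{"name": "stack", "priority": 20}],
        [{"name": "setup", "priority": 1}, {"name": "calibrate", "priority": 10}],
    ]
    steps = list(itertools.chain.from_iterable(pipeline_definitions))
    sorted_pipeline = sorted(steps, key=lambda s: s["priority"])
    print([s["name"] for s in sorted_pipeline])  # ['setup', 'calibrate', 'stack']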
@@ -358,30 +668,107 @@ class Starbash:
                  f"invalid stage definition: a stage is missing the required 'priority' key"
              ) from e

-         # 3. Get all available task definitions (the `[[stage]]` tables with tool, script, when).
-         task_definitions = self.repo_manager.merged.getall("stage")
-         all_tasks = list(itertools.chain.from_iterable(task_definitions))
-
-         logging.info(
+         logging.debug(
              f"Found {len(sorted_pipeline)} pipeline steps to run in order of priority."
          )
+         return sorted_pipeline
+
+     def run_all_stages(self):
+         """On the currently active session, run all processing stages"""
+         logging.info("--- Running all stages ---")

-         self.start_session()
+         # 1. Get all pipeline definitions (the `[[stages]]` tables with name and priority).
+         sorted_pipeline = self._get_stages("stages")
+
+         self.init_context()
          # 4. Iterate through the sorted pipeline and execute the associated tasks.
          for step in sorted_pipeline:
              step_name = step.get("name")
              if not step_name:
                  raise ValueError("Invalid pipeline step found: missing 'name' key.")
+             self.run_pipeline_step(step_name)

-             logging.info(
-                 f"--- Running pipeline step: '{step_name}' (Priority: {step['priority']}) ---"
-             )
-             # Find all tasks that should run during this pipeline step.
-             tasks_to_run = [task for task in all_tasks if task.get("when") == step_name]
-             for task in tasks_to_run:
-                 self.run_stage(task)
+     def run_pipeline_step(self, step_name: str):
+         logging.info(f"--- Running pipeline step: '{step_name}' ---")

-     def start_session(self) -> None:
+         # 3. Get all available task definitions (the `[[stage]]` tables with tool, script, when).
+         task_definitions = self.repo_manager.merged.getall("stage")
+         all_tasks = list(itertools.chain.from_iterable(task_definitions))
+
+         # Find all tasks that should run during this pipeline step.
+         tasks_to_run = [task for task in all_tasks if task.get("when") == step_name]
+         for task in tasks_to_run:
+             self.run_stage(task)
+
+     def run_master_stages(self):
+         """Generate any missing master frames.
+
+         Steps:
+         * set all_tasks to be all tasks for when == "setup.master.bias"
+         * loop over all currently unfiltered sessions
+         * for each session loop across all_tasks
+         * if task input.type == the imagetyp for this current session
+         * add_input_to_context() adds the input files to the context (from the session)
+         * run_stage(task) to generate the new master frame
+         """
+         sessions = self.search_session()
+         for session in sessions:
+             try:
+                 imagetyp = session[get_column_name(Database.IMAGETYP_KEY)]
+                 logging.debug(
+                     f"Processing session ID {session[get_column_name(Database.ID_KEY)]} with imagetyp '{imagetyp}'"
+                 )
+
+                 sorted_pipeline = self._get_stages("master-stages")
+
+                 # 4. Iterate through the sorted pipeline and execute the associated tasks.
+                 # FIXME unify the master vs normal step running code
+                 for step in sorted_pipeline:
+                     step_name = step.get("name")
+                     if not step_name:
+                         raise ValueError(
+                             "Invalid pipeline step found: missing 'name' key."
+                         )
+
+                     # 3. Get all available task definitions (the `[[stage]]` tables with tool, script, when).
+                     task_definitions = self.repo_manager.merged.getall("stage")
+                     all_tasks = list(itertools.chain.from_iterable(task_definitions))
+
+                     # Find all tasks that should run during this step
+                     tasks_to_run = [
+                         task for task in all_tasks if task.get("when") == step_name
+                     ]
+
+                     for task in tasks_to_run:
+                         input_config = task.get("input", {})
+                         input_type = input_config.get("type")
+                         if not input_type:
+                             raise ValueError(
+                                 f"Task for step '{step_name}' missing required input.type"
+                             )
+                         if self.aliases.equals(input_type, imagetyp):
+                             logging.debug(
+                                 f"Running {step_name} task for imagetyp '{imagetyp}'"
+                             )
+
+                             # Create a default process dir in /tmp, though more advanced 'session' based workflows will
+                             # probably override this and place it somewhere persistent.
+                             with tempfile.TemporaryDirectory(
+                                 prefix="session_tmp_"
+                             ) as temp_dir:
+                                 logging.debug(
+                                     f"Created temporary session directory: {temp_dir}"
+                                 )
+                                 self.init_context()
+                                 self.context["process_dir"] = temp_dir
+                                 self.add_session_to_context(session)
+                                 self.run_stage(task)
+             except RuntimeError as e:
+                 logging.error(
+                     f"Skipping session {session[get_column_name(Database.ID_KEY)]}: {e}"
+                 )
+
+     def init_context(self) -> None:
          """Do common session init"""

          # Context is preserved through all stages, so each stage can add new symbols to it for use by later stages
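
Note: run_master_stages matches a task's declared input.type against the session's IMAGETYP through the Aliases table, so "Bias" vs "OFFSET" style spelling differences still match. A hypothetical sketch of that normalization (the real mapping comes from the merged 'aliases' config, not this dict):

    ALIASES = {"bias": {"bias", "offset"}, "flat": {"flat", "flatfield"}, "dark": {"dark"}}

    def normalize(imagetyp: str) -> str:
        t = imagetyp.strip().lower()
        return next((canon for canon, names in ALIASES.items() if t in names), t)

    def equals(a: str, b: str) -> bool:
        return normalize(a) == normalize(b)

    assert equals("OFFSET", "Bias")  # both normalize to "bias"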
@@ -389,11 +776,199 @@ class Starbash:

          # Update the context with runtime values.
          runtime_context = {
-             "process_dir": "/workspaces/starbash/images/process",  # FIXME - create/find this more correctly per session
-             "masters": "/workspaces/starbash/images/masters",  # FIXME find this the correct way
+             # "masters": "/workspaces/starbash/images/masters", # FIXME find this the correct way
          }
          self.context.update(runtime_context)

+     def add_session_to_context(self, session: SessionRow) -> None:
+         """adds to context from the indicated session:
+         * instrument - for the session
+         * date - the local timezone date of the session
+         * imagetyp - the imagetyp of the session
+         * session - the current session row (joined with a typical image) (can be used to
+           find things like telescope, temperature ...)
+         * session_config - a short human readable description of the session - suitable for logs or filenames
+         """
+         # it is okay to give them the actual session row, because we're never using it again
+         self.context["session"] = session
+
+         instrument = session.get(get_column_name(Database.TELESCOP_KEY))
+         if instrument:
+             self.context["instrument"] = instrument
+
+         imagetyp = session.get(get_column_name(Database.IMAGETYP_KEY))
+         if imagetyp:
+             imagetyp = self.aliases.normalize(imagetyp)
+             self.context["imagetyp"] = imagetyp
+
+         # add a short human readable description of the session - suitable for logs or in filenames
+         session_config = f"{imagetyp}"
+
+         metadata = session.get("metadata", {})
+         filter = metadata.get(Database.FILTER_KEY)
+         if (imagetyp == "flat" or imagetyp == "light") and filter:
+             # we only care about filters in these cases
+             session_config += f"_{filter}"
+         if imagetyp == "dark":
+             exptime = session.get(get_column_name(Database.EXPTIME_KEY))
+             if exptime:
+                 session_config += f"_{int(float(exptime))}s"
+
+         self.context["session_config"] = session_config
+
+         date = session.get(get_column_name(Database.START_KEY))
+         if date:
+             self.context["date"] = to_shortdate(date)
+
+     def add_input_masters(self, stage: dict) -> None:
+         """based on input.masters add the correct master frames as context.master.<type> filepaths"""
+         session = self.context.get("session")
+         assert session is not None, "context.session should have been already set"
+
+         input_config = stage.get("input", {})
+         master_types: list[str] = input_config.get("masters", [])
+         for master_type in master_types:
+             masters = self.get_master_images(
+                 imagetyp=master_type, reference_session=session
+             )
+             if not masters:
+                 raise RuntimeError(
+                     f"No master frames of type '{master_type}' found for stage '{stage.get('name')}'"
+                 )
+
+             context_master = self.context.setdefault("master", {})
+
+             if len(masters) > 1:
+                 logging.debug(
+                     f"Multiple ({len(masters)}) master frames of type '{master_type}' found, using first. FIXME."
+                 )
+
+             # Try to rank the images by desirability
+             masters = self.score_candidates(masters, session)
+
+             self._add_image_abspath(masters[0])  # make sure abspath is populated
+             selected_master = masters[0]["abspath"]
+             logging.info(f"For master '{master_type}', using: {selected_master}")
+
+             context_master[master_type] = selected_master
+
+     def add_input_files(self, stage: dict) -> None:
+         """adds to context.input_files based on the stage input config"""
+         input_config = stage.get("input")
+         input_required = 0
+         if input_config:
+             # if there is an "input" dict, we assume input.required is true if unset
+             input_required = input_config.get("required", 0)
+             source = input_config.get("source")
+             if source is None:
+                 raise ValueError(
+                     f"Stage '{stage.get('name')}' has invalid 'input' configuration: missing 'source'"
+                 )
+             if source == "path":
+                 # The path might contain context variables that need to be expanded.
+                 # path_pattern = expand_context(input_config["path"], context)
+                 path_pattern = input_config["path"]
+                 input_files = glob.glob(path_pattern, recursive=True)
+
+                 self.context["input_files"] = (
+                     input_files  # Pass in the file list via the context dict
+                 )
+             elif source == "repo":
+                 # Get images for this session (by pulling from repo)
+                 session = self.context.get("session")
+                 assert (
+                     session is not None
+                 ), "context.session should have been already set"
+
+                 images = self.get_session_images(session)
+                 logging.debug(f"Using {len(images)} files as input_files")
+                 self.context["input_files"] = [
+                     img["abspath"] for img in images
+                 ]  # Pass in the file list via the context dict
+             else:
+                 raise ValueError(
+                     f"Stage '{stage.get('name')}' has invalid 'input' source: {source}"
+                 )
+
+             # FIXME compare context.output to see if it already exists and is newer than the input files, if so skip processing
+         else:
+             # The script doesn't mention input, therefore assume it doesn't want input_files
+             if "input_files" in self.context:
+                 del self.context["input_files"]
+
+         if input_required and len(self.context.get("input_files", [])) < input_required:
+             raise RuntimeError(f"Stage requires at least {input_required} input files")
+
+     def add_output_path(self, stage: dict) -> None:
+         """Adds output path information to context based on the stage output config.
+
+         Sets the following context variables:
+         - context.output.root_path - base path of the destination repo
+         - context.output.base_path - full path without file extension
+         - context.output.suffix - file extension (e.g., .fits or .fit.gz)
+         - context.output.full_path - complete output file path
+         - context.output.repo - the destination Repo (if applicable)
+         """
+         output_config = stage.get("output")
+         if not output_config:
+             # No output configuration, remove any existing output from context
+             if "output" in self.context:
+                 del self.context["output"]
+             return
+
+         dest = output_config.get("dest")
+         if not dest:
+             raise ValueError(
+                 f"Stage '{stage.get('description', 'unknown')}' has 'output' config but missing 'dest'"
+             )
+
+         if dest == "repo":
+             # Find the destination repo by type/kind
+             output_type = output_config.get("type")
+             if not output_type:
+                 raise ValueError(
+                     f"Stage '{stage.get('description', 'unknown')}' has output.dest='repo' but missing 'type'"
+                 )
+
+             # Find the repo with matching kind
+             dest_repo = self.repo_manager.get_repo_by_kind(output_type)
+             if not dest_repo:
+                 raise ValueError(
+                     f"No repository found with kind '{output_type}' for output destination"
+                 )
+
+             repo_base = dest_repo.get_path()
+             if not repo_base:
+                 raise ValueError(f"Repository '{dest_repo.url}' has no filesystem path")
+
+             repo_relative: str | None = dest_repo.get("repo.relative")
+             if not repo_relative:
+                 raise ValueError(
+                     f"Repository '{dest_repo.url}' is missing 'repo.relative' configuration"
+                 )
+
+             # we support context variables in the relative path
+             repo_relative = expand_context_unsafe(repo_relative, self.context)
+             full_path = repo_base / repo_relative
+
+             # base_path but without spaces - because Siril doesn't like that
+             full_path = Path(str(full_path).replace(" ", r"_"))
+
+             base_path = full_path.parent / full_path.stem
+
+             # Set context variables as documented in the TOML
+             self.context["output"] = {
+                 # "root_path": repo_relative, not needed I think
+                 "base_path": base_path,
+                 # "suffix": full_path.suffix, not needed I think
+                 "full_path": full_path,
+                 "repo": dest_repo,
+             }
+         else:
+             raise ValueError(
+                 f"Unsupported output destination type: {dest}. Only 'repo' is currently supported."
+             )
+
      def run_stage(self, stage: dict) -> None:
          """
          Executes a single processing stage.
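
Note: add_output_path expands context variables inside the repo's 'repo.relative' template, then strips spaces because Siril rejects them. A rough stand-in for expand_context_unsafe using string.Template (paths and keys here are hypothetical):

    from pathlib import Path
    from string import Template

    context = {"instrument": "EdgeHD 8", "date": "2025-09-09", "session_config": "dark_120s"}
    repo_relative = "${instrument}/${date}/${session_config}.fits"

    full_path = Path("/repos/master") / Template(repo_relative).substitute(context)
    full_path = Path(str(full_path).replace(" ", "_"))  # Siril dislikes spaces
    base_path = full_path.parent / full_path.stem       # same path minus the suffix
    print(full_path)  # /repos/master/EdgeHD_8/2025-09-09/dark_120s.fits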
@@ -410,17 +985,29 @@ class Starbash:

          logging.info(f"Running stage: {stage_desc}")

-         tool_name = stage.get("tool")
-         if not tool_name:
+         tool_dict = stage.get("tool")
+         if not tool_dict:
              raise ValueError(
                  f"Stage '{stage.get('name')}' is missing a 'tool' definition."
              )
-         tool: Tool | None = tools.get(tool_name)
+         tool_name = tool_dict.get("name")
+         if not tool_name:
+             raise ValueError(
+                 f"Stage '{stage.get('name')}' is missing a 'tool.name' definition."
+             )
+         tool = tools.get(tool_name)
          if not tool:
              raise ValueError(
                  f"Tool '{tool_name}' for stage '{stage.get('name')}' not found."
              )
          logging.debug(f" Using tool: {tool_name}")
+         tool.set_defaults()
+
+         # Allow stage to override tool timeout if specified
+         tool_timeout = tool_dict.get("timeout")
+         if tool_timeout is not None:
+             tool.timeout = float(tool_timeout)
+             logging.debug(f"Using tool timeout: {tool.timeout} seconds")

          script_filename = stage.get("script-file", tool.default_script_file)
          if script_filename:
@@ -435,30 +1022,32 @@ class Starbash:
              )

          # This allows recipe TOML to define their own default variables.
+         # (apply all of the changes to context that the task demands)
          stage_context = stage.get("context", {})
          self.context.update(stage_context)
+         self.add_input_files(stage)
+         self.add_input_masters(stage)
+         self.add_output_path(stage)

-         # Assume no files for this stage
-         if "input_files" in self.context:
-             del self.context["input_files"]
+         # if the output path already exists and is newer than all input files, skip processing
+         output_info: dict | None = self.context.get("output")
+         if output_info:
+             output_path = output_info.get("full_path")

-         input_files = []
-         input_config = stage.get("input")
-         input_required = False
-         if input_config:
-             # if there is an "input" dict, we assume input.required is true if unset
-             input_required = input_config.get("required", True)
-             if "path" in input_config:
-                 # The path might contain context variables that need to be expanded.
-                 # path_pattern = expand_context(input_config["path"], context)
-                 path_pattern = input_config["path"]
-                 input_files = glob.glob(path_pattern, recursive=True)
+             if output_path and os.path.exists(output_path):
+                 logging.info(
+                     f"Output file already exists, skipping processing: {output_path}"
+                 )
+                 return

-                 self.context["input_files"] = (
-                     input_files  # Pass in the file list via the context dict
-                 )
+         tool.run_in_temp_dir(script, context=self.context)

-         if input_required and not input_files:
-             raise RuntimeError("No input files found for stage")
+         # verify context.output was created if it was specified
+         output_info: dict | None = self.context.get("output")
+         if output_info:
+             output_path = output_info.get("full_path")

-         tool.run_in_temp_dir(script, context=self.context)
+             if not output_path or not os.path.exists(output_path):
+                 raise RuntimeError(f"Expected output file not found: {output_path}")
+             else:
+                 self.add_image_to_db(output_info["repo"], Path(output_path), force=True)
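
Note: the skip check above only tests that the output file exists; the FIXME in add_input_files asks for a real staleness test against the input files. One hedged way to write it (hypothetical helper, not part of the package):

    import os

    def output_is_stale(output_path: str, input_files: list[str]) -> bool:
        # The stage should run if the output is missing or older than any input.
        if not os.path.exists(output_path):
            return True
        out_mtime = os.path.getmtime(output_path)
        return any(os.path.getmtime(f) > out_mtime for f in input_files)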