starbash 0.1.8__py3-none-any.whl → 0.1.10__py3-none-any.whl
This diff shows the changes between two publicly released versions of this package, as they appear in their public registry. It is provided for informational purposes only.
- repo/__init__.py +2 -1
- repo/manager.py +31 -268
- repo/repo.py +294 -0
- starbash/__init__.py +20 -0
- starbash/aliases.py +100 -0
- starbash/analytics.py +4 -0
- starbash/app.py +740 -151
- starbash/commands/__init__.py +0 -17
- starbash/commands/info.py +72 -3
- starbash/commands/process.py +154 -0
- starbash/commands/repo.py +185 -78
- starbash/commands/select.py +135 -44
- starbash/database.py +397 -155
- starbash/defaults/starbash.toml +35 -0
- starbash/main.py +4 -1
- starbash/paths.py +18 -2
- starbash/recipes/master_bias/starbash.toml +32 -19
- starbash/recipes/master_dark/starbash.toml +36 -0
- starbash/recipes/master_flat/starbash.toml +27 -17
- starbash/recipes/osc_dual_duo/starbash.py +1 -5
- starbash/recipes/osc_dual_duo/starbash.toml +8 -4
- starbash/recipes/osc_single_duo/starbash.toml +4 -4
- starbash/recipes/starbash.toml +28 -3
- starbash/selection.py +115 -46
- starbash/templates/repo/master.toml +13 -0
- starbash/templates/repo/processed.toml +10 -0
- starbash/templates/userconfig.toml +1 -1
- starbash/toml.py +29 -0
- starbash/tool.py +199 -67
- {starbash-0.1.8.dist-info → starbash-0.1.10.dist-info}/METADATA +20 -13
- starbash-0.1.10.dist-info/RECORD +40 -0
- starbash-0.1.8.dist-info/RECORD +0 -33
- {starbash-0.1.8.dist-info → starbash-0.1.10.dist-info}/WHEEL +0 -0
- {starbash-0.1.8.dist-info → starbash-0.1.10.dist-info}/entry_points.txt +0 -0
- {starbash-0.1.8.dist-info → starbash-0.1.10.dist-info}/licenses/LICENSE +0 -0
starbash/app.py
CHANGED
@@ -1,6 +1,9 @@
+import cmd
 import logging
 from importlib import resources
+import os
 from pathlib import Path
+import tempfile
 import typer
 import tomlkit
 from tomlkit.toml_file import TOMLFile
@@ -11,16 +14,21 @@ import itertools
 from rich.progress import track
 from rich.logging import RichHandler
 import shutil
+from datetime import datetime
+import rich.console
+import copy
 
 import starbash
-from starbash import console, _is_test_env
-from starbash.
-from
-from
+from starbash import console, _is_test_env, to_shortdate
+from starbash.aliases import Aliases
+from starbash.database import Database, SessionRow, ImageRow, get_column_name
+from repo import Repo, repo_suffix
+from starbash.toml import toml_from_template
+from starbash.tool import Tool, expand_context, expand_context_unsafe
 from repo import RepoManager
 from starbash.tool import tools
 from starbash.paths import get_user_config_dir, get_user_data_dir
-from starbash.selection import Selection
+from starbash.selection import Selection, where_tuple
 from starbash.analytics import (
     NopAnalytics,
     analytics_exception,
@@ -29,12 +37,17 @@ from starbash.analytics import (
     analytics_start_transaction,
 )
 
+# Type aliases for better documentation
 
-def setup_logging():
+
+def setup_logging(stderr: bool = False):
     """
     Configures basic logging.
     """
-
+    console = rich.console.Console(stderr=stderr)
+    handlers = (
+        [RichHandler(console=console, rich_tracebacks=True)] if not _is_test_env else []
+    )
     logging.basicConfig(
         level=starbash.log_filter_level,  # use the global log filter level
         format="%(message)s",
@@ -46,26 +59,24 @@ def setup_logging():
 def get_user_config_path() -> Path:
     """Returns the path to the user config file."""
     config_dir = get_user_config_dir()
-    return config_dir /
+    return config_dir / repo_suffix
 
 
 def create_user() -> Path:
     """Create user directories if they don't exist yet."""
     path = get_user_config_path()
     if not path.exists():
-        tomlstr = (
-            resources.files("starbash")
-            .joinpath("templates/userconfig.toml")
-            .read_text()
-        )
-        toml = tomlkit.parse(tomlstr)
-        TOMLFile(path).write(toml)
+        toml_from_template("userconfig", path)
         logging.info(f"Created user config file: {path}")
     return get_user_config_dir()
 
 
-def copy_images_to_dir(images: list[
-    """Copy images to the specified output directory (using symbolic links if possible).
+def copy_images_to_dir(images: list[ImageRow], output_dir: Path) -> None:
+    """Copy images to the specified output directory (using symbolic links if possible).
+
+    This function requires that "abspath" already be populated in each ImageRow. Normally
+    the caller does this by calling Starbash._add_image_abspath() on the image.
+    """
 
     # Export images
     console.print(f"[cyan]Exporting {len(images)} images to {output_dir}...[/cyan]")
@@ -76,7 +87,7 @@ def copy_images_to_dir(images: list[dict[str, Any]], output_dir: Path) -> None:
 
     for image in images:
         # Get the source path from the image metadata
-        source_path = Path(image.get("
+        source_path = Path(image.get("abspath", ""))
 
         if not source_path.exists():
             console.print(f"[red]Warning: Source file not found: {source_path}[/red]")
@@ -116,21 +127,38 @@
 class Starbash:
     """The main Starbash application class."""
 
-    def __init__(self, cmd: str = "unspecified"):
+    def __init__(self, cmd: str = "unspecified", stderr_logging: bool = False):
         """
         Initializes the Starbash application by loading configurations
        and setting up the repository manager.
         """
-        setup_logging()
+        setup_logging(stderr=stderr_logging)
         logging.info("Starbash starting...")
 
         # Load app defaults and initialize the repository manager
+        self._init_repos()
+        self._init_analytics(cmd)
+        self._init_aliases()
+
+        logging.info(
+            f"Repo manager initialized with {len(self.repo_manager.repos)} repos."
+        )
+        # self.repo_manager.dump()
+
+        self._db = None  # Lazy initialization - only create when accessed
+
+        # Initialize selection state (stored in user config repo)
+        self.selection = Selection(self.user_repo)
+
+    def _init_repos(self) -> None:
+        """Initialize all repositories managed by the RepoManager."""
         self.repo_manager = RepoManager()
         self.repo_manager.add_repo("pkg://defaults")
 
         # Add user prefs as a repo
         self.user_repo = self.repo_manager.add_repo("file://" + str(create_user()))
 
+    def _init_analytics(self, cmd: str) -> None:
         self.analytics = NopAnalytics()
         if self.user_repo.get("analytics.enabled", True):
             include_user = self.user_repo.get("analytics.include_user", False)
@@ -144,28 +172,41 @@ class Starbash:
         self.analytics = analytics_start_transaction(name="App session", op=cmd)
         self.analytics.__enter__()
 
-
-
-        )
-
-
-
-
-
-
-
-
+    def _init_aliases(self) -> None:
+        alias_dict = self.repo_manager.get("aliases", {})
+        assert isinstance(alias_dict, dict), "Aliases config must be a dictionary"
+        self.aliases = Aliases(alias_dict)
+
+    @property
+    def db(self) -> Database:
+        """Lazy initialization of database - only created as needed."""
+        if self._db is None:
+            self._db = Database()
+            # Ensure all repos are registered in the database
+            self.repo_db_update()
+        return self._db
+
+    def repo_db_update(self) -> None:
+        """Update the database with all managed repositories.
+
+        Iterates over all repos in the RepoManager and ensures each one
+        has a record in the repos table. This is called during lazy database
+        initialization to prepare repo_id values for image insertion.
+        """
+        if self._db is None:
+            return
 
-
-
+        for repo in self.repo_manager.repos:
+            self._db.upsert_repo(repo.url)
+            logging.debug(f"Registered repo in database: {repo.url}")
 
     # --- Lifecycle ---
     def close(self) -> None:
         self.analytics.__exit__(None, None, None)
 
         analytics_shutdown()
-        self.
+        if self._db is not None:
+            self._db.close()
 
     # Context manager support
     def __enter__(self) -> "Starbash":
@@ -179,39 +220,249 @@ class Starbash:
         self.close()
         return handled
 
-    def _add_session(self,
-
+    def _add_session(self, image_doc_id: int, header: dict) -> None:
+        """We just added a new image, create or update its session entry as needed."""
         image_type = header.get(Database.IMAGETYP_KEY)
         date = header.get(Database.DATE_OBS_KEY)
         if not date or not image_type:
             logging.warning(
-                "Image %s missing either DATE-OBS or IMAGETYP FITS header, skipping...",
-
+                "Image '%s' missing either DATE-OBS or IMAGETYP FITS header, skipping...",
+                header.get("path", "unspecified"),
             )
         else:
             exptime = header.get(Database.EXPTIME_KEY, 0)
-
+
             new = {
-                Database.
-
-
-
-                Database.
-                Database.
-                Database.
-                Database.
-                Database.
+                get_column_name(Database.START_KEY): date,
+                get_column_name(
+                    Database.END_KEY
+                ): date,  # FIXME not quite correct, should be longer by exptime
+                get_column_name(Database.IMAGE_DOC_KEY): image_doc_id,
+                get_column_name(Database.IMAGETYP_KEY): image_type,
+                get_column_name(Database.NUM_IMAGES_KEY): 1,
+                get_column_name(Database.EXPTIME_TOTAL_KEY): exptime,
+                get_column_name(Database.EXPTIME_KEY): exptime,
             }
+
+            filter = header.get(Database.FILTER_KEY)
+            if filter:
+                new[get_column_name(Database.FILTER_KEY)] = filter
+
+            telescop = header.get(Database.TELESCOP_KEY)
+            if telescop:
+                new[get_column_name(Database.TELESCOP_KEY)] = telescop
+
+            obj = header.get(Database.OBJECT_KEY)
+            if obj:
+                new[get_column_name(Database.OBJECT_KEY)] = obj
+
             session = self.db.get_session(new)
             self.db.upsert_session(new, existing=session)
 
-    def
+    def guess_sessions(
+        self, ref_session: SessionRow, want_type: str
+    ) -> list[SessionRow]:
+        """Given a particular session type (e.g. FLAT or BIAS) and an
+        existing session (which is assumed to generally be a LIGHT frame based session):
+
+        Return a list of possible sessions which would be acceptable. The more desirable
+        matches are first in the list. Possibly in the future I might have a 'score' and reason
+        given for each ranking.
+
+        The following criteria MUST match to be acceptable:
+        * matches requested imagetyp.
+        * same filter as reference session (in the case want_type==FLAT only)
+        * same telescope as reference session
+
+        Quality is determined by (most important first):
+        * temperature of CCD-TEMP is closer to the reference session
+        * smaller DATE-OBS delta to the reference session
+
+        Eventually the code will check the following for 'nice to have' (but not now):
+        * TBD
+
+        Possibly eventually this code could be moved into recipes.
+        """
+        # Get reference image to access CCD-TEMP and DATE-OBS
+
+        # Build search conditions - MUST match criteria
+        conditions = {
+            Database.IMAGETYP_KEY: want_type,
+            Database.TELESCOP_KEY: ref_session[get_column_name(Database.TELESCOP_KEY)],
+        }
+
+        # For FLAT frames, filter must match the reference session
+        if want_type.upper() == "FLAT":
+            conditions[Database.FILTER_KEY] = ref_session[
+                get_column_name(Database.FILTER_KEY)
+            ]
+
+        # Search for candidate sessions
+        candidates = self.db.search_session(where_tuple(conditions))
+
+        return self.score_candidates(candidates, ref_session)
+
+    def score_candidates(
+        self, candidates: list[dict[str, Any]], ref_session: SessionRow
+    ) -> list[SessionRow]:
+        """Given a list of images or sessions, try to rank that list by desirability.
+
+        Return a list of possible images/sessions which would be acceptable. The more desirable
+        matches are first in the list. Possibly in the future I might have a 'score' and reason
+        given for each ranking.
+
+        The following criteria MUST match to be acceptable:
+        * matches requested imagetyp.
+        * same filter as reference session (in the case want_type==FLAT only)
+        * same telescope as reference session
+
+        Quality is determined by (most important first):
+        * temperature of CCD-TEMP is closer to the reference session
+        * smaller DATE-OBS delta to the reference session
+
+        Eventually the code will check the following for 'nice to have' (but not now):
+        * TBD
+
+        Possibly eventually this code could be moved into recipes.
+        """
+
+        metadata: dict = ref_session.get("metadata", {})
+        ref_temp = metadata.get("CCD-TEMP", None)
+        ref_date_str = metadata.get(Database.DATE_OBS_KEY)
+
+        # Now score and sort the candidates
+        scored_candidates = []
+
+        for candidate in candidates:
+            score = 0.0
+
+            # Get candidate image metadata to access CCD-TEMP and DATE-OBS
+            try:
+                candidate_image = candidate.get("metadata", {})
+
+                # Score by CCD-TEMP difference (most important)
+                # Lower temperature difference = better score
+                if ref_temp is not None:
+                    candidate_temp = candidate_image.get("CCD-TEMP")
+                    if candidate_temp is not None:
+                        try:
+                            temp_diff = abs(float(ref_temp) - float(candidate_temp))
+                            # Use exponential decay: closer temps get much better scores
+                            # Perfect match (0°C diff) = 1000, 1°C diff ≈ 368, 2°C diff ≈ 135
+                            score += 1000 * (2.718 ** (-temp_diff))
+                        except (ValueError, TypeError):
+                            # If we can't parse temps, give a neutral score
+                            score += 0
+
+                # Parse reference date for time delta calculations
+                candidate_date_str = candidate_image.get(Database.DATE_OBS_KEY)
+                if ref_date_str and candidate_date_str:
+                    try:
+                        ref_date = datetime.fromisoformat(ref_date_str)
+                        candidate_date = datetime.fromisoformat(candidate_date_str)
+                        time_delta = abs((ref_date - candidate_date).total_seconds())
+                        # Closer in time = better score
+                        # Same day ≈ 100, 7 days ≈ 37, 30 days ≈ 9
+                        # Using 7-day half-life
+                        score += 100 * (2.718 ** (-time_delta / (7 * 86400)))
+                    except (ValueError, TypeError):
+                        logging.warning(f"Malformed date - ignoring entry")
+
+                scored_candidates.append((score, candidate))
+
+            except (AssertionError, KeyError) as e:
+                # If we can't get the session image, log and skip this candidate
+                logging.warning(
+                    f"Could not score candidate session {candidate.get('id')}: {e}"
+                )
+                continue
+
+        # Sort by score (highest first)
+        scored_candidates.sort(key=lambda x: x[0], reverse=True)
+
+        return [candidate for _, candidate in scored_candidates]
+
+    def search_session(self) -> list[SessionRow]:
         """Search for sessions, optionally filtered by the current selection."""
         # Get query conditions from selection
         conditions = self.selection.get_query_conditions()
         return self.db.search_session(conditions)
 
-    def
+    def _add_image_abspath(self, image: ImageRow) -> ImageRow:
+        """Reconstruct absolute path from image row containing repo_url and relative path.
+
+        Args:
+            image: Image record with 'repo_url' and 'path' (relative) fields
+
+        Returns:
+            Modified image record with 'abspath' as absolute path
+        """
+        if not image.get("abspath"):
+            repo_url = image.get(Database.REPO_URL_KEY)
+            relative_path = image.get("path")
+
+            if repo_url and relative_path:
+                repo = self.repo_manager.get_repo_by_url(repo_url)
+                if repo:
+                    absolute_path = repo.resolve_path(relative_path)
+                    image["abspath"] = str(absolute_path)
+
+        return image
+
+    def get_session_image(self, session: SessionRow) -> ImageRow:
+        """
+        Get the reference ImageRow for a session with absolute path.
+        """
+        from starbash.database import SearchCondition
+
+        images = self.db.search_image(
+            [
+                SearchCondition(
+                    "i.id", "=", session[get_column_name(Database.IMAGE_DOC_KEY)]
+                )
+            ]
+        )
+        assert (
+            len(images) == 1
+        ), f"Expected exactly one reference for session, found {len(images)}"
+        return self._add_image_abspath(images[0])
+
+    def get_master_images(
+        self, imagetyp: str | None = None, reference_session: SessionRow | None = None
+    ) -> list[ImageRow]:
+        """Return a list of the specified master imagetyp (bias, flat, etc.),
+        or any type if not specified.
+
+        The first image will be the 'best'; remaining entries are progressively worse matches.
+
+        (the following is not yet implemented)
+        If reference_session is provided it will be used to refine the search as follows:
+        * The telescope must match
+        * The image resolutions and binnings must match
+        * The filter must match (for FLAT frames only)
+        * Preferably the master date_obs would be either before or slightly after (<24 hrs) the reference session start time
+        * Preferably the master date_obs should be the closest in date to the reference session start time
+        * The camera temperature should be as close as possible to the reference session camera temperature
+        """
+        master_repo = self.repo_manager.get_repo_by_kind("master")
+
+        if master_repo is None:
+            logging.warning("No master repo configured - skipping master frame load.")
+            return []
+
+        # Search for images in the master repo only
+        from starbash.database import SearchCondition
+
+        search_conditions = [SearchCondition("r.url", "=", master_repo.url)]
+        if imagetyp:
+            search_conditions.append(SearchCondition("i.imagetyp", "=", imagetyp))
+
+        images = self.db.search_image(search_conditions)
+        return images
+
+    def get_session_images(self, session: SessionRow) -> list[ImageRow]:
         """
         Get all images belonging to a specific session.
 
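(Aside on the ranking above: the score is the sum of two exponential-decay terms, with temperature dominating. A standalone sketch of the same arithmetic, using math.exp in place of the literal 2.718, makes the trade-off easy to check:)

    import math

    def candidate_score(temp_diff_c: float, date_delta_s: float) -> float:
        # Mirrors score_candidates: perfect temp match = 1000, 1 C diff ~ 368, 2 C diff ~ 135
        temp_term = 1000 * math.exp(-temp_diff_c)
        # Date term uses a 7-day e-folding time: same day ~ 100, 7 days ~ 37
        date_term = 100 * math.exp(-date_delta_s / (7 * 86400))
        return temp_term + date_term

    # Temperature dominates: an exact-temperature master from a month ago (~1001)
    # still outranks a 1 C-off master from last night (~455).
    print(candidate_score(0.0, 30 * 86400))  # ~1001
    print(candidate_score(1.0, 86400))       # ~455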
@@ -229,24 +480,52 @@ class Starbash:
         Raises:
             ValueError: If session_id is not found in the database
         """
-
-        session = self.db.get_session_by_id(session_id)
-        if session is None:
-            raise ValueError(f"Session with id {session_id} not found")
+        from starbash.database import SearchCondition
 
         # Query images that match ALL session criteria including date range
-
-
-
-
-
-
-
-
-
-
+        # Note: We need to search JSON metadata for FILTER, IMAGETYP, OBJECT, TELESCOP
+        # since they're not indexed columns in the images table
+        conditions = [
+            SearchCondition(
+                "i.date_obs", ">=", session[get_column_name(Database.START_KEY)]
+            ),
+            SearchCondition(
+                "i.date_obs", "<=", session[get_column_name(Database.END_KEY)]
+            ),
+            SearchCondition(
+                "i.imagetyp", "=", session[get_column_name(Database.IMAGETYP_KEY)]
+            ),
+        ]
+
+        # we never want to return 'master' images as part of the session image paths
+        # (because we will be passing these to siril or whatever to generate masters or
+        # some other downstream image)
+        master_repo = self.repo_manager.get_repo_by_kind("master")
+        if master_repo is not None:
+            conditions.append(SearchCondition("r.url", "<>", master_repo.url))
+
+        # Single query with indexed date conditions
         images = self.db.search_image(conditions)
-
+
+        # We no longer filter by target (object) because it might not be set anyway
+        filtered_images = []
+        for img in images:
+            if (
+                img.get(Database.FILTER_KEY)
+                == session[get_column_name(Database.FILTER_KEY)]
+                # and img.get(Database.OBJECT_KEY)
+                # == session[get_column_name(Database.OBJECT_KEY)]
+                and img.get(Database.TELESCOP_KEY)
+                == session[get_column_name(Database.TELESCOP_KEY)]
+            ):
+                filtered_images.append(img)
+
+        # Reconstruct absolute paths for all images
+        return (
+            [self._add_image_abspath(img) for img in filtered_images]
+            if filtered_images
+            else []
+        )
 
     def remove_repo_ref(self, url: str) -> None:
         """
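(The SearchCondition triples above read like fragments of a parameterized WHERE clause. A minimal sketch of that mapping - the real rendering lives in starbash/database.py and may differ:)

    from typing import Any, NamedTuple

    class SearchCondition(NamedTuple):  # stand-in for starbash.database.SearchCondition
        column: str
        op: str
        value: Any

    def to_where(conditions: list[SearchCondition]) -> tuple[str, list[Any]]:
        # Render a condition list into SQL text plus bound parameters
        if not conditions:
            return "", []
        clause = " AND ".join(f"{c.column} {c.op} ?" for c in conditions)
        return f"WHERE {clause}", [c.value for c in conditions]

    sql, params = to_where(
        [
            SearchCondition("i.date_obs", ">=", "2025-09-09T00:00:00"),
            SearchCondition("i.imagetyp", "=", "LIGHT"),
        ]
    )
    # sql == "WHERE i.date_obs >= ? AND i.imagetyp = ?"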
@@ -258,6 +537,8 @@ class Starbash:
         Raises:
             ValueError: If the repository URL is not found in user configuration
         """
+        self.db.remove_repo(url)
+
         # Get the repo-ref list from user config
         repo_refs = self.user_repo.config.get("repo-ref")
 
@@ -272,6 +553,7 @@ class Starbash:
             # Match by converting to file:// URL format if needed
             if ref_dir == url or f"file://{ref_dir}" == url:
                 repo_refs.remove(ref)
+
                 found = True
                 break
 
@@ -281,20 +563,76 @@ class Starbash:
         # Write the updated config
         self.user_repo.write_config()
 
-    def
+    def add_image_to_db(self, repo: Repo, f: Path, force: bool = False) -> None:
+        """Read FITS header from file and add/update image entry in the database."""
+
+        path = repo.get_path()
+        if not path:
+            raise ValueError(f"Repo path not found for {repo}")
+
+        whitelist = None
+        config = self.repo_manager.merged.get("config")
+        if config:
+            whitelist = config.get("fits-whitelist", None)
+
+        try:
+            # Convert absolute path to relative path within repo
+            relative_path = f.relative_to(path)
+
+            found = self.db.get_image(repo.url, str(relative_path))
+
+            # for debugging sometimes we want to limit scanning to a single directory or file
+            # debug_target = "masters-raw/2025-09-09/DARK"
+            debug_target = None
+            if debug_target:
+                if str(relative_path).startswith(debug_target):
+                    logging.error("Debugging %s...", f)
+                    found = False
+                else:
+                    found = True  # skip processing
+                    force = False
+
+            if not found or force:
+                # Read and log the primary header (HDU 0)
+                with fits.open(str(f), memmap=False) as hdul:
+                    # convert headers to dict
+                    hdu0: Any = hdul[0]
+                    header = hdu0.header
+                    if type(header).__name__ == "Unknown":
+                        raise ValueError("FITS header has Unknown type: %s", f)
+
+                    items = header.items()
+                    headers = {}
+                    for key, value in items:
+                        if (not whitelist) or (key in whitelist):
+                            headers[key] = value
+                    logging.debug("Headers for %s: %s", f, headers)
+                    # Store relative path in database
+                    headers["path"] = str(relative_path)
+                    image_doc_id = self.db.upsert_image(headers, repo.url)
+
+                    if not found:
+                        # Update the session infos, but ONLY on first file scan
+                        # (otherwise invariants will get messed up)
+                        self._add_session(image_doc_id, header)
+
+        except Exception as e:
+            logging.warning("Failed to read FITS header for %s: %s", f, e)
+
+    def reindex_repo(self, repo: Repo, force: bool = False, subdir: str | None = None):
         """Reindex all repositories managed by the RepoManager."""
-        # FIXME, add a method to get just the repos that contain images
-        if repo.is_scheme("file") and repo.kind != "recipe":
-            logging.debug("Reindexing %s...", repo.url)
 
-
-
-
+        # make sure this new repo is listed in the repos table
+        self.repo_db_update()  # not really ideal, a more optimal version would just add the new repo
+
+        path = repo.get_path()
 
-
-
+        if path and repo.is_scheme("file") and repo.kind != "recipe":
+            logging.debug("Reindexing %s...", repo.url)
+
+            if subdir:
+                path = path / subdir
+                # used to debug
 
             # Find all FITS files under this repo path
             for f in track(
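(The header-scanning core of add_image_to_db is ordinary astropy.io.fits usage; trimmed to a standalone sketch - the whitelist set in the usage comment is illustrative:)

    from pathlib import Path
    from astropy.io import fits

    def read_primary_header(f: Path, whitelist: set[str] | None = None) -> dict:
        # Return the primary-HDU header as a plain dict, optionally key-filtered
        with fits.open(str(f), memmap=False) as hdul:
            header = hdul[0].header
            return {k: v for k, v in header.items() if not whitelist or k in whitelist}

    # headers = read_primary_header(Path("light_0001.fits"), {"DATE-OBS", "IMAGETYP", "EXPTIME"})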
@@ -302,33 +640,7 @@ class Starbash:
                 description=f"Indexing {repo.url}...",
             ):
                 # progress.console.print(f"Indexing {f}...")
-
-                found = self.db.get_image(str(f))
-                if not found or force:
-                    # Read and log the primary header (HDU 0)
-                    with fits.open(str(f), memmap=False) as hdul:
-                        # convert headers to dict
-                        hdu0: Any = hdul[0]
-                        header = hdu0.header
-                        if type(header).__name__ == "Unknown":
-                            raise ValueError("FITS header has Unknown type: %s", f)
-
-                        items = header.items()
-                        headers = {}
-                        for key, value in items:
-                            if (not whitelist) or (key in whitelist):
-                                headers[key] = value
-                        logging.debug("Headers for %s: %s", f, headers)
-                        headers["path"] = str(f)
-                        image_doc_id = self.db.upsert_image(headers)
-
-                        if not found:
-                            # Update the session infos, but ONLY on first file scan
-                            # (otherwise invariants will get messed up)
-                            self._add_session(str(f), image_doc_id, header)
-
-                except Exception as e:
-                    logging.warning("Failed to read FITS header for %s: %s", f, e)
+                self.add_image_to_db(repo, f, force=force)
 
     def reindex_repos(self, force: bool = False):
         """Reindex all repositories managed by the RepoManager."""
@@ -337,16 +649,14 @@ class Starbash:
         for repo in track(self.repo_manager.repos, description="Reindexing repos..."):
             self.reindex_repo(repo, force=force)
 
-    def
-        """
-        self.run_all_stages()
-
-    def run_all_stages(self):
-        """On the currently active session, run all processing stages"""
-        logging.info("--- Running all stages ---")
+    def _get_stages(self, name: str) -> list[dict[str, Any]]:
+        """Get all pipeline stages defined in the merged configuration.
 
+        Returns:
+            List of stage definitions (dictionaries with 'name' and 'priority')
+        """
         # 1. Get all pipeline definitions (the `[[stages]]` tables with name and priority).
-        pipeline_definitions = self.repo_manager.merged.getall(
+        pipeline_definitions = self.repo_manager.merged.getall(name)
         flat_pipeline_steps = list(itertools.chain.from_iterable(pipeline_definitions))
 
         # 2. Sort the pipeline steps by their 'priority' field.
@@ -358,30 +668,107 @@ class Starbash:
                 f"invalid stage definition: a stage is missing the required 'priority' key"
             ) from e
 
-
-        task_definitions = self.repo_manager.merged.getall("stage")
-        all_tasks = list(itertools.chain.from_iterable(task_definitions))
-
-        logging.info(
+        logging.debug(
             f"Found {len(sorted_pipeline)} pipeline steps to run in order of priority."
         )
+        return sorted_pipeline
+
+    def run_all_stages(self):
+        """On the currently active session, run all processing stages"""
+        logging.info("--- Running all stages ---")
 
-
+        # 1. Get all pipeline definitions (the `[[stages]]` tables with name and priority).
+        sorted_pipeline = self._get_stages("stages")
+
+        self.init_context()
         # 4. Iterate through the sorted pipeline and execute the associated tasks.
         for step in sorted_pipeline:
             step_name = step.get("name")
             if not step_name:
                 raise ValueError("Invalid pipeline step found: missing 'name' key.")
+            self.run_pipeline_step(step_name)
 
-
-
-        )
-        # Find all tasks that should run during this pipeline step.
-        tasks_to_run = [task for task in all_tasks if task.get("when") == step_name]
-        for task in tasks_to_run:
-            self.run_stage(task)
+    def run_pipeline_step(self, step_name: str):
+        logging.info(f"--- Running pipeline step: '{step_name}' ---")
 
-
+        # 3. Get all available task definitions (the `[[stage]]` tables with tool, script, when).
+        task_definitions = self.repo_manager.merged.getall("stage")
+        all_tasks = list(itertools.chain.from_iterable(task_definitions))
+
+        # Find all tasks that should run during this pipeline step.
+        tasks_to_run = [task for task in all_tasks if task.get("when") == step_name]
+        for task in tasks_to_run:
+            self.run_stage(task)
+
+    def run_master_stages(self):
+        """Generate any missing master frames
+
+        Steps:
+        * set all_tasks to be all tasks for when == "setup.master.bias"
+        * loop over all currently unfiltered sessions
+        * for each session loop across all_tasks
+        * if task input.type == the imagetyp for this current session
+        * add_input_to_context() add the input files to the context (from the session)
+        * run_stage(task) to generate the new master frame
+        """
+        sessions = self.search_session()
+        for session in sessions:
+            try:
+                imagetyp = session[get_column_name(Database.IMAGETYP_KEY)]
+                logging.debug(
+                    f"Processing session ID {session[get_column_name(Database.ID_KEY)]} with imagetyp '{imagetyp}'"
+                )
+
+                sorted_pipeline = self._get_stages("master-stages")
+
+                # 4. Iterate through the sorted pipeline and execute the associated tasks.
+                # FIXME unify the master vs normal step running code
+                for step in sorted_pipeline:
+                    step_name = step.get("name")
+                    if not step_name:
+                        raise ValueError(
+                            "Invalid pipeline step found: missing 'name' key."
+                        )
+
+                    # 3. Get all available task definitions (the `[[stage]]` tables with tool, script, when).
+                    task_definitions = self.repo_manager.merged.getall("stage")
+                    all_tasks = list(itertools.chain.from_iterable(task_definitions))
+
+                    # Find all tasks that should run during this step
+                    tasks_to_run = [
+                        task for task in all_tasks if task.get("when") == step_name
+                    ]
+
+                    for task in tasks_to_run:
+                        input_config = task.get("input", {})
+                        input_type = input_config.get("type")
+                        if not input_type:
+                            raise ValueError(
+                                f"Task for step '{step_name}' missing required input.type"
+                            )
+                        if self.aliases.equals(input_type, imagetyp):
+                            logging.debug(
+                                f"Running {step_name} task for imagetyp '{imagetyp}'"
+                            )
+
+                            # Create a default process dir in /tmp, though more advanced 'session' based workflows will
+                            # probably override this and place it somewhere persistent.
+                            with tempfile.TemporaryDirectory(
+                                prefix="session_tmp_"
+                            ) as temp_dir:
+                                logging.debug(
+                                    f"Created temporary session directory: {temp_dir}"
+                                )
+                                self.init_context()
+                                self.context["process_dir"] = temp_dir
+                                self.add_session_to_context(session)
+                                self.run_stage(task)
+            except RuntimeError as e:
+                logging.error(
+                    f"Skipping session {session[get_column_name(Database.ID_KEY)]}: {e}"
+                )
 
     def init_context(self) -> None:
         """Do common session init"""
 
         # Context is preserved through all stages, so each stage can add new symbols to it for use by later stages
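(The scheduling in _get_stages/run_pipeline_step reduces to: flatten every [[stages]] table from the merged config, sort by priority, then run the [[stage]] tasks whose 'when' names that step. A self-contained sketch with plain dicts standing in for the merged TOML tables:)

    import itertools

    pipeline_definitions = [  # one list per contributing repo, as merged.getall("stages") would return
        [{"name": "calibrate", "priority": 10}, {"name": "stack", "priority": 30}],
        [{"name": "register", "priority": 20}],
    ]
    sorted_pipeline = sorted(
        itertools.chain.from_iterable(pipeline_definitions), key=lambda s: s["priority"]
    )

    all_tasks = [{"when": "calibrate", "tool": {"name": "siril"}}]
    for step in sorted_pipeline:
        tasks_to_run = [t for t in all_tasks if t.get("when") == step["name"]]
        # each matching task would then go to run_stage(task)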
@@ -389,11 +776,199 @@ class Starbash:
 
         # Update the context with runtime values.
         runtime_context = {
-            "
-            "masters": "/workspaces/starbash/images/masters",  # FIXME find this the correct way
+            # "masters": "/workspaces/starbash/images/masters",  # FIXME find this the correct way
         }
         self.context.update(runtime_context)
 
+    def add_session_to_context(self, session: SessionRow) -> None:
+        """adds to context from the indicated session:
+        * instrument - for the session
+        * date - the local-timezone date of the session
+        * imagetyp - the imagetyp of the session
+        * session - the current session row (joined with a typical image) (can be used to
+          find things like telescope, temperature ...)
+        * session_config - a short human readable description of the session - suitable for logs or filenames
+        """
+        # it is okay to give them the actual session row, because we're never using it again
+        self.context["session"] = session
+
+        instrument = session.get(get_column_name(Database.TELESCOP_KEY))
+        if instrument:
+            self.context["instrument"] = instrument
+
+        imagetyp = session.get(get_column_name(Database.IMAGETYP_KEY))
+        if imagetyp:
+            imagetyp = self.aliases.normalize(imagetyp)
+            self.context["imagetyp"] = imagetyp
+
+        # add a short human readable description of the session - suitable for logs or in filenames
+        session_config = f"{imagetyp}"
+
+        metadata = session.get("metadata", {})
+        filter = metadata.get(Database.FILTER_KEY)
+        if (imagetyp == "flat" or imagetyp == "light") and filter:
+            # we only care about filters in these cases
+            session_config += f"_{filter}"
+        if imagetyp == "dark":
+            exptime = session.get(get_column_name(Database.EXPTIME_KEY))
+            if exptime:
+                session_config += f"_{int(float(exptime))}s"
+
+        self.context["session_config"] = session_config
+
+        date = session.get(get_column_name(Database.START_KEY))
+        if date:
+            self.context["date"] = to_shortdate(date)
+
+    def add_input_masters(self, stage: dict) -> None:
+        """based on input.masters add the correct master frames as context.master.<type> filepaths"""
+        session = self.context.get("session")
+        assert session is not None, "context.session should have been already set"
+
+        input_config = stage.get("input", {})
+        master_types: list[str] = input_config.get("masters", [])
+        for master_type in master_types:
+            masters = self.get_master_images(
+                imagetyp=master_type, reference_session=session
+            )
+            if not masters:
+                raise RuntimeError(
+                    f"No master frames of type '{master_type}' found for stage '{stage.get('name')}'"
+                )
+
+            context_master = self.context.setdefault("master", {})
+
+            if len(masters) > 1:
+                logging.debug(
+                    f"Multiple ({len(masters)}) master frames of type '{master_type}' found, using first. FIXME."
+                )
+
+                # Try to rank the images by desirability
+                masters = self.score_candidates(masters, session)
+
+            self._add_image_abspath(masters[0])  # make sure abspath is populated
+            selected_master = masters[0]["abspath"]
+            logging.info(f"For master '{master_type}', using: {selected_master}")
+
+            context_master[master_type] = selected_master
+
+    def add_input_files(self, stage: dict) -> None:
+        """adds to context.input_files based on the stage input config"""
+        input_config = stage.get("input")
+        input_required = 0
+        if input_config:
+            # if there is an "input" dict, we assume input.required is true if unset
+            input_required = input_config.get("required", 0)
+            source = input_config.get("source")
+            if source is None:
+                raise ValueError(
+                    f"Stage '{stage.get('name')}' has invalid 'input' configuration: missing 'source'"
+                )
+            if source == "path":
+                # The path might contain context variables that need to be expanded.
+                # path_pattern = expand_context(input_config["path"], context)
+                path_pattern = input_config["path"]
+                input_files = glob.glob(path_pattern, recursive=True)
+
+                self.context["input_files"] = (
+                    input_files  # Pass in the file list via the context dict
+                )
+            elif source == "repo":
+                # Get images for this session (by pulling from repo)
+                session = self.context.get("session")
+                assert (
+                    session is not None
+                ), "context.session should have been already set"
+
+                images = self.get_session_images(session)
+                logging.debug(f"Using {len(images)} files as input_files")
+                self.context["input_files"] = [
+                    img["abspath"] for img in images
+                ]  # Pass in the file list via the context dict
+            else:
+                raise ValueError(
+                    f"Stage '{stage.get('name')}' has invalid 'input' source: {source}"
+                )
+
+            # FIXME compare context.output to see if it already exists and is newer than the input files, if so skip processing
+        else:
+            # The script doesn't mention input, therefore assume it doesn't want input_files
+            if "input_files" in self.context:
+                del self.context["input_files"]
+
+        if input_required and len(self.context.get("input_files", [])) < input_required:
+            raise RuntimeError(f"Stage requires at least {input_required} input files")
+
+    def add_output_path(self, stage: dict) -> None:
+        """Adds output path information to context based on the stage output config.
+
+        Sets the following context variables:
+        - context.output.root_path - base path of the destination repo
+        - context.output.base_path - full path without file extension
+        - context.output.suffix - file extension (e.g., .fits or .fit.gz)
+        - context.output.full_path - complete output file path
+        - context.output.repo - the destination Repo (if applicable)
+        """
+        output_config = stage.get("output")
+        if not output_config:
+            # No output configuration, remove any existing output from context
+            if "output" in self.context:
+                del self.context["output"]
+            return
+
+        dest = output_config.get("dest")
+        if not dest:
+            raise ValueError(
+                f"Stage '{stage.get('description', 'unknown')}' has 'output' config but missing 'dest'"
+            )
+
+        if dest == "repo":
+            # Find the destination repo by type/kind
+            output_type = output_config.get("type")
+            if not output_type:
+                raise ValueError(
+                    f"Stage '{stage.get('description', 'unknown')}' has output.dest='repo' but missing 'type'"
+                )
+
+            # Find the repo with matching kind
+            dest_repo = self.repo_manager.get_repo_by_kind(output_type)
+            if not dest_repo:
+                raise ValueError(
+                    f"No repository found with kind '{output_type}' for output destination"
+                )
+
+            repo_base = dest_repo.get_path()
+            if not repo_base:
+                raise ValueError(f"Repository '{dest_repo.url}' has no filesystem path")
+
+            repo_relative: str | None = dest_repo.get("repo.relative")
+            if not repo_relative:
+                raise ValueError(
+                    f"Repository '{dest_repo.url}' is missing 'repo.relative' configuration"
+                )
+
+            # we support context variables in the relative path
+            repo_relative = expand_context_unsafe(repo_relative, self.context)
+            full_path = repo_base / repo_relative
+
+            # base_path but without spaces - because Siril doesn't like that
+            full_path = Path(str(full_path).replace(" ", r"_"))
+
+            base_path = full_path.parent / full_path.stem
+
+            # Set context variables as documented in the TOML
+            self.context["output"] = {
+                # "root_path": repo_relative, not needed I think
+                "base_path": base_path,
+                # "suffix": full_path.suffix, not needed I think
+                "full_path": full_path,
+                "repo": dest_repo,
+            }
+        else:
+            raise ValueError(
+                f"Unsupported output destination type: {dest}. Only 'repo' is currently supported."
+            )
+
     def run_stage(self, stage: dict) -> None:
         """
         Executes a single processing stage.
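(Taken together, add_input_files/add_input_masters/add_output_path imply a stage shape roughly like the dict below once the recipe TOML is parsed; the field names are the ones the lookups above read, the values are illustrative:)

    stage = {
        "description": "Stack bias frames into a master bias",
        "when": "setup.master.bias",
        "tool": {"name": "siril", "timeout": 300},
        "input": {
            "source": "repo",  # or "path" plus a glob under input["path"]
            "type": "bias",    # matched against the session imagetyp via aliases
            "required": 1,     # minimum number of input_files
            "masters": [],     # e.g. ["bias"] would populate context["master"]["bias"]
        },
        "output": {"dest": "repo", "type": "master"},  # routed to the repo whose kind is "master"
    }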
@@ -410,17 +985,29 @@ class Starbash:
 
         logging.info(f"Running stage: {stage_desc}")
 
-
-        if not
+        tool_dict = stage.get("tool")
+        if not tool_dict:
             raise ValueError(
                 f"Stage '{stage.get('name')}' is missing a 'tool' definition."
             )
-
+        tool_name = tool_dict.get("name")
+        if not tool_name:
+            raise ValueError(
+                f"Stage '{stage.get('name')}' is missing a 'tool.name' definition."
+            )
+        tool = tools.get(tool_name)
         if not tool:
             raise ValueError(
                 f"Tool '{tool_name}' for stage '{stage.get('name')}' not found."
             )
         logging.debug(f"  Using tool: {tool_name}")
+        tool.set_defaults()
+
+        # Allow stage to override tool timeout if specified
+        tool_timeout = tool_dict.get("timeout")
+        if tool_timeout is not None:
+            tool.timeout = float(tool_timeout)
+            logging.debug(f"Using tool timeout: {tool.timeout} seconds")
 
         script_filename = stage.get("script-file", tool.default_script_file)
         if script_filename:
|
|
|
435
1022
|
)
|
|
436
1023
|
|
|
437
1024
|
# This allows recipe TOML to define their own default variables.
|
|
1025
|
+
# (apply all of the changes to context that the task demands)
|
|
438
1026
|
stage_context = stage.get("context", {})
|
|
439
1027
|
self.context.update(stage_context)
|
|
1028
|
+
self.add_input_files(stage)
|
|
1029
|
+
self.add_input_masters(stage)
|
|
1030
|
+
self.add_output_path(stage)
|
|
440
1031
|
|
|
441
|
-
#
|
|
442
|
-
|
|
443
|
-
|
|
1032
|
+
# if the output path already exists and is newer than all input files, skip processing
|
|
1033
|
+
output_info: dict | None = self.context.get("output")
|
|
1034
|
+
if output_info:
|
|
1035
|
+
output_path = output_info.get("full_path")
|
|
444
1036
|
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
input_required = input_config.get("required", True)
|
|
451
|
-
if "path" in input_config:
|
|
452
|
-
# The path might contain context variables that need to be expanded.
|
|
453
|
-
# path_pattern = expand_context(input_config["path"], context)
|
|
454
|
-
path_pattern = input_config["path"]
|
|
455
|
-
input_files = glob.glob(path_pattern, recursive=True)
|
|
1037
|
+
if output_path and os.path.exists(output_path):
|
|
1038
|
+
logging.info(
|
|
1039
|
+
f"Output file already exists, skipping processing: {output_path}"
|
|
1040
|
+
)
|
|
1041
|
+
return
|
|
456
1042
|
|
|
457
|
-
|
|
458
|
-
input_files # Pass in the file list via the context dict
|
|
459
|
-
)
|
|
1043
|
+
tool.run_in_temp_dir(script, context=self.context)
|
|
460
1044
|
|
|
461
|
-
if
|
|
462
|
-
|
|
1045
|
+
# verify context.output was created if it was specified
|
|
1046
|
+
output_info: dict | None = self.context.get("output")
|
|
1047
|
+
if output_info:
|
|
1048
|
+
output_path = output_info.get("full_path")
|
|
463
1049
|
|
|
464
|
-
|
|
1050
|
+
if not output_path or not os.path.exists(output_path):
|
|
1051
|
+
raise RuntimeError(f"Expected output file not found: {output_path}")
|
|
1052
|
+
else:
|
|
1053
|
+
self.add_image_to_db(output_info["repo"], Path(output_path), force=True)
|