starbash-0.1.6-py3-none-any.whl → starbash-0.1.9-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {starbash/repo → repo}/__init__.py +2 -1
- repo/manager.py +144 -0
- starbash/repo/manager.py → repo/repo.py +29 -116
- starbash/__init__.py +28 -1
- starbash/analytics.py +6 -7
- starbash/app.py +449 -86
- starbash/commands/__init__.py +7 -0
- starbash/commands/info.py +53 -25
- starbash/commands/process.py +154 -0
- starbash/commands/repo.py +168 -77
- starbash/commands/select.py +157 -68
- starbash/database.py +252 -135
- starbash/defaults/starbash.toml +17 -0
- starbash/main.py +4 -1
- starbash/recipes/master_bias/starbash.toml +25 -8
- starbash/recipes/starbash.toml +5 -0
- starbash/selection.py +109 -45
- starbash/templates/repo/master.toml +13 -0
- starbash/templates/userconfig.toml +1 -1
- starbash/toml.py +29 -0
- starbash/tool.py +84 -12
- {starbash-0.1.6.dist-info → starbash-0.1.9.dist-info}/METADATA +37 -16
- starbash-0.1.9.dist-info/RECORD +37 -0
- starbash-0.1.6.dist-info/RECORD +0 -33
- {starbash-0.1.6.dist-info → starbash-0.1.9.dist-info}/WHEEL +0 -0
- {starbash-0.1.6.dist-info → starbash-0.1.9.dist-info}/entry_points.txt +0 -0
- {starbash-0.1.6.dist-info → starbash-0.1.9.dist-info}/licenses/LICENSE +0 -0
starbash/app.py
CHANGED

@@ -1,6 +1,8 @@
 import logging
 from importlib import resources
+import os
 from pathlib import Path
+import tempfile
 import typer
 import tomlkit
 from tomlkit.toml_file import TOMLFile
@@ -11,16 +13,20 @@ import itertools
 from rich.progress import track
 from rich.logging import RichHandler
 import shutil
+from datetime import datetime
+import rich.console
+import copy
 
 import starbash
-from starbash import console
-from starbash.database import Database
-from
-from starbash.
-from starbash.
+from starbash import console, _is_test_env, to_shortdate
+from starbash.database import Database, SessionRow, ImageRow, get_column_name
+from repo import Repo, repo_suffix
+from starbash.toml import toml_from_template
+from starbash.tool import Tool, expand_context, expand_context_unsafe
+from repo import RepoManager
 from starbash.tool import tools
 from starbash.paths import get_user_config_dir, get_user_data_dir
-from starbash.selection import Selection
+from starbash.selection import Selection, where_tuple
 from starbash.analytics import (
     NopAnalytics,
     analytics_exception,
@@ -29,41 +35,41 @@ from starbash.analytics import (
     analytics_start_transaction,
 )
 
+# Type aliases for better documentation
 
-def setup_logging():
+
+def setup_logging(stderr: bool = False):
     """
     Configures basic logging.
     """
+    console = rich.console.Console(stderr=stderr)
+    handlers = (
+        [RichHandler(console=console, rich_tracebacks=True)] if not _is_test_env else []
+    )
     logging.basicConfig(
         level=starbash.log_filter_level,  # use the global log filter level
         format="%(message)s",
         datefmt="[%X]",
-        handlers=
+        handlers=handlers,
     )
 
 
 def get_user_config_path() -> Path:
     """Returns the path to the user config file."""
     config_dir = get_user_config_dir()
-    return config_dir /
+    return config_dir / repo_suffix
 
 
 def create_user() -> Path:
     """Create user directories if they don't exist yet."""
     path = get_user_config_path()
     if not path.exists():
-        tomlstr = (
-            resources.files("starbash")
-            .joinpath("templates/userconfig.toml")
-            .read_text()
-        )
-        toml = tomlkit.parse(tomlstr)
-        TOMLFile(path).write(toml)
+        toml_from_template("userconfig", path)
         logging.info(f"Created user config file: {path}")
     return get_user_config_dir()
 
 
-def copy_images_to_dir(images: list[
+def copy_images_to_dir(images: list[ImageRow], output_dir: Path) -> None:
     """Copy images to the specified output directory (using symbolic links if possible)."""
 
     # Export images
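
Note: setup_logging now builds its own rich Console so log output can be routed to stderr (keeping stdout clean for piped command output), and it drops the RichHandler entirely under test (_is_test_env is starbash's own flag). A minimal standalone sketch of the same pattern, assuming only the rich package:

import logging

import rich.console
from rich.logging import RichHandler


def setup_logging(stderr: bool = False) -> None:
    # Route Rich output to stderr so stdout can carry machine-readable output.
    console = rich.console.Console(stderr=stderr)
    logging.basicConfig(
        level=logging.INFO,
        format="%(message)s",
        datefmt="[%X]",
        handlers=[RichHandler(console=console, rich_tracebacks=True)],
    )


setup_logging(stderr=True)
logging.info("logged via Rich to stderr; stdout stays clean")
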
@@ -112,15 +118,23 @@ def copy_images_to_dir(images: list[dict[str, Any]], output_dir: Path) -> None:
    console.print(f"  [red]Errors: {error_count} files[/red]")
 
 
+def imagetyp_equals(imagetyp1: str, imagetyp2: str) -> bool:
+    """Imagetyps (BIAS, Dark, FLAT, flats) have a number of slightly different conventions.
+    Do a sloppy equality check.
+
+    Eventually handle non-English variants by using the repo's aliases table."""
+    return imagetyp1.strip().lower() == imagetyp2.strip().lower()
+
+
 class Starbash:
     """The main Starbash application class."""
 
-    def __init__(self, cmd: str = "unspecified"):
+    def __init__(self, cmd: str = "unspecified", stderr_logging: bool = False):
         """
         Initializes the Starbash application by loading configurations
         and setting up the repository manager.
         """
-        setup_logging()
+        setup_logging(stderr=stderr_logging)
         logging.info("Starbash starting...")
 
         # Load app defaults and initialize the repository manager
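
Note: imagetyp_equals normalizes only case and surrounding whitespace, so it equates "BIAS" with " bias " but not "FLAT" with "FLATS"; the alias-table handling mentioned in its docstring is still future work. For example:

def imagetyp_equals(a: str, b: str) -> bool:  # as defined above
    return a.strip().lower() == b.strip().lower()


assert imagetyp_equals("BIAS", " bias ")      # case and padding are ignored
assert not imagetyp_equals("FLAT", "FLATS")   # plural variants are not yet folded together
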
@@ -148,23 +162,45 @@ class Starbash:
         )
         # self.repo_manager.dump()
 
-        self.
+        self._db = None  # Lazy initialization - only create when accessed
         self.session_query = None  # None means search all sessions
 
-        # Initialize selection state
-
-        selection_file = data_dir / "selection.json"
-        self.selection = Selection(selection_file)
+        # Initialize selection state (stored in user config repo)
+        self.selection = Selection(self.user_repo)
 
         # FIXME, call reindex somewhere and also index whenever new repos are added
         # self.reindex_repos()
 
+    @property
+    def db(self) -> Database:
+        """Lazy initialization of database - only created as needed."""
+        if self._db is None:
+            self._db = Database()
+            # Ensure all repos are registered in the database
+            self.repo_db_update()
+        return self._db
+
+    def repo_db_update(self) -> None:
+        """Update the database with all managed repositories.
+
+        Iterates over all repos in the RepoManager and ensures each one
+        has a record in the repos table. This is called during lazy database
+        initialization to prepare repo_id values for image insertion.
+        """
+        if self._db is None:
+            return
+
+        for repo in self.repo_manager.repos:
+            self._db.upsert_repo(repo.url)
+            logging.debug(f"Registered repo in database: {repo.url}")
+
     # --- Lifecycle ---
     def close(self) -> None:
         self.analytics.__exit__(None, None, None)
 
         analytics_shutdown()
-        self.
+        if self._db is not None:
+            self._db.close()
 
     # Context manager support
     def __enter__(self) -> "Starbash":
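
Note: the _db field plus db property is the standard lazy-initialization idiom: the Database (and the repo registration that follows it) is only built on first access, and close() only tears down what was actually created. A generic sketch of the pattern with a placeholder resource, not starbash API:

class Holder:
    def __init__(self) -> None:
        self._resource = None  # deferred: nothing expensive happens in __init__

    @property
    def resource(self) -> object:
        if self._resource is None:
            self._resource = object()  # stand-in for an expensive Database()
        return self._resource

    def close(self) -> None:
        if self._resource is not None:  # only clean up if ever created
            self._resource = None


h = Holder()
assert h._resource is None  # not built yet
_ = h.resource              # first access triggers creation
assert h._resource is not None
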
@@ -179,6 +215,7 @@ class Starbash:
         return handled
 
     def _add_session(self, f: str, image_doc_id: int, header: dict) -> None:
+        """We just added a new image; create or update its session entry as needed."""
         filter = header.get(Database.FILTER_KEY, "unspecified")
         image_type = header.get(Database.IMAGETYP_KEY)
         date = header.get(Database.DATE_OBS_KEY)
@@ -204,17 +241,154 @@ class Starbash:
         session = self.db.get_session(new)
         self.db.upsert_session(new, existing=session)
 
-    def
+    def guess_sessions(
+        self, ref_session: SessionRow, want_type: str
+    ) -> list[SessionRow]:
+        """Given a particular session type (i.e. FLAT or BIAS etc...) and an
+        existing session (which is assumed to generally be a LIGHT frame based session):
+
+        Return a list of possible sessions which would be acceptable. The more desirable
+        matches are first in the list. Possibly in the future I might have a 'score' and reason
+        given for each ranking.
+
+        The following criteria MUST match to be acceptable:
+        * matches requested imagetyp.
+        * same filter as reference session (in the case want_type==FLAT only)
+        * same telescope as reference session
+
+        Quality is determined by (most important first):
+        * temperature of CCD-TEMP is closer to the reference session
+        * smaller DATE-OBS delta to the reference session
+
+        Eventually the code will check the following for 'nice to have' (but not now):
+        * TBD
+
+        Possibly eventually this code could be moved into recipes.
+
+        """
+        # Get reference image to access CCD-TEMP and DATE-OBS
+        metadata: dict = ref_session.get("metadata", {})
+        ref_temp = metadata.get("CCD-TEMP", None)
+        ref_date_str = metadata.get(Database.DATE_OBS_KEY)
+
+        # Parse reference date for time delta calculations
+        ref_date = None
+        if ref_date_str:
+            try:
+                ref_date = datetime.fromisoformat(ref_date_str)
+            except (ValueError, TypeError):
+                logging.warning(f"Malformed session ref date: {ref_date_str}")
+
+        # Build search conditions - MUST match criteria
+        conditions = {
+            Database.IMAGETYP_KEY: want_type,
+            Database.TELESCOP_KEY: ref_session[get_column_name(Database.TELESCOP_KEY)],
+        }
+
+        # For FLAT frames, filter must match the reference session
+        if want_type.upper() == "FLAT":
+            conditions[Database.FILTER_KEY] = ref_session[
+                get_column_name(Database.FILTER_KEY)
+            ]
+
+        # Search for candidate sessions
+        candidates = self.db.search_session(where_tuple(conditions))
+
+        # Now score and sort the candidates
+        scored_candidates = []
+
+        for candidate in candidates:
+            score = 0.0
+
+            # Get candidate image metadata to access CCD-TEMP and DATE-OBS
+            try:
+                candidate_image = candidate.get("metadata", {})
+
+                # Score by CCD-TEMP difference (most important)
+                # Lower temperature difference = better score
+                if ref_temp is not None:
+                    candidate_temp = candidate_image.get("CCD-TEMP")
+                    if candidate_temp is not None:
+                        try:
+                            temp_diff = abs(float(ref_temp) - float(candidate_temp))
+                            # Use exponential decay: closer temps get much better scores
+                            # Perfect match (0°C diff) = 1000, 1°C diff ≈ 368, 2°C diff ≈ 135
+                            score += 1000 * (2.718 ** (-temp_diff))
+                        except (ValueError, TypeError):
+                            # If we can't parse temps, give a neutral score
+                            score += 0
+
+                # Score by date/time proximity (secondary importance)
+                if ref_date is not None:
+                    candidate_date_str = candidate_image.get(Database.DATE_OBS_KEY)
+                    if candidate_date_str:
+                        try:
+                            candidate_date = datetime.fromisoformat(candidate_date_str)
+                            time_delta = abs(
+                                (ref_date - candidate_date).total_seconds()
+                            )
+                            # Closer in time = better score
+                            # Same day ≈ 100, 7 days ≈ 37, 30 days ≈ 9
+                            # Using 7-day half-life
+                            score += 100 * (2.718 ** (-time_delta / (7 * 86400)))
+                        except (ValueError, TypeError):
+                            logging.warning(
+                                f"Could not parse candidate date: {candidate_date_str}"
+                            )
+
+                scored_candidates.append((score, candidate))
+
+            except (AssertionError, KeyError) as e:
+                # If we can't get the session image, log and skip this candidate
+                logging.warning(
+                    f"Could not score candidate session {candidate.get('id')}: {e}"
+                )
+                continue
+
+        # Sort by score (highest first) and return just the sessions
+        scored_candidates.sort(key=lambda x: x[0], reverse=True)
+
+        return [candidate for score, candidate in scored_candidates]
+
+    def search_session(self) -> list[SessionRow]:
         """Search for sessions, optionally filtered by the current selection."""
-        #
-
-
-        else:
-            # Get query conditions from selection
-            conditions = self.selection.get_query_conditions()
-            return self.db.search_session(conditions)
+        # Get query conditions from selection
+        conditions = self.selection.get_query_conditions()
+        return self.db.search_session(conditions)
 
-    def
+    def _reconstruct_image_path(self, image: ImageRow) -> ImageRow:
+        """Reconstruct absolute path from image row containing repo_url and relative path.
+
+        Args:
+            image: Image record with 'repo_url' and 'path' (relative) fields
+
+        Returns:
+            Modified image record with 'path' as absolute path
+        """
+        repo_url = image.get("repo_url")
+        relative_path = image.get("path")
+
+        if repo_url and relative_path:
+            repo = self.repo_manager.get_repo_by_url(repo_url)
+            if repo:
+                absolute_path = repo.resolve_path(relative_path)
+                image["path"] = str(absolute_path)
+
+        return image
+
+    def get_session_image(self, session: SessionRow) -> ImageRow:
+        """
+        Get the reference ImageRow for a session with absolute path.
+        """
+        images = self.db.search_image(
+            {Database.ID_KEY: session[get_column_name(Database.IMAGE_DOC_KEY)]}
+        )
+        assert (
+            len(images) == 1
+        ), f"Expected exactly one reference for session, found {len(images)}"
+        return self._reconstruct_image_path(images[0])
+
+    def get_session_images(self, session: SessionRow) -> list[ImageRow]:
         """
         Get all images belonging to a specific session.
 
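
Note: each candidate's score is a sum of two exponential decays, roughly 1000·e^(−Δtemp °C) for temperature plus 100·e^(−Δt / 7 days) for date proximity, written above with the literal 2.718 rather than math.exp (and the 7-day constant is an e-folding time rather than a true half-life). Because the temperature term carries 10× the weight, a candidate 1 °C off (≈368) still outranks any candidate with a perfect date match (≤100). A small check of the constants quoted in the comments:

import math


def session_score(temp_diff_c: float, time_delta_s: float) -> float:
    # Same shape as the scoring above, with math.exp in place of 2.718 ** x.
    return 1000 * math.exp(-temp_diff_c) + 100 * math.exp(-time_delta_s / (7 * 86400))


print(round(session_score(1.0, 0)))       # 468 = 368 (1 °C off) + 100 (same moment)
print(round(1000 * math.exp(-2.0)))       # 135: matches the "2°C diff ≈ 135" comment
print(round(100 * math.exp(-1.0)))        # 37: matches the "7 days ≈ 37" comment
print(round(100 * math.exp(-30 / 7), 1))  # 1.4: the "30 days ≈ 9" comment overstates this
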
@@ -232,24 +406,20 @@ class Starbash:
         Raises:
             ValueError: If session_id is not found in the database
         """
-        # First get the session details
-        session = self.db.get_session_by_id(session_id)
-        if session is None:
-            raise ValueError(f"Session with id {session_id} not found")
-
         # Query images that match ALL session criteria including date range
         conditions = {
-            Database.FILTER_KEY: session[Database.FILTER_KEY],
-            Database.IMAGETYP_KEY: session[Database.IMAGETYP_KEY],
-            Database.OBJECT_KEY: session[Database.OBJECT_KEY],
-            Database.TELESCOP_KEY: session[Database.TELESCOP_KEY],
-            "date_start": session[Database.START_KEY],
-            "date_end": session[Database.END_KEY],
+            Database.FILTER_KEY: session[get_column_name(Database.FILTER_KEY)],
+            Database.IMAGETYP_KEY: session[get_column_name(Database.IMAGETYP_KEY)],
+            Database.OBJECT_KEY: session[get_column_name(Database.OBJECT_KEY)],
+            Database.TELESCOP_KEY: session[get_column_name(Database.TELESCOP_KEY)],
+            "date_start": session[get_column_name(Database.START_KEY)],
+            "date_end": session[get_column_name(Database.END_KEY)],
         }
 
         # Single query with all conditions
         images = self.db.search_image(conditions)
-
+        # Reconstruct absolute paths for all images
+        return [self._reconstruct_image_path(img) for img in images] if images else []
 
     def remove_repo_ref(self, url: str) -> None:
         """
@@ -261,6 +431,8 @@ class Starbash:
         Raises:
             ValueError: If the repository URL is not found in user configuration
         """
+        self.db.remove_repo(url)
+
         # Get the repo-ref list from user config
         repo_refs = self.user_repo.config.get("repo-ref")
 
|
|
|
286
458
|
|
|
287
459
|
def reindex_repo(self, repo: Repo, force: bool = False):
|
|
288
460
|
"""Reindex all repositories managed by the RepoManager."""
|
|
461
|
+
|
|
462
|
+
# make sure this new repo is listed in the repos table
|
|
463
|
+
self.repo_db_update() # not really ideal, a more optimal version would just add the new repo
|
|
464
|
+
|
|
289
465
|
# FIXME, add a method to get just the repos that contain images
|
|
290
466
|
if repo.is_scheme("file") and repo.kind != "recipe":
|
|
291
467
|
logging.debug("Reindexing %s...", repo.url)
|
|
@@ -306,7 +482,10 @@ class Starbash:
         ):
             # progress.console.print(f"Indexing {f}...")
             try:
-
+                # Convert absolute path to relative path within repo
+                relative_path = f.relative_to(path)
+
+                found = self.db.get_image(repo.url, str(relative_path))
                 if not found or force:
                     # Read and log the primary header (HDU 0)
                     with fits.open(str(f), memmap=False) as hdul:
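
Note: indexing now stores paths relative to the repo root (and keys images by repo URL plus relative path), so a repo can be moved on disk without invalidating the database; _reconstruct_image_path reverses the mapping. The round-trip is plain pathlib, sketched here with hypothetical paths:

from pathlib import Path

repo_base = Path("/data/astro/repo")             # hypothetical repo root
f = repo_base / "2024-01-05" / "light_001.fits"  # a file found while indexing

relative = f.relative_to(repo_base)  # what gets stored in the database
absolute = repo_base / relative      # what path reconstruction rebuilds

print(relative)  # 2024-01-05/light_001.fits
assert absolute == f
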
@@ -322,8 +501,9 @@ class Starbash:
                             if (not whitelist) or (key in whitelist):
                                 headers[key] = value
                         logging.debug("Headers for %s: %s", f, headers)
-
-
+                        # Store relative path in database
+                        headers["path"] = str(relative_path)
+                        image_doc_id = self.db.upsert_image(headers, repo.url)
 
                         if not found:
                             # Update the session infos, but ONLY on first file scan
@@ -340,10 +520,6 @@ class Starbash:
         for repo in track(self.repo_manager.repos, description="Reindexing repos..."):
             self.reindex_repo(repo, force=force)
 
-    def test_processing(self):
-        """A crude test of image processing pipeline - FIXME move into testing"""
-        self.run_all_stages()
-
     def run_all_stages(self):
         """On the currently active session, run all processing stages"""
         logging.info("--- Running all stages ---")
@@ -361,30 +537,77 @@ class Starbash:
                 f"invalid stage definition: a stage is missing the required 'priority' key"
             ) from e
 
-        # 3. Get all available task definitions (the `[[stage]]` tables with tool, script, when).
-        task_definitions = self.repo_manager.merged.getall("stage")
-        all_tasks = list(itertools.chain.from_iterable(task_definitions))
-
         logging.info(
             f"Found {len(sorted_pipeline)} pipeline steps to run in order of priority."
         )
 
-        self.
+        self.init_context()
         # 4. Iterate through the sorted pipeline and execute the associated tasks.
         for step in sorted_pipeline:
             step_name = step.get("name")
             if not step_name:
                 raise ValueError("Invalid pipeline step found: missing 'name' key.")
+            self.run_pipeline_step(step_name)
+
+    def run_pipeline_step(self, step_name: str):
+        logging.info(f"--- Running pipeline step: '{step_name}' ---")
 
-
-
+        # 3. Get all available task definitions (the `[[stage]]` tables with tool, script, when).
+        task_definitions = self.repo_manager.merged.getall("stage")
+        all_tasks = list(itertools.chain.from_iterable(task_definitions))
+
+        # Find all tasks that should run during this pipeline step.
+        tasks_to_run = [task for task in all_tasks if task.get("when") == step_name]
+        for task in tasks_to_run:
+            self.run_stage(task)
+
+    def run_master_stages(self):
+        """Generate any missing master frames
+
+        Steps:
+        * set all_tasks to be all tasks for when == "setup.masters"
+        * loop over all currently unfiltered sessions
+        * for each session loop across all_tasks
+        * if task input.type == the imagetyp for this current session
+        * add_input_to_context() add the input files to the context (from the session)
+        * run_stage(task) to generate the new master frame
+        """
+        sessions = self.search_session()
+        for session in sessions:
+            imagetyp = session[get_column_name(Database.IMAGETYP_KEY)]
+            logging.debug(
+                f"Processing session ID {session[get_column_name(Database.ID_KEY)]} with imagetyp '{imagetyp}'"
             )
-            # Find all tasks that should run during this pipeline step.
-            tasks_to_run = [task for task in all_tasks if task.get("when") == step_name]
-            for task in tasks_to_run:
-                self.run_stage(task)
 
-
+            # 3. Get all available task definitions (the `[[stage]]` tables with tool, script, when).
+            task_definitions = self.repo_manager.merged.getall("stage")
+            all_tasks = list(itertools.chain.from_iterable(task_definitions))
+
+            # Find all tasks that should run during the "setup.masters" step.
+            tasks_to_run = [
+                task for task in all_tasks if task.get("when") == "setup.masters"
+            ]
+
+            for task in tasks_to_run:
+                input_config = task.get("input", {})
+                input_type = input_config.get("type")
+                if imagetyp_equals(input_type, imagetyp):
+                    logging.info(
+                        f"  Running master stage task for imagetyp '{imagetyp}'"
+                    )
+
+                    # Create a default process dir in /tmp, though more advanced 'session' based workflows will
+                    # probably override this and place it somewhere persistent.
+                    with tempfile.TemporaryDirectory(prefix="session_tmp_") as temp_dir:
+                        logging.debug(
+                            f"Created temporary session directory: {temp_dir}"
+                        )
+                        self.init_context()
+                        self.context["process_dir"] = temp_dir
+                        self.add_session_to_context(session)
+                        self.run_stage(task)
+
+    def init_context(self) -> None:
         """Do common session init"""
 
         # Context is preserved through all stages, so each stage can add new symbols to it for use by later stages
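
Note: run_master_stages uses tempfile.TemporaryDirectory as a context manager, so each per-session process_dir is created fresh and deleted when the with block exits, even if a stage raises. Sketch:

import os
import tempfile

with tempfile.TemporaryDirectory(prefix="session_tmp_") as temp_dir:
    # temp_dir is an ordinary directory path, usable as a scratch process_dir
    scratch = os.path.join(temp_dir, "master_bias.fits")
    open(scratch, "w").close()
    assert os.path.exists(scratch)

# the directory and its contents are gone once the block exits
assert not os.path.exists(temp_dir)
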
@@ -392,11 +615,152 @@ class Starbash:
 
         # Update the context with runtime values.
         runtime_context = {
-            "process_dir": "/workspaces/starbash/images/process",  # FIXME - create/find this more correctly per session
             "masters": "/workspaces/starbash/images/masters",  # FIXME find this the correct way
         }
         self.context.update(runtime_context)
 
+    def add_session_to_context(self, session: SessionRow) -> None:
+        """adds to context from the indicated session:
+        * input_files - all of the files mentioned in the session
+        * instrument - for the session
+        * date - the localtimezone date of the session
+        * imagetyp - the imagetyp of the session
+        * session - the current session row (joined with a typical image) (can be used to
+          find things like telescope, temperature ...)
+        """
+        # Get images for this session
+        images = self.get_session_images(session)
+        logging.debug(f"Adding {len(images)} files as context.input_files")
+        self.context["input_files"] = [
+            img["path"] for img in images
+        ]  # Pass in the file list via the context dict
+
+        # it is okay to give them the actual session row, because we're never using it again
+        self.context["session"] = session
+
+        instrument = session.get(get_column_name(Database.TELESCOP_KEY))
+        if instrument:
+            self.context["instrument"] = instrument
+
+        imagetyp = session.get(get_column_name(Database.IMAGETYP_KEY))
+        if imagetyp:
+            self.context["imagetyp"] = imagetyp
+
+        date = session.get(get_column_name(Database.START_KEY))
+        if date:
+            self.context["date"] = to_shortdate(date)
+
+    def add_input_files(self, stage: dict) -> None:
+        """adds to context.input_files based on the stage input config"""
+        input_config = stage.get("input")
+        input_required = False
+        if input_config:
+            # if there is an "input" dict, we assume input.required is true if unset
+            input_required = input_config.get("required", True)
+            source = input_config.get("source")
+            if source is None:
+                raise ValueError(
+                    f"Stage '{stage.get('name')}' has invalid 'input' configuration: missing 'source'"
+                )
+            if source == "path":
+                # The path might contain context variables that need to be expanded.
+                # path_pattern = expand_context(input_config["path"], context)
+                path_pattern = input_config["path"]
+                input_files = glob.glob(path_pattern, recursive=True)
+
+                self.context["input_files"] = (
+                    input_files  # Pass in the file list via the context dict
+                )
+            elif source == "repo":
+                # We expect that higher level code has already added the correct input files
+                # to the context
+                if not "input_files" in self.context:
+                    raise RuntimeError(
+                        "Input config specifies 'repo' but no 'input_files' found in context"
+                    )
+            else:
+                raise ValueError(
+                    f"Stage '{stage.get('name')}' has invalid 'input' source: {source}"
+                )
+
+            # FIXME compare context.output to see if it already exists and is newer than the input files, if so skip processing
+        else:
+            # The script doesn't mention input, therefore assume it doesn't want input_files
+            if "input_files" in self.context:
+                del self.context["input_files"]
+
+        if input_required and not "input_files" in self.context:
+            raise RuntimeError("No input files found for stage")
+
+    def add_output_path(self, stage: dict) -> None:
+        """Adds output path information to context based on the stage output config.
+
+        Sets the following context variables:
+        - context.output.root_path - base path of the destination repo
+        - context.output.base_path - full path without file extension
+        - context.output.suffix - file extension (e.g., .fits or .fit.gz)
+        - context.output.full_path - complete output file path
+        """
+        output_config = stage.get("output")
+        if not output_config:
+            # No output configuration, remove any existing output from context
+            if "output" in self.context:
+                del self.context["output"]
+            return
+
+        dest = output_config.get("dest")
+        if not dest:
+            raise ValueError(
+                f"Stage '{stage.get('description', 'unknown')}' has 'output' config but missing 'dest'"
+            )
+
+        if dest == "repo":
+            # Find the destination repo by type/kind
+            output_type = output_config.get("type")
+            if not output_type:
+                raise ValueError(
+                    f"Stage '{stage.get('description', 'unknown')}' has output.dest='repo' but missing 'type'"
+                )
+
+            # Find the repo with matching kind
+            dest_repo = self.repo_manager.get_repo_by_kind(output_type)
+            if not dest_repo:
+                raise ValueError(
+                    f"No repository found with kind '{output_type}' for output destination"
+                )
+
+            repo_base = dest_repo.get_path()
+            if not repo_base:
+                raise ValueError(f"Repository '{dest_repo.url}' has no filesystem path")
+
+            repo_relative: str | None = dest_repo.get("repo.relative")
+            if not repo_relative:
+                raise ValueError(
+                    f"Repository '{dest_repo.url}' is missing 'repo.relative' configuration"
+                )
+
+            # we support context variables in the relative path
+            repo_relative = expand_context_unsafe(repo_relative, self.context)
+            full_path = repo_base / repo_relative
+
+            # base_path but without spaces - because Siril doesn't like that
+            full_path = Path(str(full_path).replace(" ", r"_"))
+
+            base_path = full_path.parent / full_path.stem
+
+            # Set context variables as documented in the TOML
+            self.context["output"] = {
+                # "root_path": repo_relative, not needed I think
+                "base_path": base_path,
+                # "suffix": full_path.suffix, not needed I think
+                "full_path": full_path,
+            }
+
+        else:
+            raise ValueError(
+                f"Unsupported output destination type: {dest}. Only 'repo' is currently supported."
+            )
+
     def run_stage(self, stage: dict) -> None:
         """
         Executes a single processing stage.
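
Note: expand_context_unsafe is starbash's own helper (imported from starbash.tool); its effect on repo.relative is template substitution from the accumulated context dict. Roughly the idea, sketched with str.format_map and an illustrative template (starbash's real placeholder syntax may differ):

from pathlib import Path

context = {"imagetyp": "BIAS", "instrument": "EdgeHD8", "date": "2024-01-05"}

# Hypothetical repo.relative template expanded against the context.
repo_relative = "masters/{instrument}/{date}/{imagetyp}.fits".format_map(context)

full_path = Path("/data/astro") / repo_relative
full_path = Path(str(full_path).replace(" ", "_"))  # Siril dislikes spaces in paths
print(full_path)  # /data/astro/masters/EdgeHD8/2024-01-05/BIAS.fits
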
@@ -438,30 +802,29 @@ class Starbash:
         )
 
         # This allows recipe TOML to define their own default variables.
+        # (apply all of the changes to context that the task demands)
         stage_context = stage.get("context", {})
         self.context.update(stage_context)
+        self.add_input_files(stage)
+        self.add_output_path(stage)
 
-        #
-
-
+        # if the output path already exists and is newer than all input files, skip processing
+        output_info: dict | None = self.context.get("output")
+        if output_info:
+            output_path = output_info.get("full_path")
 
-
-
-
-
-
-            input_required = input_config.get("required", True)
-            if "path" in input_config:
-                # The path might contain context variables that need to be expanded.
-                # path_pattern = expand_context(input_config["path"], context)
-                path_pattern = input_config["path"]
-                input_files = glob.glob(path_pattern, recursive=True)
+            if output_path and os.path.exists(output_path):
+                logging.info(
+                    f"Output file already exists, skipping processing: {output_path}"
+                )
+                return
 
-
-                    input_files  # Pass in the file list via the context dict
-                )
+        tool.run_in_temp_dir(script, context=self.context)
 
-        if
-
+        # verify context.output was created if it was specified
+        output_info: dict | None = self.context.get("output")
+        if output_info:
+            output_path = output_info.get("full_path")
 
-
+            if not output_path or not os.path.exists(output_path):
+                raise RuntimeError(f"Expected output file not found: {output_path}")
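
Note: the new guard skips a stage as soon as the output file merely exists; the FIXME in add_input_files asks for the stronger check, skipping only when the output is newer than every input. A sketch of that mtime comparison (hypothetical helper, not in the package):

import os


def output_is_current(output_path: str, input_files: list[str]) -> bool:
    """Hypothetical helper: True if output exists and is newer than all inputs."""
    if not os.path.exists(output_path):
        return False
    out_mtime = os.path.getmtime(output_path)
    return all(os.path.getmtime(f) <= out_mtime for f in input_files)
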