starbash 0.1.0__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of starbash might be problematic. Click here for more details.

starbash/database.py CHANGED
@@ -1,67 +1,518 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import logging
4
+ import sqlite3
3
5
  from pathlib import Path
4
6
  from typing import Any, Optional
7
+ from datetime import datetime, timedelta
8
+ import json
5
9
 
6
- from tinydb import TinyDB, Query, table
7
- from platformdirs import PlatformDirs
10
+ from .paths import get_user_data_dir
8
11
 
9
12
 
10
13
  class Database:
11
- """TinyDB-backed application database.
14
+ """SQLite-backed application database.
12
15
 
13
16
  Stores data under the OS-specific user data directory using platformdirs.
14
17
  Provides an `images` table for FITS metadata and basic helpers.
18
+
19
+ The images table stores DATE-OBS and DATE as indexed SQL columns for
20
+ efficient date-based queries, while other FITS metadata is stored in JSON.
15
21
  """
16
22
 
23
+ EXPTIME_KEY = "EXPTIME"
24
+ FILTER_KEY = "FILTER"
25
+ START_KEY = "start"
26
+ END_KEY = "end"
27
+ NUM_IMAGES_KEY = "num-images"
28
+ EXPTIME_TOTAL_KEY = "exptime-total"
29
+ DATE_OBS_KEY = "DATE-OBS"
30
+ DATE_KEY = "DATE"
31
+ IMAGE_DOC_KEY = "image-doc"
32
+ IMAGETYP_KEY = "IMAGETYP"
33
+ OBJECT_KEY = "OBJECT"
34
+ TELESCOP_KEY = "TELESCOP"
35
+
17
36
  def __init__(
18
37
  self,
19
38
  base_dir: Optional[Path] = None,
20
39
  ) -> None:
21
40
  # Resolve base data directory (allow override for tests)
22
41
  if base_dir is None:
23
- app_name = "starbash"
24
- app_author = "geeksville"
25
- dirs = PlatformDirs(app_name, app_author)
26
- data_dir = Path(dirs.user_data_dir)
42
+ data_dir = get_user_data_dir()
27
43
  else:
28
44
  data_dir = base_dir
29
45
 
30
- db_filename = "db.json"
31
- data_dir.mkdir(parents=True, exist_ok=True)
46
+ db_filename = "db.sqlite3"
32
47
  self.db_path = data_dir / db_filename
33
48
 
34
- # Open TinyDB JSON store
35
- self._db = TinyDB(self.db_path)
49
+ # Open SQLite database
50
+ self._db = sqlite3.connect(str(self.db_path))
51
+ self._db.row_factory = sqlite3.Row # Enable column access by name
52
+
53
+ # Initialize tables
54
+ self._init_tables()
55
+
56
+ def _init_tables(self) -> None:
57
+ """Create the images and sessions tables if they don't exist."""
58
+ cursor = self._db.cursor()
59
+
60
+ # Create images table with DATE-OBS and DATE as indexed columns
61
+ cursor.execute(
62
+ """
63
+ CREATE TABLE IF NOT EXISTS images (
64
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
65
+ path TEXT UNIQUE NOT NULL,
66
+ date_obs TEXT,
67
+ date TEXT,
68
+ metadata TEXT NOT NULL
69
+ )
70
+ """
71
+ )
72
+
73
+ # Create index on path for faster lookups
74
+ cursor.execute(
75
+ """
76
+ CREATE INDEX IF NOT EXISTS idx_images_path ON images(path)
77
+ """
78
+ )
79
+
80
+ # Create index on date_obs for efficient date range queries
81
+ cursor.execute(
82
+ """
83
+ CREATE INDEX IF NOT EXISTS idx_images_date_obs ON images(date_obs)
84
+ """
85
+ )
86
+
87
+ # Create index on date for queries using DATE field
88
+ cursor.execute(
89
+ """
90
+ CREATE INDEX IF NOT EXISTS idx_images_date ON images(date)
91
+ """
92
+ )
36
93
 
37
- # Public handle to the images table
38
- self.images = self._db.table("images")
94
+ # Create sessions table
95
+ cursor.execute(
96
+ """
97
+ CREATE TABLE IF NOT EXISTS sessions (
98
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
99
+ start TEXT NOT NULL,
100
+ end TEXT NOT NULL,
101
+ filter TEXT NOT NULL,
102
+ imagetyp TEXT NOT NULL,
103
+ object TEXT NOT NULL,
104
+ telescop TEXT NOT NULL,
105
+ num_images INTEGER NOT NULL,
106
+ exptime_total REAL NOT NULL,
107
+ image_doc_id INTEGER
108
+ )
109
+ """
110
+ )
111
+
112
+ # Create index on session attributes for faster queries
113
+ cursor.execute(
114
+ """
115
+ CREATE INDEX IF NOT EXISTS idx_sessions_lookup
116
+ ON sessions(filter, imagetyp, object, telescop, start, end)
117
+ """
118
+ )
39
119
 
40
- def add_from_fits(self, file_path: Path, headers: dict[str, Any]) -> None:
41
- data = {}
42
- data.update(headers)
43
- data["path"] = str(file_path)
44
- self.upsert_image(data)
120
+ self._db.commit()
45
121
 
46
122
  # --- Convenience helpers for common image operations ---
47
- def upsert_image(self, record: dict[str, Any]) -> None:
123
+ def upsert_image(self, record: dict[str, Any]) -> int:
48
124
  """Insert or update an image record by unique path.
49
125
 
50
126
  The record must include a 'path' key; other keys are arbitrary FITS metadata.
127
+ DATE-OBS and DATE are extracted and stored as indexed columns for efficient queries.
128
+ Returns the rowid of the inserted/updated record.
51
129
  """
52
130
  path = record.get("path")
53
131
  if not path:
54
132
  raise ValueError("record must include 'path'")
55
133
 
56
- Image = Query()
57
- self.images.upsert(record, Image.path == path)
134
+ # Extract date fields for column storage
135
+ date_obs = record.get(self.DATE_OBS_KEY)
136
+ date = record.get(self.DATE_KEY)
137
+
138
+ # Separate path and date fields from metadata
139
+ metadata = {k: v for k, v in record.items() if k != "path"}
140
+ metadata_json = json.dumps(metadata)
141
+
142
+ cursor = self._db.cursor()
143
+ cursor.execute(
144
+ """
145
+ INSERT INTO images (path, date_obs, date, metadata) VALUES (?, ?, ?, ?)
146
+ ON CONFLICT(path) DO UPDATE SET
147
+ date_obs = excluded.date_obs,
148
+ date = excluded.date,
149
+ metadata = excluded.metadata
150
+ """,
151
+ (path, date_obs, date, metadata_json),
152
+ )
153
+
154
+ self._db.commit()
155
+
156
+ # Get the rowid of the inserted/updated record
157
+ cursor.execute("SELECT id FROM images WHERE path = ?", (path,))
158
+ result = cursor.fetchone()
159
+ if result:
160
+ return result[0]
161
+ return cursor.lastrowid if cursor.lastrowid is not None else 0
162
+
163
+ def search_image(self, conditions: dict[str, Any]) -> list[dict[str, Any]] | None:
164
+ """Search for images matching the given conditions.
165
+
166
+ Args:
167
+ conditions: Dictionary of metadata key-value pairs to match.
168
+ Special keys:
169
+ - 'date_start': Filter images with DATE-OBS >= this date
170
+ - 'date_end': Filter images with DATE-OBS <= this date
171
+
172
+ Returns:
173
+ List of matching image records or None if no matches
174
+ """
175
+ # Extract special date filter keys (make a copy to avoid modifying caller's dict)
176
+ conditions_copy = dict(conditions)
177
+ date_start = conditions_copy.pop("date_start", None)
178
+ date_end = conditions_copy.pop("date_end", None)
179
+
180
+ # Build SQL query with WHERE clauses for date filtering
181
+ where_clauses = []
182
+ params = []
183
+
184
+ if date_start:
185
+ where_clauses.append("date_obs >= ?")
186
+ params.append(date_start)
187
+
188
+ if date_end:
189
+ where_clauses.append("date_obs <= ?")
190
+ params.append(date_end)
191
+
192
+ # Build the query
193
+ query = "SELECT id, path, date_obs, date, metadata FROM images"
194
+ if where_clauses:
195
+ query += " WHERE " + " AND ".join(where_clauses)
196
+
197
+ cursor = self._db.cursor()
198
+ cursor.execute(query, params)
199
+
200
+ results = []
201
+ for row in cursor.fetchall():
202
+ metadata = json.loads(row["metadata"])
203
+ metadata["path"] = row["path"]
204
+ metadata["id"] = row["id"]
205
+
206
+ # Add date fields back to metadata for compatibility
207
+ if row["date_obs"]:
208
+ metadata[self.DATE_OBS_KEY] = row["date_obs"]
209
+ if row["date"]:
210
+ metadata[self.DATE_KEY] = row["date"]
211
+
212
+ # Check if remaining conditions match (those stored in JSON metadata)
213
+ match = all(metadata.get(k) == v for k, v in conditions_copy.items())
214
+
215
+ if match:
216
+ results.append(metadata)
217
+
218
+ return results if results else None
219
+
220
+ def search_session(
221
+ self, conditions: dict[str, Any] | None
222
+ ) -> list[dict[str, Any]] | None:
223
+ """Search for sessions matching the given conditions.
224
+
225
+ Args:
226
+ conditions: Dictionary of session key-value pairs to match, or None for all.
227
+ Special keys:
228
+ - 'date_start': Filter sessions starting on or after this date
229
+ - 'date_end': Filter sessions starting on or before this date
230
+
231
+ Returns:
232
+ List of matching session records or None
233
+ """
234
+ if conditions is None:
235
+ return self.all_sessions()
236
+
237
+ cursor = self._db.cursor()
238
+ cursor.execute(
239
+ """
240
+ SELECT id, start, end, filter, imagetyp, object, telescop,
241
+ num_images, exptime_total, image_doc_id
242
+ FROM sessions
243
+ """
244
+ )
245
+
246
+ # Extract date range conditions if present
247
+ date_start = conditions.get("date_start")
248
+ date_end = conditions.get("date_end")
58
249
 
59
- def get_image(self, path: str) -> table.Document | list[table.Document] | None:
60
- Image = Query()
61
- return self.images.get(Image.path == path)
250
+ # Create a copy without date range keys for standard matching
251
+ standard_conditions = {
252
+ k: v
253
+ for k, v in conditions.items()
254
+ if k not in ("date_start", "date_end") and v is not None
255
+ }
256
+
257
+ results = []
258
+ for row in cursor.fetchall():
259
+ session = {
260
+ "id": row["id"],
261
+ self.START_KEY: row["start"],
262
+ self.END_KEY: row["end"],
263
+ self.FILTER_KEY: row["filter"],
264
+ self.IMAGETYP_KEY: row["imagetyp"],
265
+ self.OBJECT_KEY: row["object"],
266
+ self.TELESCOP_KEY: row["telescop"],
267
+ self.NUM_IMAGES_KEY: row["num_images"],
268
+ self.EXPTIME_TOTAL_KEY: row["exptime_total"],
269
+ self.IMAGE_DOC_KEY: row["image_doc_id"],
270
+ }
271
+
272
+ # Check if all standard conditions match
273
+ match = all(session.get(k) == v for k, v in standard_conditions.items())
274
+
275
+ # Apply date range filtering
276
+ if match and date_start:
277
+ session_start = session.get(self.START_KEY, "")
278
+ match = match and session_start >= date_start
279
+
280
+ if match and date_end:
281
+ session_start = session.get(self.START_KEY, "")
282
+ match = match and session_start <= date_end
283
+
284
+ if match:
285
+ results.append(session)
286
+
287
+ return results if results else None
288
+
289
+ def len_session(self) -> int:
290
+ """Return the total number of sessions."""
291
+ cursor = self._db.cursor()
292
+ cursor.execute("SELECT COUNT(*) FROM sessions")
293
+ result = cursor.fetchone()
294
+ return result[0] if result else 0
295
+
296
+ def get_image(self, path: str) -> dict[str, Any] | None:
297
+ """Get an image record by path."""
298
+ cursor = self._db.cursor()
299
+ cursor.execute(
300
+ "SELECT id, path, date_obs, date, metadata FROM images WHERE path = ?",
301
+ (path,),
302
+ )
303
+ row = cursor.fetchone()
304
+
305
+ if row is None:
306
+ return None
307
+
308
+ metadata = json.loads(row["metadata"])
309
+ metadata["path"] = row["path"]
310
+ metadata["id"] = row["id"]
311
+
312
+ # Add date fields back to metadata for compatibility
313
+ if row["date_obs"]:
314
+ metadata[self.DATE_OBS_KEY] = row["date_obs"]
315
+ if row["date"]:
316
+ metadata[self.DATE_KEY] = row["date"]
317
+
318
+ return metadata
62
319
 
63
320
  def all_images(self) -> list[dict[str, Any]]:
64
- return list(self.images.all())
321
+ """Return all image records."""
322
+ cursor = self._db.cursor()
323
+ cursor.execute("SELECT id, path, date_obs, date, metadata FROM images")
324
+
325
+ results = []
326
+ for row in cursor.fetchall():
327
+ metadata = json.loads(row["metadata"])
328
+ metadata["path"] = row["path"]
329
+ metadata["id"] = row["id"]
330
+
331
+ # Add date fields back to metadata for compatibility
332
+ if row["date_obs"]:
333
+ metadata[self.DATE_OBS_KEY] = row["date_obs"]
334
+ if row["date"]:
335
+ metadata[self.DATE_KEY] = row["date"]
336
+
337
+ results.append(metadata)
338
+
339
+ return results
340
+
341
+ def all_sessions(self) -> list[dict[str, Any]]:
342
+ """Return all session records."""
343
+ cursor = self._db.cursor()
344
+ cursor.execute(
345
+ """
346
+ SELECT id, start, end, filter, imagetyp, object, telescop,
347
+ num_images, exptime_total, image_doc_id
348
+ FROM sessions
349
+ """
350
+ )
351
+
352
+ results = []
353
+ for row in cursor.fetchall():
354
+ session = {
355
+ "id": row["id"],
356
+ self.START_KEY: row["start"],
357
+ self.END_KEY: row["end"],
358
+ self.FILTER_KEY: row["filter"],
359
+ self.IMAGETYP_KEY: row["imagetyp"],
360
+ self.OBJECT_KEY: row["object"],
361
+ self.TELESCOP_KEY: row["telescop"],
362
+ self.NUM_IMAGES_KEY: row["num_images"],
363
+ self.EXPTIME_TOTAL_KEY: row["exptime_total"],
364
+ self.IMAGE_DOC_KEY: row["image_doc_id"],
365
+ }
366
+ results.append(session)
367
+
368
+ return results
369
+
370
+ def get_session_by_id(self, session_id: int) -> dict[str, Any] | None:
371
+ """Get a session record by its ID.
372
+
373
+ Args:
374
+ session_id: The database ID of the session
375
+
376
+ Returns:
377
+ Session record dictionary or None if not found
378
+ """
379
+ cursor = self._db.cursor()
380
+ cursor.execute(
381
+ """
382
+ SELECT id, start, end, filter, imagetyp, object, telescop,
383
+ num_images, exptime_total, image_doc_id
384
+ FROM sessions
385
+ WHERE id = ?
386
+ """,
387
+ (session_id,),
388
+ )
389
+
390
+ row = cursor.fetchone()
391
+ if row is None:
392
+ return None
393
+
394
+ return {
395
+ "id": row["id"],
396
+ self.START_KEY: row["start"],
397
+ self.END_KEY: row["end"],
398
+ self.FILTER_KEY: row["filter"],
399
+ self.IMAGETYP_KEY: row["imagetyp"],
400
+ self.OBJECT_KEY: row["object"],
401
+ self.TELESCOP_KEY: row["telescop"],
402
+ self.NUM_IMAGES_KEY: row["num_images"],
403
+ self.EXPTIME_TOTAL_KEY: row["exptime_total"],
404
+ self.IMAGE_DOC_KEY: row["image_doc_id"],
405
+ }
406
+
407
+ def get_session(self, to_find: dict[str, str]) -> dict[str, Any] | None:
408
+ """Find a session matching the given criteria.
409
+
410
+ Searches for sessions with the same filter, image type, target, and telescope
411
+ whose start time is within +/- 8 hours of the provided date.
412
+ """
413
+ date = to_find.get(Database.START_KEY)
414
+ assert date
415
+ image_type = to_find.get(Database.IMAGETYP_KEY)
416
+ assert image_type
417
+ filter = to_find.get(Database.FILTER_KEY)
418
+ assert filter
419
+ target = to_find.get(Database.OBJECT_KEY)
420
+ assert target
421
+ telescop = to_find.get(Database.TELESCOP_KEY, "unspecified")
422
+
423
+ # Convert the provided ISO8601 date string to a datetime, then
424
+ # search for sessions with the same filter whose start time is
425
+ # within +/- 8 hours of the provided date.
426
+ target_dt = datetime.fromisoformat(date)
427
+ window = timedelta(hours=8)
428
+ start_min = (target_dt - window).isoformat()
429
+ start_max = (target_dt + window).isoformat()
430
+
431
+ # Since session 'start' is stored as ISO8601 strings, lexicographic
432
+ # comparison aligns with chronological ordering for a uniform format.
433
+ cursor = self._db.cursor()
434
+ cursor.execute(
435
+ """
436
+ SELECT id, start, end, filter, imagetyp, object, telescop,
437
+ num_images, exptime_total, image_doc_id
438
+ FROM sessions
439
+ WHERE filter = ? AND imagetyp = ? AND object = ? AND telescop = ?
440
+ AND start >= ? AND start <= ?
441
+ LIMIT 1
442
+ """,
443
+ (filter, image_type, target, telescop, start_min, start_max),
444
+ )
445
+
446
+ row = cursor.fetchone()
447
+ if row is None:
448
+ return None
449
+
450
+ return {
451
+ "id": row["id"],
452
+ self.START_KEY: row["start"],
453
+ self.END_KEY: row["end"],
454
+ self.FILTER_KEY: row["filter"],
455
+ self.IMAGETYP_KEY: row["imagetyp"],
456
+ self.OBJECT_KEY: row["object"],
457
+ self.TELESCOP_KEY: row["telescop"],
458
+ self.NUM_IMAGES_KEY: row["num_images"],
459
+ self.EXPTIME_TOTAL_KEY: row["exptime_total"],
460
+ self.IMAGE_DOC_KEY: row["image_doc_id"],
461
+ }
462
+
463
+ def upsert_session(
464
+ self, new: dict[str, Any], existing: dict[str, Any] | None = None
465
+ ) -> None:
466
+ """Insert or update a session record."""
467
+ cursor = self._db.cursor()
468
+
469
+ if existing:
470
+ # Update existing session with new data
471
+ updated_start = min(new[Database.START_KEY], existing[Database.START_KEY])
472
+ updated_end = max(new[Database.END_KEY], existing[Database.END_KEY])
473
+ updated_num_images = existing.get(Database.NUM_IMAGES_KEY, 0) + new.get(
474
+ Database.NUM_IMAGES_KEY, 0
475
+ )
476
+ updated_exptime_total = existing.get(
477
+ Database.EXPTIME_TOTAL_KEY, 0
478
+ ) + new.get(Database.EXPTIME_TOTAL_KEY, 0)
479
+
480
+ cursor.execute(
481
+ """
482
+ UPDATE sessions
483
+ SET start = ?, end = ?, num_images = ?, exptime_total = ?
484
+ WHERE id = ?
485
+ """,
486
+ (
487
+ updated_start,
488
+ updated_end,
489
+ updated_num_images,
490
+ updated_exptime_total,
491
+ existing["id"],
492
+ ),
493
+ )
494
+ else:
495
+ # Insert new session
496
+ cursor.execute(
497
+ """
498
+ INSERT INTO sessions
499
+ (start, end, filter, imagetyp, object, telescop, num_images, exptime_total, image_doc_id)
500
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
501
+ """,
502
+ (
503
+ new[Database.START_KEY],
504
+ new[Database.END_KEY],
505
+ new[Database.FILTER_KEY],
506
+ new[Database.IMAGETYP_KEY],
507
+ new[Database.OBJECT_KEY],
508
+ new.get(Database.TELESCOP_KEY, "unspecified"),
509
+ new[Database.NUM_IMAGES_KEY],
510
+ new[Database.EXPTIME_TOTAL_KEY],
511
+ new.get(Database.IMAGE_DOC_KEY),
512
+ ),
513
+ )
514
+
515
+ self._db.commit()
65
516
 
66
517
  # --- Lifecycle ---
67
518
  def close(self) -> None:
File without changes
@@ -10,25 +10,17 @@ kind = "preferences"
10
10
  # standard default repo locations. When searching repos, repos listed LAST have precedence, so target file can override the root processing defaults,
11
11
  # then the user prefs, then a live github URL or whatever
12
12
 
13
- # [[repo.ref]]
13
+ # [[repo-ref]]
14
14
  # Possibly provide default repos via http from github?
15
15
  # url = "https://github.com/geeksville/starbash-default-repo"
16
16
 
17
- [[repo.ref]]
17
+ [[repo-ref]]
18
18
 
19
19
  # Add our built-in recipes (FIXME, add a "resource" repo type for directories we expect to find inside
20
20
  # our python blob)
21
21
  dir = "/workspaces/starbash/doc/toml/example/recipe-repo"
22
22
 
23
- [[repo.ref]]
24
-
25
- # User custom settings live here
26
- # For "dir" or "url" repos we expect to find an starbash.toml file in the root of the directory.
27
- # dir = "~/.config/starbash"
28
- # But temporarily during early development I'm keeping them in the master github
29
- dir = "/workspaces/starbash/doc/toml/example/config/user/"
30
-
31
- # [[repo.ref]]
23
+ # [[repo-ref]]
32
24
 
33
25
  # recipe repos contain recipes (identified by name). When any sb.toml file references
34
26
  # a recipe the current path of all sources is searched to find that named recipe.
@@ -40,7 +32,7 @@ dir = "/workspaces/starbash/doc/toml/example/config/user/"
40
32
  # url = "http://fixme.com/foo-repo/somedir"
41
33
 
42
34
  # test data. Moved to user preferences (where it should have been all along)
43
- # [[repo.ref]]
35
+ # [[repo-ref]]
44
36
  # dir = "~/Pictures/telescope/from_astroboy"
45
37
 
46
38
 
@@ -48,42 +40,10 @@ dir = "/workspaces/starbash/doc/toml/example/config/user/"
48
40
 
49
41
 
50
42
  # allow including multiple recipes FIXME old idea, not sure if needed.
51
- # [[repo.ref]]
43
+ # [[repo-ref]]
52
44
 
53
45
  # looks for a file with this name and .py for the code and .toml for the config
54
46
  # we'll expect that toml file to contain various [[recipe.*]] sections which will be loaded at this
55
47
  # location in the sequence as if they were defined here
56
48
  #by-file = "process-osc-dual-duo"
57
49
  #by-url = "http:..."
58
-
59
- [config]
60
-
61
- # What fits fields should we store in our DB cache
62
- fits-whitelist = [
63
- "INSTRUME",
64
- "FILTER",
65
- "TELESCOP",
66
- "IMAGETYP",
67
- "DATE-OBS",
68
- "DATE-LOC",
69
- "DATE",
70
- "EXPTIME",
71
- "FWHEEL",
72
- "OBJECT",
73
- "RA", # we ignore the text version OBJCTRA / OBJCTDEC
74
- "DEC",
75
- "OBJCTROT",
76
- "FOCPOS",
77
- "SITELAT",
78
- "SITELON",
79
- "SITEELEV",
80
- "NAXIS1",
81
- "NAXIS2",
82
- "SWCREATE",
83
- "XBINNING",
84
- "YBINNING",
85
- "GAIN",
86
- "CCD-TEMP",
87
- "SET-TEMP",
88
- "AMBTEMP",
89
- ]