so-campaign-manager 0.0.4 (so_campaign_manager-0.0.4-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- so_campaign_manager-0.0.4.dist-info/METADATA +179 -0
- so_campaign_manager-0.0.4.dist-info/RECORD +44 -0
- so_campaign_manager-0.0.4.dist-info/WHEEL +5 -0
- so_campaign_manager-0.0.4.dist-info/entry_points.txt +2 -0
- so_campaign_manager-0.0.4.dist-info/licenses/LICENSE +24 -0
- so_campaign_manager-0.0.4.dist-info/top_level.txt +1 -0
- socm/__about__.py +34 -0
- socm/__init__.py +0 -0
- socm/__main__.py +35 -0
- socm/bookkeeper/__init__.py +1 -0
- socm/bookkeeper/bookkeeper.py +488 -0
- socm/configs/slurmise.toml +2 -0
- socm/core/__init__.py +1 -0
- socm/core/models.py +235 -0
- socm/enactor/__init__.py +3 -0
- socm/enactor/base.py +123 -0
- socm/enactor/dryrun_enactor.py +216 -0
- socm/enactor/rp_enactor.py +273 -0
- socm/execs/__init__.py +3 -0
- socm/execs/mapmaking.py +73 -0
- socm/planner/__init__.py +2 -0
- socm/planner/base.py +87 -0
- socm/planner/heft_planner.py +442 -0
- socm/resources/__init__.py +5 -0
- socm/resources/perlmutter.py +22 -0
- socm/resources/tiger.py +24 -0
- socm/resources/universe.py +18 -0
- socm/utils/__init__.py +0 -0
- socm/utils/misc.py +90 -0
- socm/utils/states.py +17 -0
- socm/workflows/__init__.py +41 -0
- socm/workflows/ml_mapmaking.py +111 -0
- socm/workflows/ml_null_tests/__init__.py +10 -0
- socm/workflows/ml_null_tests/base.py +117 -0
- socm/workflows/ml_null_tests/day_night_null_test.py +132 -0
- socm/workflows/ml_null_tests/direction_null_test.py +133 -0
- socm/workflows/ml_null_tests/elevation_null_test.py +118 -0
- socm/workflows/ml_null_tests/moon_close_null_test.py +165 -0
- socm/workflows/ml_null_tests/moonrise_set_null_test.py +151 -0
- socm/workflows/ml_null_tests/pwv_null_test.py +118 -0
- socm/workflows/ml_null_tests/sun_close_null_test.py +173 -0
- socm/workflows/ml_null_tests/time_null_test.py +76 -0
- socm/workflows/ml_null_tests/wafer_null_test.py +175 -0
- socm/workflows/sat_simulation.py +76 -0

socm/workflows/ml_mapmaking.py
@@ -0,0 +1,111 @@
+from functools import lru_cache
+from pathlib import Path
+from typing import Any, List, Optional, Union
+
+from sotodlib.core import Context
+
+from socm.core import Workflow
+from socm.utils.misc import get_query_from_file
+
+
+@lru_cache(maxsize=10)
+def _load_context(ctx_path: str) -> Context:
+    return Context(Path(ctx_path))
+
+class MLMapmakingWorkflow(Workflow):
+    """
+    A workflow for ML mapmaking.
+    """
+
+    area: str
+    output_dir: str
+    preprocess_config: str
+    query: str = "1"
+    name: str = "ml_mapmaking_workflow"
+    executable: str = "so-site-pipeline"
+    subcommand: str = "make-ml-map"
+    datasize: int = 0
+    comps: Optional[str] = "TQU"
+    wafers: Optional[str] = None
+    bands: Optional[str] = None
+    nmat: Optional[str] = "corr"
+    max_dets: Optional[int] = None
+    site: Optional[str] = "so_lat"
+    downsample: Union[int, List[int]] = 1
+    maxiter: Union[int, List[int]] = 500
+    tiled: int = 1
+
+    def model_post_init(self, __context: Any) -> None:
+        """
+        Post-initialization to set the context for the workflow.
+        """
+        ctx_file = Path(self.context.split("file://")[-1]).absolute()
+        ctx = _load_context(str(ctx_file))
+
+        final_query = self.query
+        if self.query.startswith("file://"):
+            query_path = Path(self.query.split("file://")[-1]).absolute()
+            final_query = get_query_from_file(query_path)
+        obs_ids = ctx.obsdb.query(final_query)
+        for obs_id in obs_ids:
+            self.datasize += obs_id["n_samples"]
+
+    def get_command(self) -> str:
+        """
+        Get the command to run the ML mapmaking workflow.
+        """
+        command = f"srun --cpu_bind=cores --export=ALL --ntasks-per-node={self.resources['ranks']} --cpus-per-task={self.resources['threads']} {self.executable} {self.subcommand} "
+        command += " ".join(self.get_arguments())
+
+        return command.strip()
+
+    def get_arguments(self) -> List[str]:
+        """
+        Get the command to run the ML mapmaking workflow.
+        """
+        area = Path(self.area.split("file://")[-1])
+        final_query = self.query
+        if self.query.startswith("file://"):
+            final_query = Path(self.query.split("file://")[-1]).absolute()
+            final_query = f"{final_query.absolute()}"
+        preprocess_config = Path(self.preprocess_config.split("file://")[-1])
+
+        arguments = [final_query, f"{area.absolute()}", self.output_dir, f"{preprocess_config.absolute()}"]
+        sorted_workflow = dict(sorted(self.model_dump(exclude_unset=True).items()))
+
+        for k, v in sorted_workflow.items():
+            if isinstance(v, str) and v.startswith("file://"):
+                v = Path(v.split("file://")[-1]).absolute()
+            elif isinstance(v, list):
+                v = ",".join([str(item) for item in v])
+            if k not in [
+                "area",
+                "output_dir",
+                "executable",
+                "query",
+                "output_dir",
+                "id",
+                "environment",
+                "resources",
+                "datasize",
+                "preprocess_config"
+            ]:
+                arguments.append(f"--{k}={v}")
+        return arguments
+
+    @classmethod
+    def get_workflows(
+        cls, descriptions: Union[List[dict], dict]
+    ) -> List["MLMapmakingWorkflow"]:
+        """
+        Create a list of MLMapmakingWorkflow instances from the provided descriptions.
+        """
+        if isinstance(descriptions, dict):
+            descriptions = [descriptions]
+
+        workflows = []
+        for desc in descriptions:
+            workflow = cls(**desc)
+            workflows.append(workflow)
+
+        return workflows
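
For illustration, the loop at the end of get_arguments() turns every remaining model field into a --key=value flag, stripping the file:// scheme from paths and joining lists into comma-separated values. Below is a minimal standalone sketch of that logic with made-up field values rather than a real workflow instance:

# Standalone sketch of the flag-building loop in get_arguments().
# The field names and values here are hypothetical, not the package's API.
from pathlib import Path

fields = {
    "comps": "TQU",
    "downsample": [4, 2, 1],
    "maxiter": [30, 100, 500],
    "query": "file:///tmp/query.txt",
    "tiled": 1,
}
skip = {"query"}  # passed positionally, as in the package's exclusion list

arguments = []
for k, v in sorted(fields.items()):
    if isinstance(v, str) and v.startswith("file://"):
        v = Path(v.split("file://")[-1]).absolute()  # strip the URI scheme
    elif isinstance(v, list):
        v = ",".join(str(item) for item in v)        # lists become CSV values
    if k not in skip:
        arguments.append(f"--{k}={v}")

print(arguments)
# ['--comps=TQU', '--downsample=4,2,1', '--maxiter=30,100,500', '--tiled=1']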

socm/workflows/ml_null_tests/__init__.py
@@ -0,0 +1,10 @@
+from .base import NullTestWorkflow  # noqa: F401
+from .day_night_null_test import DayNightNullTestWorkflow  # noqa: F401
+from .direction_null_test import DirectionNullTestWorkflow  # noqa: F401
+from .elevation_null_test import ElevationNullTestWorkflow  # noqa: F401
+from .moon_close_null_test import MoonCloseFarNullTestWorkflow  # noqa: F401
+from .moonrise_set_null_test import MoonRiseSetNullTestWorkflow  # noqa: F401
+from .pwv_null_test import PWVNullTestWorkflow  # noqa: F401
+from .sun_close_null_test import SunCloseFarNullTestWorkflow  # noqa: F401
+from .time_null_test import TimeNullTestWorkflow  # noqa: F401
+from .wafer_null_test import WaferNullTestWorkflow  # noqa: F401

socm/workflows/ml_null_tests/base.py
@@ -0,0 +1,117 @@
+from datetime import timedelta
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Union
+
+from sotodlib.core import Context
+
+from socm.utils.misc import get_query_from_file
+from socm.workflows import MLMapmakingWorkflow
+
+
+class NullTestWorkflow(MLMapmakingWorkflow):
+    """
+    A workflow for null tests.
+    """
+
+    area: str
+    output_dir: str
+    query: str = "1"
+    name: str = "lat_null_test_workflow"
+    datasize: int = 0
+    chunk_nobs: Optional[int] = None
+    chunk_duration: Optional[timedelta] = None
+
+    def model_post_init(self, __context: Any) -> None:
+        """
+        Post-initialization to set the context for the workflow and distribute the
+        observations across splits.
+        """
+        ctx_file = Path(self.context.split("file://")[-1]).absolute()
+        ctx = Context(ctx_file)
+        final_query = self.query
+        if self.query.startswith("file://"):
+            query_path = Path(self.query.split("file://")[-1]).absolute()
+            final_query = get_query_from_file(query_path)
+        obs_ids = ctx.obsdb.query(final_query)
+        obs_info = dict()
+        for obs_id in obs_ids:
+            self.datasize += obs_id["n_samples"]
+            obs_info[obs_id["obs_id"]] = {
+                "start_time": obs_id["timestamp"],
+                "wafer_list": obs_id["wafer_slots_list"].split(","),
+                "tube_slot": obs_id.get("tube_slot", "st1"),
+                "az_center": obs_id["az_center"],
+                "el_center": obs_id["el_center"],
+                "pwv": obs_id.get("pwv", 0),
+            }
+        # Ensure obs_ids are sorted by their timestamp
+        # Order the obs_ids based on their timestamp it is in the obs_meta.obs_info.timestamp
+
+        self._splits = self._get_splits(ctx, obs_info)
+
+    def _get_num_chunks(self, num_obs: int) -> int:
+        num_chunks = (
+            num_obs + self.chunk_nobs - 1
+        ) // self.chunk_nobs  # Ceiling division
+        return num_chunks
+
+    def _get_splits(
+        self, ctx: Context, obs_info: Dict[str, Dict[str, Union[float, str]]]
+    ) -> List[List[str]]:
+        """
+        Distribute the observations across splits based on the context and observation IDs.
+        """
+        if self.__class__.__name__ != "NullTestWorkflow":
+            raise NotImplementedError(
+                "This method should be implemented in subclasses."
+            )
+        else:
+            pass
+
+    @classmethod
+    def get_workflows(cls, desc: Dict[str, Any]) -> List["NullTestWorkflow"]:
+        """
+        Distribute the observations across splits based on the context and observation IDs.
+        """
+        if cls.__name__ != "NullTestWorkflow":
+            raise NotImplementedError(
+                "This method should be implemented in subclasses."
+            )
+        else:
+            pass
+
+    def get_arguments(self) -> List[str]:
+        """
+        Get the command to run the ML mapmaking workflow.
+        """
+        area = Path(self.area.split("file://")[-1])
+        query = Path(self.query.split("file://")[-1])
+        preprocess_config = Path(self.preprocess_config.split("file://")[-1])
+
+        arguments = [f"{query.absolute()}", f"{area.absolute()}", self.output_dir, f"{preprocess_config.absolute()}"]
+        sorted_workflow = dict(sorted(self.model_dump(exclude_unset=True).items()))
+
+        for k, v in sorted_workflow.items():
+            if isinstance(v, str) and v.startswith("file://"):
+                v = Path(v.split("file://")[-1]).absolute()
+            elif isinstance(v, list):
+                v = ",".join([str(item) for item in v])
+            if k not in [
+                "area",
+                "output_dir",
+                "executable",
+                "query",
+                "id",
+                "environment",
+                "resources",
+                "datasize",
+                "chunk_nobs",
+                "nsplits",
+                "wafers",
+                "subcommand",
+                "name",
+                "chunk_duration",
+                "preprocess_config"
+            ]:
+                arguments.append(f"--{k}={v}")
+        return arguments
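
The chunk count in _get_num_chunks() is a ceiling division written with integer arithmetic, so a partial final chunk still counts as a chunk. A quick worked sketch of the same expression:

# Ceiling division as used by _get_num_chunks(): (n + k - 1) // k == ceil(n / k).
def get_num_chunks(num_obs: int, chunk_nobs: int) -> int:
    return (num_obs + chunk_nobs - 1) // chunk_nobs

print(get_num_chunks(10, 3))  # 4: three full chunks plus one partial chunk
print(get_num_chunks(9, 3))   # 3: exact fit, no extra chunk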

socm/workflows/ml_null_tests/day_night_null_test.py
@@ -0,0 +1,132 @@
+from datetime import datetime, timedelta, timezone
+from pathlib import Path
+from typing import Dict, List, Optional, Union
+
+import numpy as np
+import pytz
+from astral import LocationInfo
+from astral.sun import sun
+from sotodlib.core import Context
+
+from socm.workflows.ml_null_tests import NullTestWorkflow
+
+
+class DayNightNullTestWorkflow(NullTestWorkflow):
+    """
+    A workflow for day/night null tests.
+
+    This workflow splits observations based on whether they were taken during the day or night.
+    It creates time-interleaved splits with nsplits=2 as specified.
+    """
+
+    chunk_nobs: Optional[int] = None
+    chunk_duration: Optional[timedelta] = None
+    nsplits: int = 2  # Fixed to 2 as specified in the issue
+    name: str = "day_night_null_test_workflow"
+
+    def _get_splits(
+        self, ctx: Context, obs_info: Dict[str, Dict[str, Union[float, str]]]
+    ) -> Dict[str, List[List[str]]]:
+        """
+        Distribute the observations across splits based on day/night.
+
+        Groups observations by whether they were taken during the day or night and then
+        creates time-interleaved splits for each with nsplits=2.
+
+        Args:
+            ctx: Context object
+            obs_info: Dictionary mapping obs_id to observation metadata
+
+        Returns:
+            Dict mapping 'day' and 'night' to list of splits, where each split is a list
+            of obs_ids
+        """
+        if self.chunk_nobs is None and self.chunk_duration is None:
+            raise ValueError("Either chunk_nobs or duration must be set.")
+        elif self.chunk_nobs is not None and self.chunk_duration is not None:
+            raise ValueError("Only one of chunk_nobs or duration can be set.")
+        elif self.chunk_nobs is None:
+            # Decide the chunk size based on the duration
+            raise NotImplementedError(
+                "Splitting by duration is not implemented yet. Please set chunk_nobs."
+            )
+
+        # Group observations by day/night
+        day_night_splits = {"day": [], "night": []}
+        for obs_id, obs_meta in obs_info.items():
+            obs_time = datetime.fromtimestamp(
+                timestamp=obs_meta["start_time"], tz=timezone.utc
+            )  # Assuming time is in ISO format
+
+            # Determine if it's day or night using the sun position
+            city = LocationInfo(
+                "San Pedro de Atacama", "Chile", "America/Santiago", -22.91, -68.2
+            )
+            s = sun(
+                city.observer, date=obs_time.date(), tzinfo=pytz.timezone(city.timezone)
+            )
+
+            if s["sunrise"] <= obs_time <= s["sunset"]:
+                day_night_splits["day"].append(obs_id)
+            else:
+                day_night_splits["night"].append(obs_id)
+
+        final_splits = {}
+
+        # For each direction, create time-interleaved splits
+        for day_night, obs_infos in day_night_splits.items():
+            if not obs_infos:
+                continue
+
+            # Sort by timestamp for time-based splitting
+            sorted_ids = sorted(obs_infos, key=lambda k: obs_info[k]["start_time"])
+
+            # Group in chunks based on chunk_nobs
+            num_chunks = self._get_num_chunks(len(sorted_ids))
+            obs_lists = np.array_split(sorted_ids, num_chunks) if num_chunks > 0 else []
+
+            # Create nsplits (=2) time-interleaved splits
+            splits = [[] for _ in range(self.nsplits)]
+            for i, obs_list in enumerate(obs_lists):
+                splits[i % self.nsplits] += obs_list.tolist()
+
+            final_splits[day_night] = splits
+
+        return final_splits
+
+    @classmethod
+    def get_workflows(cls, desc=None) -> List[NullTestWorkflow]:
+        """
+        Create a list of NullTestWorkflows instances from the provided descriptions.
+
+        Creates separate workflows for each direction split following the naming
+        convention: {setname} = direction_[rising,setting,middle]
+        """
+        day_night_workflow = cls(**desc)
+
+        workflows = []
+        for day_night, day_night_splits in day_night_workflow._splits.items():
+            for split_idx, split in enumerate(day_night_splits):
+                if not split:
+                    continue
+                desc_copy = day_night_workflow.model_dump(exclude_unset=True)
+                # Follow the naming convention: direction_[rising,setting,middle]
+                desc_copy["output_dir"] = (
+                    f"{day_night_workflow.output_dir}/{day_night}_split_{split_idx + 1}"
+                )
+                desc_copy["name"] = (
+                    f"{day_night}_split_{split_idx + 1}_null_test_workflow"
+                )
+                desc_copy["datasize"] = 0
+                query_file = Path(desc_copy["output_dir"]) / "query.txt"
+                query_file.parent.mkdir(parents=True, exist_ok=True)
+                with open(query_file, "w") as f:
+                    for oid in split:
+                        f.write(f"{oid}\n")
+                desc_copy["query"] = f"file://{str(query_file.absolute())}"
+                desc_copy["chunk_nobs"] = 1
+
+                workflow = NullTestWorkflow(**desc_copy)
+                workflows.append(workflow)
+
+        return workflows
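
All of the null-test subclasses share the same recipe after grouping: sort obs_ids by start time, chunk them with np.array_split, then deal the chunks round-robin into nsplits lists, giving time-interleaved halves. A self-contained sketch of that step with fabricated obs_ids and timestamps (not data from the package):

# Time-interleaved splitting, as in the _get_splits() methods above.
import numpy as np

obs_info = {f"obs_{i}": {"start_time": 1700000000 + 600 * i} for i in range(7)}
chunk_nobs, nsplits = 2, 2

sorted_ids = sorted(obs_info, key=lambda k: obs_info[k]["start_time"])
num_chunks = (len(sorted_ids) + chunk_nobs - 1) // chunk_nobs  # ceiling division
obs_lists = np.array_split(sorted_ids, num_chunks) if num_chunks > 0 else []

splits = [[] for _ in range(nsplits)]
for i, obs_list in enumerate(obs_lists):
    splits[i % nsplits] += obs_list.tolist()  # chunk 0 -> split 0, chunk 1 -> split 1, ...

print(splits)
# [['obs_0', 'obs_1', 'obs_4', 'obs_5'], ['obs_2', 'obs_3', 'obs_6']]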

socm/workflows/ml_null_tests/direction_null_test.py
@@ -0,0 +1,133 @@
+from datetime import timedelta
+from pathlib import Path
+from typing import Dict, List, Optional, Union
+
+import numpy as np
+from sotodlib.core import Context
+
+from socm.workflows.ml_null_tests import NullTestWorkflow
+
+
+class DirectionNullTestWorkflow(NullTestWorkflow):
+    """
+    A workflow for direction null tests.
+
+    This workflow splits observations based on scan direction (rising, setting, middle)
+    and creates time-interleaved splits with nsplits=2 as specified.
+    """
+
+    chunk_nobs: Optional[int] = None
+    chunk_duration: Optional[timedelta] = None
+    nsplits: int = 2  # Fixed to 2 as specified in the issue
+    name: str = "direction_null_test_workflow"
+
+    def _get_splits(
+        self, ctx: Context, obs_info: Dict[str, Dict[str, Union[float, str]]]
+    ) -> Dict[str, List[List[str]]]:
+        """
+        Distribute the observations across splits based on scan direction.
+
+        Groups observations by direction (rising, setting, middle) and then
+        creates time-interleaved splits for each direction with nsplits=2.
+
+        Args:
+            ctx: Context object
+            obs_info: Dictionary mapping obs_id to observation metadata
+
+        Returns:
+            Dict mapping direction to list of splits, where each split is a list
+            of obs_ids
+        """
+        if self.chunk_nobs is None and self.chunk_duration is None:
+            raise ValueError("Either chunk_nobs or duration must be set.")
+        elif self.chunk_nobs is not None and self.chunk_duration is not None:
+            raise ValueError("Only one of chunk_nobs or duration can be set.")
+        elif self.chunk_nobs is None:
+            # Decide the chunk size based on the duration
+            raise NotImplementedError(
+                "Splitting by duration is not implemented yet. Please set chunk_nobs."
+            )
+
+        # Group observations by scan direction
+        direction_splits = {"rising": [], "setting": [], "middle": []}
+        for obs_id, obs_meta in obs_info.items():
+            if np.isclose(
+                obs_meta["az_center"] % 360, 180
+            ):  # Azimuth close to 180 is considered 'middle'
+                direction = "middle"
+            elif (
+                obs_meta["az_center"] % 360
+            ) > 180:  # More than 180 degrees is considered 'setting'
+                direction = "setting"
+            elif (
+                obs_meta["az_center"] % 360
+            ) < 180:  # Less than 180 degrees is considered 'rising'
+                direction = "rising"
+            else:
+                raise ValueError(
+                    f"Unknown azimuth center value for {obs_id}: {obs_meta['az_center']}"
+                )
+
+            if direction in direction_splits:
+                direction_splits[direction].append(obs_id)
+
+        final_splits = {}
+
+        # For each direction, create time-interleaved splits
+        for direction, direction_obs_info in direction_splits.items():
+            if not direction_obs_info:
+                continue
+
+            # Sort by timestamp for time-based splitting
+            sorted_ids = sorted(
+                direction_obs_info, key=lambda k: obs_info[k]["start_time"]
+            )
+
+            # Group in chunks based on chunk_nobs
+            num_chunks = self._get_num_chunks(len(sorted_ids))
+            obs_lists = np.array_split(sorted_ids, num_chunks) if num_chunks > 0 else []
+
+            # Create nsplits (=2) time-interleaved splits
+            splits = [[] for _ in range(self.nsplits)]
+            for i, obs_list in enumerate(obs_lists):
+                splits[i % self.nsplits] += obs_list.tolist()
+
+            final_splits[direction] = splits
+
+        return final_splits
+
+    @classmethod
+    def get_workflows(cls, desc=None) -> List[NullTestWorkflow]:
+        """
+        Create a list of NullTestWorkflows instances from the provided descriptions.
+
+        Creates separate workflows for each direction split following the naming
+        convention: {setname} = direction_[rising,setting,middle]
+        """
+        direction_workflow = cls(**desc)
+
+        workflows = []
+        for direction, direction_splits in direction_workflow._splits.items():
+            for split_idx, split in enumerate(direction_splits):
+                if not split:
+                    continue
+                desc_copy = direction_workflow.model_dump(exclude_unset=True)
+                desc_copy["name"] = (
+                    f"direction_{direction}_split_{split_idx + 1}_null_test_workflow"
+                )
+                desc_copy["datasize"] = 0
+                # Follow the naming convention: direction_[rising,setting,middle]
+                desc_copy["output_dir"] = (
+                    f"{direction_workflow.output_dir}/direction_{direction}_split_{split_idx + 1}"
+                )
+                query_file = Path(desc_copy["output_dir"]) / "query.txt"
+                query_file.parent.mkdir(parents=True, exist_ok=True)
+                with open(query_file, "w") as f:
+                    for oid in split:
+                        f.write(f"{oid}\n")
+                desc_copy["query"] = f"file://{str(query_file.absolute())}"
+                desc_copy["chunk_nobs"] = 1
+                workflow = NullTestWorkflow(**desc_copy)
+                workflows.append(workflow)
+
+        return workflows
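
The direction rule reduces az_center modulo 360 and compares it against 180 degrees, with np.isclose absorbing floating-point noise at the boundary. A condensed sketch of the same classification (the unreachable ValueError branch is omitted, and the sample azimuths are made up):

# Azimuth-based scan-direction rule from _get_splits() above, condensed.
import numpy as np

def classify(az_center: float) -> str:
    az = az_center % 360
    if np.isclose(az, 180):
        return "middle"   # pointing due south: neither rising nor setting
    elif az > 180:
        return "setting"  # west of south
    else:
        return "rising"   # east of south

for az in (90.0, 180.0, 270.0, 540.0):
    print(az, "->", classify(az))
# 90.0 -> rising, 180.0 -> middle, 270.0 -> setting, 540.0 -> middle (540 % 360 == 180)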

socm/workflows/ml_null_tests/elevation_null_test.py
@@ -0,0 +1,118 @@
+from datetime import timedelta
+from pathlib import Path
+from typing import Dict, List, Optional, Union
+
+import numpy as np
+from sotodlib.core import Context
+
+from socm.workflows.ml_null_tests import NullTestWorkflow
+
+
+class ElevationNullTestWorkflow(NullTestWorkflow):
+    """
+    A workflow for elevation null tests.
+
+    This workflow splits observations based on their elevation angles.
+    It creates time-interleaved splits with nsplits=2 as specified.
+    """
+
+    chunk_nobs: Optional[int] = None
+    chunk_duration: Optional[timedelta] = None
+    nsplits: int = 2  # Fixed to 2 as specified in the issue
+    name: str = "elevation_null_test_workflow"
+    elevation_threshold: float = 45.0  # Elevation threshold in degrees
+
+    def _get_splits(
+        self, ctx: Context, obs_info: Dict[str, Dict[str, Union[float, str]]]
+    ) -> Dict[str, List[List[str]]]:
+        """
+        Distribute the observations across splits based on elevation angles.
+
+        Groups observations by their elevation angles and then creates time-interleaved
+        splits for each with nsplits=2.
+
+        Args:
+            ctx: Context object
+            obs_info: Dictionary mapping obs_id to observation metadata
+
+        Returns:
+            Dict mapping 'day' and 'night' to list of splits, where each split is a list
+            of obs_ids
+        """
+        if self.chunk_nobs is None and self.chunk_duration is None:
+            raise ValueError("Either chunk_nobs or duration must be set.")
+        elif self.chunk_nobs is not None and self.chunk_duration is not None:
+            raise ValueError("Only one of chunk_nobs or duration can be set.")
+        elif self.chunk_nobs is None:
+            # Decide the chunk size based on the duration
+            raise NotImplementedError(
+                "Splitting by duration is not implemented yet. Please set chunk_nobs."
+            )
+
+        # Group observations by elevation angles
+        elevation_splits = {"low": [], "high": []}
+        for obs_id, obs_meta in obs_info.items():
+            if obs_meta["el_center"] < self.elevation_threshold:
+                elevation_splits["low"].append(obs_id)
+            else:
+                elevation_splits["high"].append(obs_id)
+
+        final_splits = {}
+
+        # For each elevation, create time-interleaved splits
+        for elevation, obs_infos in elevation_splits.items():
+            if not obs_infos:
+                continue
+
+            # Sort by timestamp for time-based splitting
+            sorted_ids = sorted(obs_infos, key=lambda k: obs_info[k]["start_time"])
+
+            # Group in chunks based on chunk_nobs
+            num_chunks = self._get_num_chunks(len(sorted_ids))
+            obs_lists = np.array_split(sorted_ids, num_chunks) if num_chunks > 0 else []
+
+            # Create nsplits (=2) time-interleaved splits
+            splits = [[] for _ in range(self.nsplits)]
+            for i, obs_list in enumerate(obs_lists):
+                splits[i % self.nsplits] += obs_list.tolist()
+
+            final_splits[elevation] = splits
+
+        return final_splits
+
+    @classmethod
+    def get_workflows(cls, desc=None) -> List[NullTestWorkflow]:
+        """
+        Create a list of NullTestWorkflows instances from the provided descriptions.
+
+        Creates separate workflows for each direction split following the naming
+        convention: {setname} = direction_[rising,setting,middle]
+        """
+        elevation_workflow = cls(**desc)
+
+        workflows = []
+        for elevation, elevation_splits in elevation_workflow._splits.items():
+            for split_idx, split in enumerate(elevation_splits):
+                if not split:
+                    continue
+                desc_copy = elevation_workflow.model_dump(exclude_unset=True)
+                desc_copy["name"] = (
+                    f"elevation_{elevation}_split_{split_idx + 1}_null_test_workflow"
+                )
+
+                # Follow the naming convention: direction_[rising,setting,middle]
+                desc_copy["output_dir"] = (
+                    f"{elevation_workflow.output_dir}/elevation_{elevation}_split_{split_idx + 1}"
+                )
+                desc_copy["datasize"] = 0
+                query_file = Path(desc_copy["output_dir"]) / "query.txt"
+                query_file.parent.mkdir(parents=True, exist_ok=True)
+                with open(query_file, "w") as f:
+                    for oid in split:
+                        f.write(f"{oid}\n")
+                desc_copy["query"] = f"file://{str(query_file.absolute())}"
+                desc_copy["chunk_nobs"] = 1
+                workflow = NullTestWorkflow(**desc_copy)
+                workflows.append(workflow)
+
+        return workflows
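
Each get_workflows() implementation fans a parent description out into one child workflow per non-empty split, persisting the split membership as a query.txt file that the child consumes through the file:// query path. A minimal sketch of that round-trip, using a temporary directory and hypothetical obs_ids rather than the package's classes:

# Query-file round-trip as used by the get_workflows() methods above.
import tempfile
from pathlib import Path

split = ["obs_1700000000", "obs_1700000600", "obs_1700001200"]

output_dir = Path(tempfile.mkdtemp()) / "elevation_low_split_1"
query_file = output_dir / "query.txt"
query_file.parent.mkdir(parents=True, exist_ok=True)
with open(query_file, "w") as f:
    for oid in split:
        f.write(f"{oid}\n")  # one obs_id per line

query = f"file://{query_file.absolute()}"
# The child workflow strips the scheme and reads the ids back:
ids = Path(query.split("file://")[-1]).read_text().split()
assert ids == split
print(query)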