so-campaign-manager 0.0.4 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. so_campaign_manager-0.0.4.dist-info/METADATA +179 -0
  2. so_campaign_manager-0.0.4.dist-info/RECORD +44 -0
  3. so_campaign_manager-0.0.4.dist-info/WHEEL +5 -0
  4. so_campaign_manager-0.0.4.dist-info/entry_points.txt +2 -0
  5. so_campaign_manager-0.0.4.dist-info/licenses/LICENSE +24 -0
  6. so_campaign_manager-0.0.4.dist-info/top_level.txt +1 -0
  7. socm/__about__.py +34 -0
  8. socm/__init__.py +0 -0
  9. socm/__main__.py +35 -0
  10. socm/bookkeeper/__init__.py +1 -0
  11. socm/bookkeeper/bookkeeper.py +488 -0
  12. socm/configs/slurmise.toml +2 -0
  13. socm/core/__init__.py +1 -0
  14. socm/core/models.py +235 -0
  15. socm/enactor/__init__.py +3 -0
  16. socm/enactor/base.py +123 -0
  17. socm/enactor/dryrun_enactor.py +216 -0
  18. socm/enactor/rp_enactor.py +273 -0
  19. socm/execs/__init__.py +3 -0
  20. socm/execs/mapmaking.py +73 -0
  21. socm/planner/__init__.py +2 -0
  22. socm/planner/base.py +87 -0
  23. socm/planner/heft_planner.py +442 -0
  24. socm/resources/__init__.py +5 -0
  25. socm/resources/perlmutter.py +22 -0
  26. socm/resources/tiger.py +24 -0
  27. socm/resources/universe.py +18 -0
  28. socm/utils/__init__.py +0 -0
  29. socm/utils/misc.py +90 -0
  30. socm/utils/states.py +17 -0
  31. socm/workflows/__init__.py +41 -0
  32. socm/workflows/ml_mapmaking.py +111 -0
  33. socm/workflows/ml_null_tests/__init__.py +10 -0
  34. socm/workflows/ml_null_tests/base.py +117 -0
  35. socm/workflows/ml_null_tests/day_night_null_test.py +132 -0
  36. socm/workflows/ml_null_tests/direction_null_test.py +133 -0
  37. socm/workflows/ml_null_tests/elevation_null_test.py +118 -0
  38. socm/workflows/ml_null_tests/moon_close_null_test.py +165 -0
  39. socm/workflows/ml_null_tests/moonrise_set_null_test.py +151 -0
  40. socm/workflows/ml_null_tests/pwv_null_test.py +118 -0
  41. socm/workflows/ml_null_tests/sun_close_null_test.py +173 -0
  42. socm/workflows/ml_null_tests/time_null_test.py +76 -0
  43. socm/workflows/ml_null_tests/wafer_null_test.py +175 -0
  44. socm/workflows/sat_simulation.py +76 -0
socm/workflows/ml_mapmaking.py
@@ -0,0 +1,111 @@
+ from functools import lru_cache
+ from pathlib import Path
+ from typing import Any, List, Optional, Union
+
+ from sotodlib.core import Context
+
+ from socm.core import Workflow
+ from socm.utils.misc import get_query_from_file
+
+
+ @lru_cache(maxsize=10)
+ def _load_context(ctx_path: str) -> Context:
+     return Context(Path(ctx_path))
+
+ class MLMapmakingWorkflow(Workflow):
+     """
+     A workflow for ML mapmaking.
+     """
+
+     area: str
+     output_dir: str
+     preprocess_config: str
+     query: str = "1"
+     name: str = "ml_mapmaking_workflow"
+     executable: str = "so-site-pipeline"
+     subcommand: str = "make-ml-map"
+     datasize: int = 0
+     comps: Optional[str] = "TQU"
+     wafers: Optional[str] = None
+     bands: Optional[str] = None
+     nmat: Optional[str] = "corr"
+     max_dets: Optional[int] = None
+     site: Optional[str] = "so_lat"
+     downsample: Union[int, List[int]] = 1
+     maxiter: Union[int, List[int]] = 500
+     tiled: int = 1
+
+     def model_post_init(self, __context: Any) -> None:
+         """
+         Post-initialization to set the context for the workflow.
+         """
+         ctx_file = Path(self.context.split("file://")[-1]).absolute()
+         ctx = _load_context(str(ctx_file))
+
+         final_query = self.query
+         if self.query.startswith("file://"):
+             query_path = Path(self.query.split("file://")[-1]).absolute()
+             final_query = get_query_from_file(query_path)
+         obs_ids = ctx.obsdb.query(final_query)
+         for obs_id in obs_ids:
+             self.datasize += obs_id["n_samples"]
+
+     def get_command(self) -> str:
+         """
+         Get the command to run the ML mapmaking workflow.
+         """
+         command = f"srun --cpu_bind=cores --export=ALL --ntasks-per-node={self.resources['ranks']} --cpus-per-task={self.resources['threads']} {self.executable} {self.subcommand} "
+         command += " ".join(self.get_arguments())
+
+         return command.strip()
+
+     def get_arguments(self) -> List[str]:
+         """
+         Get the command to run the ML mapmaking workflow.
+         """
+         area = Path(self.area.split("file://")[-1])
+         final_query = self.query
+         if self.query.startswith("file://"):
+             final_query = Path(self.query.split("file://")[-1]).absolute()
+             final_query = f"{final_query.absolute()}"
+         preprocess_config = Path(self.preprocess_config.split("file://")[-1])
+
+         arguments = [final_query, f"{area.absolute()}", self.output_dir, f"{preprocess_config.absolute()}"]
+         sorted_workflow = dict(sorted(self.model_dump(exclude_unset=True).items()))
+
+         for k, v in sorted_workflow.items():
+             if isinstance(v, str) and v.startswith("file://"):
+                 v = Path(v.split("file://")[-1]).absolute()
+             elif isinstance(v, list):
+                 v = ",".join([str(item) for item in v])
+             if k not in [
+                 "area",
+                 "output_dir",
+                 "executable",
+                 "query",
+                 "output_dir",
+                 "id",
+                 "environment",
+                 "resources",
+                 "datasize",
+                 "preprocess_config"
+             ]:
+                 arguments.append(f"--{k}={v}")
+         return arguments
+
+     @classmethod
+     def get_workflows(
+         cls, descriptions: Union[List[dict], dict]
+     ) -> List["MLMapmakingWorkflow"]:
+         """
+         Create a list of MLMapmakingWorkflow instances from the provided descriptions.
+         """
+         if isinstance(descriptions, dict):
+             descriptions = [descriptions]
+
+         workflows = []
+         for desc in descriptions:
+             workflow = cls(**desc)
+             workflows.append(workflow)
+
+         return workflows
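For orientation, here is a minimal usage sketch (an editorial illustration, not part of the package). It assumes that the base Workflow model in socm/core/models.py, which is not shown in this hunk, supplies the `context` and `resources` fields, and that the context file points at a real obsdb, since model_post_init queries it to accumulate `datasize`. All paths are hypothetical.

    from socm.workflows import MLMapmakingWorkflow

    desc = {
        "context": "file:///path/to/context.yaml",           # hypothetical path
        "area": "file:///path/to/map_geometry.fits",          # hypothetical path
        "output_dir": "/scratch/ml_maps",
        "preprocess_config": "file:///path/to/preprocess.yaml",
        "query": "file:///path/to/query.txt",
        "resources": {"ranks": 4, "threads": 16},             # assumed layout used by get_command
    }

    # get_workflows accepts a single dict or a list of dicts and returns instances.
    [wf] = MLMapmakingWorkflow.get_workflows(desc)
    # Positional args come first (query, area, output_dir, preprocess_config),
    # followed by --field=value flags for the remaining set fields.
    print(wf.get_command())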
socm/workflows/ml_null_tests/__init__.py
@@ -0,0 +1,10 @@
+ from .base import NullTestWorkflow  # noqa: F401
+ from .day_night_null_test import DayNightNullTestWorkflow  # noqa: F401
+ from .direction_null_test import DirectionNullTestWorkflow  # noqa: F401
+ from .elevation_null_test import ElevationNullTestWorkflow  # noqa: F401
+ from .moon_close_null_test import MoonCloseFarNullTestWorkflow  # noqa: F401
+ from .moonrise_set_null_test import MoonRiseSetNullTestWorkflow  # noqa: F401
+ from .pwv_null_test import PWVNullTestWorkflow  # noqa: F401
+ from .sun_close_null_test import SunCloseFarNullTestWorkflow  # noqa: F401
+ from .time_null_test import TimeNullTestWorkflow  # noqa: F401
+ from .wafer_null_test import WaferNullTestWorkflow  # noqa: F401
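These re-exports define the public surface of the ml_null_tests subpackage; the subclass modules later in this diff import the shared base class through it, e.g. (sketch only):

    # Both the base class and any concrete null-test workflow are importable
    # directly from the subpackage.
    from socm.workflows.ml_null_tests import NullTestWorkflow, DayNightNullTestWorkflow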
socm/workflows/ml_null_tests/base.py
@@ -0,0 +1,117 @@
+ from datetime import timedelta
+ from pathlib import Path
+ from typing import Any, Dict, List, Optional, Union
+
+ from sotodlib.core import Context
+
+ from socm.utils.misc import get_query_from_file
+ from socm.workflows import MLMapmakingWorkflow
+
+
+ class NullTestWorkflow(MLMapmakingWorkflow):
+     """
+     A workflow for null tests.
+     """
+
+     area: str
+     output_dir: str
+     query: str = "1"
+     name: str = "lat_null_test_workflow"
+     datasize: int = 0
+     chunk_nobs: Optional[int] = None
+     chunk_duration: Optional[timedelta] = None
+
+     def model_post_init(self, __context: Any) -> None:
+         """
+         Post-initialization to set the context for the workflow and distribute the
+         observations across splits.
+         """
+         ctx_file = Path(self.context.split("file://")[-1]).absolute()
+         ctx = Context(ctx_file)
+         final_query = self.query
+         if self.query.startswith("file://"):
+             query_path = Path(self.query.split("file://")[-1]).absolute()
+             final_query = get_query_from_file(query_path)
+         obs_ids = ctx.obsdb.query(final_query)
+         obs_info = dict()
+         for obs_id in obs_ids:
+             self.datasize += obs_id["n_samples"]
+             obs_info[obs_id["obs_id"]] = {
+                 "start_time": obs_id["timestamp"],
+                 "wafer_list": obs_id["wafer_slots_list"].split(","),
+                 "tube_slot": obs_id.get("tube_slot", "st1"),
+                 "az_center": obs_id["az_center"],
+                 "el_center": obs_id["el_center"],
+                 "pwv": obs_id.get("pwv", 0),
+             }
+         # Ensure obs_ids are sorted by their timestamp
+         # Order the obs_ids based on their timestamp it is in the obs_meta.obs_info.timestamp
+
+         self._splits = self._get_splits(ctx, obs_info)
+
+     def _get_num_chunks(self, num_obs: int) -> int:
+         num_chunks = (
+             num_obs + self.chunk_nobs - 1
+         ) // self.chunk_nobs  # Ceiling division
+         return num_chunks
+
+     def _get_splits(
+         self, ctx: Context, obs_info: Dict[str, Dict[str, Union[float, str]]]
+     ) -> List[List[str]]:
+         """
+         Distribute the observations across splits based on the context and observation IDs.
+         """
+         if self.__class__.__name__ != "NullTestWorkflow":
+             raise NotImplementedError(
+                 "This method should be implemented in subclasses."
+             )
+         else:
+             pass
+
+     @classmethod
+     def get_workflows(cls, desc: Dict[str, Any]) -> List["NullTestWorkflow"]:
+         """
+         Distribute the observations across splits based on the context and observation IDs.
+         """
+         if cls.__name__ != "NullTestWorkflow":
+             raise NotImplementedError(
+                 "This method should be implemented in subclasses."
+             )
+         else:
+             pass
+
+     def get_arguments(self) -> List[str]:
+         """
+         Get the command to run the ML mapmaking workflow.
+         """
+         area = Path(self.area.split("file://")[-1])
+         query = Path(self.query.split("file://")[-1])
+         preprocess_config = Path(self.preprocess_config.split("file://")[-1])
+
+         arguments = [f"{query.absolute()}", f"{area.absolute()}", self.output_dir, f"{preprocess_config.absolute()}"]
+         sorted_workflow = dict(sorted(self.model_dump(exclude_unset=True).items()))
+
+         for k, v in sorted_workflow.items():
+             if isinstance(v, str) and v.startswith("file://"):
+                 v = Path(v.split("file://")[-1]).absolute()
+             elif isinstance(v, list):
+                 v = ",".join([str(item) for item in v])
+             if k not in [
+                 "area",
+                 "output_dir",
+                 "executable",
+                 "query",
+                 "id",
+                 "environment",
+                 "resources",
+                 "datasize",
+                 "chunk_nobs",
+                 "nsplits",
+                 "wafers",
+                 "subcommand",
+                 "name",
+                 "chunk_duration",
+                 "preprocess_config"
+             ]:
+                 arguments.append(f"--{k}={v}")
+         return arguments
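The `_get_num_chunks` helper relies on the integer ceiling-division idiom, so the chunk count is ceil(num_obs / chunk_nobs). A standalone check of that arithmetic (an illustration, not package code):

    import math

    def num_chunks(num_obs: int, chunk_nobs: int) -> int:
        # (n + k - 1) // k is integer ceiling division, i.e. ceil(n / k)
        return (num_obs + chunk_nobs - 1) // chunk_nobs

    assert num_chunks(10, 3) == math.ceil(10 / 3) == 4
    assert num_chunks(9, 3) == 3   # exact multiples need no extra chunk
    assert num_chunks(1, 5) == 1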
socm/workflows/ml_null_tests/day_night_null_test.py
@@ -0,0 +1,132 @@
+ from datetime import datetime, timedelta, timezone
+ from pathlib import Path
+ from typing import Dict, List, Optional, Union
+
+ import numpy as np
+ import pytz
+ from astral import LocationInfo
+ from astral.sun import sun
+ from sotodlib.core import Context
+
+ from socm.workflows.ml_null_tests import NullTestWorkflow
+
+
+ class DayNightNullTestWorkflow(NullTestWorkflow):
+     """
+     A workflow for day/night null tests.
+
+     This workflow splits observations based on whether they were taken during the day or night.
+     It creates time-interleaved splits with nsplits=2 as specified.
+     """
+
+     chunk_nobs: Optional[int] = None
+     chunk_duration: Optional[timedelta] = None
+     nsplits: int = 2  # Fixed to 2 as specified in the issue
+     name: str = "day_night_null_test_workflow"
+
+     def _get_splits(
+         self, ctx: Context, obs_info: Dict[str, Dict[str, Union[float, str]]]
+     ) -> Dict[str, List[List[str]]]:
+         """
+         Distribute the observations across splits based on day/night.
+
+         Groups observations by whether they were taken during the day or night and then
+         creates time-interleaved splits for each with nsplits=2.
+
+         Args:
+             ctx: Context object
+             obs_info: Dictionary mapping obs_id to observation metadata
+
+         Returns:
+             Dict mapping 'day' and 'night' to list of splits, where each split is a list
+             of obs_ids
+         """
+         if self.chunk_nobs is None and self.chunk_duration is None:
+             raise ValueError("Either chunk_nobs or duration must be set.")
+         elif self.chunk_nobs is not None and self.chunk_duration is not None:
+             raise ValueError("Only one of chunk_nobs or duration can be set.")
+         elif self.chunk_nobs is None:
+             # Decide the chunk size based on the duration
+             raise NotImplementedError(
+                 "Splitting by duration is not implemented yet. Please set chunk_nobs."
+             )
+
+         # Group observations by day/night
+         day_night_splits = {"day": [], "night": []}
+         for obs_id, obs_meta in obs_info.items():
+             obs_time = datetime.fromtimestamp(
+                 timestamp=obs_meta["start_time"], tz=timezone.utc
+             )  # Assuming time is in ISO format
+
+             # Determine if it's day or night using the sun position
+             city = LocationInfo(
+                 "San Pedro de Atacama", "Chile", "America/Santiago", -22.91, -68.2
+             )
+             s = sun(
+                 city.observer, date=obs_time.date(), tzinfo=pytz.timezone(city.timezone)
+             )
+
+             if s["sunrise"] <= obs_time <= s["sunset"]:
+                 day_night_splits["day"].append(obs_id)
+             else:
+                 day_night_splits["night"].append(obs_id)
+
+         final_splits = {}
+
+         # For each direction, create time-interleaved splits
+         for day_night, obs_infos in day_night_splits.items():
+             if not obs_infos:
+                 continue
+
+             # Sort by timestamp for time-based splitting
+             sorted_ids = sorted(obs_infos, key=lambda k: obs_info[k]["start_time"])
+
+             # Group in chunks based on chunk_nobs
+             num_chunks = self._get_num_chunks(len(sorted_ids))
+             obs_lists = np.array_split(sorted_ids, num_chunks) if num_chunks > 0 else []
+
+             # Create nsplits (=2) time-interleaved splits
+             splits = [[] for _ in range(self.nsplits)]
+             for i, obs_list in enumerate(obs_lists):
+                 splits[i % self.nsplits] += obs_list.tolist()
+
+             final_splits[day_night] = splits
+
+         return final_splits
+
+     @classmethod
+     def get_workflows(cls, desc=None) -> List[NullTestWorkflow]:
+         """
+         Create a list of NullTestWorkflows instances from the provided descriptions.
+
+         Creates separate workflows for each direction split following the naming
+         convention: {setname} = direction_[rising,setting,middle]
+         """
+         day_night_workflow = cls(**desc)
+
+         workflows = []
+         for day_night, day_night_splits in day_night_workflow._splits.items():
+             for split_idx, split in enumerate(day_night_splits):
+                 if not split:
+                     continue
+                 desc_copy = day_night_workflow.model_dump(exclude_unset=True)
+                 # Follow the naming convention: direction_[rising,setting,middle]
+                 desc_copy["output_dir"] = (
+                     f"{day_night_workflow.output_dir}/{day_night}_split_{split_idx + 1}"
+                 )
+                 desc_copy["name"] = (
+                     f"{day_night}_split_{split_idx + 1}_null_test_workflow"
+                 )
+                 desc_copy["datasize"] = 0
+                 query_file = Path(desc_copy["output_dir"]) / "query.txt"
+                 query_file.parent.mkdir(parents=True, exist_ok=True)
+                 with open(query_file, "w") as f:
+                     for oid in split:
+                         f.write(f"{oid}\n")
+                 desc_copy["query"] = f"file://{str(query_file.absolute())}"
+                 desc_copy["chunk_nobs"] = 1
+
+                 workflow = NullTestWorkflow(**desc_copy)
+                 workflows.append(workflow)
+
+         return workflows
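The day/night label hinges on astral's sunrise and sunset times at the hard-coded San Pedro de Atacama location. A self-contained sketch of that classification step (illustrative only; the observation time is made up):

    from datetime import datetime, timezone

    import pytz
    from astral import LocationInfo
    from astral.sun import sun

    site = LocationInfo("San Pedro de Atacama", "Chile", "America/Santiago", -22.91, -68.2)
    obs_time = datetime(2024, 6, 1, 15, 0, tzinfo=timezone.utc)  # hypothetical start time (UTC)

    # sun() returns timezone-aware sunrise/sunset, so comparing against a
    # UTC-aware obs_time is valid even though the tzinfo objects differ.
    s = sun(site.observer, date=obs_time.date(), tzinfo=pytz.timezone(site.timezone))
    label = "day" if s["sunrise"] <= obs_time <= s["sunset"] else "night"
    print(label)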
socm/workflows/ml_null_tests/direction_null_test.py
@@ -0,0 +1,133 @@
+ from datetime import timedelta
+ from pathlib import Path
+ from typing import Dict, List, Optional, Union
+
+ import numpy as np
+ from sotodlib.core import Context
+
+ from socm.workflows.ml_null_tests import NullTestWorkflow
+
+
+ class DirectionNullTestWorkflow(NullTestWorkflow):
+     """
+     A workflow for direction null tests.
+
+     This workflow splits observations based on scan direction (rising, setting, middle)
+     and creates time-interleaved splits with nsplits=2 as specified.
+     """
+
+     chunk_nobs: Optional[int] = None
+     chunk_duration: Optional[timedelta] = None
+     nsplits: int = 2  # Fixed to 2 as specified in the issue
+     name: str = "direction_null_test_workflow"
+
+     def _get_splits(
+         self, ctx: Context, obs_info: Dict[str, Dict[str, Union[float, str]]]
+     ) -> Dict[str, List[List[str]]]:
+         """
+         Distribute the observations across splits based on scan direction.
+
+         Groups observations by direction (rising, setting, middle) and then
+         creates time-interleaved splits for each direction with nsplits=2.
+
+         Args:
+             ctx: Context object
+             obs_info: Dictionary mapping obs_id to observation metadata
+
+         Returns:
+             Dict mapping direction to list of splits, where each split is a list
+             of obs_ids
+         """
+         if self.chunk_nobs is None and self.chunk_duration is None:
+             raise ValueError("Either chunk_nobs or duration must be set.")
+         elif self.chunk_nobs is not None and self.chunk_duration is not None:
+             raise ValueError("Only one of chunk_nobs or duration can be set.")
+         elif self.chunk_nobs is None:
+             # Decide the chunk size based on the duration
+             raise NotImplementedError(
+                 "Splitting by duration is not implemented yet. Please set chunk_nobs."
+             )
+
+         # Group observations by scan direction
+         direction_splits = {"rising": [], "setting": [], "middle": []}
+         for obs_id, obs_meta in obs_info.items():
+             if np.isclose(
+                 obs_meta["az_center"] % 360, 180
+             ):  # Azimuth close to 180 is considered 'middle'
+                 direction = "middle"
+             elif (
+                 obs_meta["az_center"] % 360
+             ) > 180:  # More than 180 degrees is considered 'setting'
+                 direction = "setting"
+             elif (
+                 obs_meta["az_center"] % 360
+             ) < 180:  # Less than 180 degrees is considered 'rising'
+                 direction = "rising"
+             else:
+                 raise ValueError(
+                     f"Unknown azimuth center value for {obs_id}: {obs_meta['az_center']}"
+                 )
+
+             if direction in direction_splits:
+                 direction_splits[direction].append(obs_id)
+
+         final_splits = {}
+
+         # For each direction, create time-interleaved splits
+         for direction, direction_obs_info in direction_splits.items():
+             if not direction_obs_info:
+                 continue
+
+             # Sort by timestamp for time-based splitting
+             sorted_ids = sorted(
+                 direction_obs_info, key=lambda k: obs_info[k]["start_time"]
+             )
+
+             # Group in chunks based on chunk_nobs
+             num_chunks = self._get_num_chunks(len(sorted_ids))
+             obs_lists = np.array_split(sorted_ids, num_chunks) if num_chunks > 0 else []
+
+             # Create nsplits (=2) time-interleaved splits
+             splits = [[] for _ in range(self.nsplits)]
+             for i, obs_list in enumerate(obs_lists):
+                 splits[i % self.nsplits] += obs_list.tolist()
+
+             final_splits[direction] = splits
+
+         return final_splits
+
+     @classmethod
+     def get_workflows(cls, desc=None) -> List[NullTestWorkflow]:
+         """
+         Create a list of NullTestWorkflows instances from the provided descriptions.
+
+         Creates separate workflows for each direction split following the naming
+         convention: {setname} = direction_[rising,setting,middle]
+         """
+         direction_workflow = cls(**desc)
+
+         workflows = []
+         for direction, direction_splits in direction_workflow._splits.items():
+             for split_idx, split in enumerate(direction_splits):
+                 if not split:
+                     continue
+                 desc_copy = direction_workflow.model_dump(exclude_unset=True)
+                 desc_copy["name"] = (
+                     f"direction_{direction}_split_{split_idx + 1}_null_test_workflow"
+                 )
+                 desc_copy["datasize"] = 0
+                 # Follow the naming convention: direction_[rising,setting,middle]
+                 desc_copy["output_dir"] = (
+                     f"{direction_workflow.output_dir}/direction_{direction}_split_{split_idx + 1}"
+                 )
+                 query_file = Path(desc_copy["output_dir"]) / "query.txt"
+                 query_file.parent.mkdir(parents=True, exist_ok=True)
+                 with open(query_file, "w") as f:
+                     for oid in split:
+                         f.write(f"{oid}\n")
+                 desc_copy["query"] = f"file://{str(query_file.absolute())}"
+                 desc_copy["chunk_nobs"] = 1
+                 workflow = NullTestWorkflow(**desc_copy)
+                 workflows.append(workflow)
+
+         return workflows
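The direction label is a pure function of the azimuth center modulo 360 degrees. The same rule pulled out and applied to a few sample values (illustration only, not package code):

    import numpy as np

    def classify(az_center: float) -> str:
        az = az_center % 360
        if np.isclose(az, 180):
            return "middle"   # azimuth near 180 deg is treated as 'middle'
        return "setting" if az > 180 else "rising"

    for az in (90.0, 180.0, 270.0, 450.0):
        print(az, "->", classify(az))
    # 90.0 -> rising, 180.0 -> middle, 270.0 -> setting, 450.0 -> rising (450 wraps to 90)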
socm/workflows/ml_null_tests/elevation_null_test.py
@@ -0,0 +1,118 @@
+ from datetime import timedelta
+ from pathlib import Path
+ from typing import Dict, List, Optional, Union
+
+ import numpy as np
+ from sotodlib.core import Context
+
+ from socm.workflows.ml_null_tests import NullTestWorkflow
+
+
+ class ElevationNullTestWorkflow(NullTestWorkflow):
+     """
+     A workflow for elevation null tests.
+
+     This workflow splits observations based on their elevation angles.
+     It creates time-interleaved splits with nsplits=2 as specified.
+     """
+
+     chunk_nobs: Optional[int] = None
+     chunk_duration: Optional[timedelta] = None
+     nsplits: int = 2  # Fixed to 2 as specified in the issue
+     name: str = "elevation_null_test_workflow"
+     elevation_threshold: float = 45.0  # Elevation threshold in degrees
+
+     def _get_splits(
+         self, ctx: Context, obs_info: Dict[str, Dict[str, Union[float, str]]]
+     ) -> Dict[str, List[List[str]]]:
+         """
+         Distribute the observations across splits based on elevation angles.
+
+         Groups observations by their elevation angles and then creates time-interleaved
+         splits for each with nsplits=2.
+
+         Args:
+             ctx: Context object
+             obs_info: Dictionary mapping obs_id to observation metadata
+
+         Returns:
+             Dict mapping 'day' and 'night' to list of splits, where each split is a list
+             of obs_ids
+         """
+         if self.chunk_nobs is None and self.chunk_duration is None:
+             raise ValueError("Either chunk_nobs or duration must be set.")
+         elif self.chunk_nobs is not None and self.chunk_duration is not None:
+             raise ValueError("Only one of chunk_nobs or duration can be set.")
+         elif self.chunk_nobs is None:
+             # Decide the chunk size based on the duration
+             raise NotImplementedError(
+                 "Splitting by duration is not implemented yet. Please set chunk_nobs."
+             )
+
+         # Group observations by elevation angles
+         elevation_splits = {"low": [], "high": []}
+         for obs_id, obs_meta in obs_info.items():
+             if obs_meta["el_center"] < self.elevation_threshold:
+                 elevation_splits["low"].append(obs_id)
+             else:
+                 elevation_splits["high"].append(obs_id)
+
+         final_splits = {}
+
+         # For each elevation, create time-interleaved splits
+         for elevation, obs_infos in elevation_splits.items():
+             if not obs_infos:
+                 continue
+
+             # Sort by timestamp for time-based splitting
+             sorted_ids = sorted(obs_infos, key=lambda k: obs_info[k]["start_time"])
+
+             # Group in chunks based on chunk_nobs
+             num_chunks = self._get_num_chunks(len(sorted_ids))
+             obs_lists = np.array_split(sorted_ids, num_chunks) if num_chunks > 0 else []
+
+             # Create nsplits (=2) time-interleaved splits
+             splits = [[] for _ in range(self.nsplits)]
+             for i, obs_list in enumerate(obs_lists):
+                 splits[i % self.nsplits] += obs_list.tolist()
+
+             final_splits[elevation] = splits
+
+         return final_splits
+
+     @classmethod
+     def get_workflows(cls, desc=None) -> List[NullTestWorkflow]:
+         """
+         Create a list of NullTestWorkflows instances from the provided descriptions.
+
+         Creates separate workflows for each direction split following the naming
+         convention: {setname} = direction_[rising,setting,middle]
+         """
+         elevation_workflow = cls(**desc)
+
+         workflows = []
+         for elevation, elevation_splits in elevation_workflow._splits.items():
+             for split_idx, split in enumerate(elevation_splits):
+                 if not split:
+                     continue
+                 desc_copy = elevation_workflow.model_dump(exclude_unset=True)
+                 desc_copy["name"] = (
+                     f"elevation_{elevation}_split_{split_idx + 1}_null_test_workflow"
+                 )
+
+                 # Follow the naming convention: direction_[rising,setting,middle]
+                 desc_copy["output_dir"] = (
+                     f"{elevation_workflow.output_dir}/elevation_{elevation}_split_{split_idx + 1}"
+                 )
+                 desc_copy["datasize"] = 0
+                 query_file = Path(desc_copy["output_dir"]) / "query.txt"
+                 query_file.parent.mkdir(parents=True, exist_ok=True)
+                 with open(query_file, "w") as f:
+                     for oid in split:
+                         f.write(f"{oid}\n")
+                 desc_copy["query"] = f"file://{str(query_file.absolute())}"
+                 desc_copy["chunk_nobs"] = 1
+                 workflow = NullTestWorkflow(**desc_copy)
+                 workflows.append(workflow)
+
+         return workflows
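The day/night, direction, and elevation workflows above all share the same chunk-and-interleave step once the observations are grouped: sort IDs by start time, cut them into roughly chunk_nobs-sized chunks with np.array_split, and deal the chunks round-robin into nsplits lists. That step in isolation, with made-up IDs (illustration only):

    import numpy as np

    obs_ids = [f"obs_{i:02d}" for i in range(10)]   # already sorted by start time
    chunk_nobs, nsplits = 2, 2

    num_chunks = (len(obs_ids) + chunk_nobs - 1) // chunk_nobs   # ceiling division
    chunks = np.array_split(obs_ids, num_chunks)

    splits = [[] for _ in range(nsplits)]
    for i, chunk in enumerate(chunks):
        splits[i % nsplits] += chunk.tolist()        # alternate chunks between the splits

    # splits[0] gets chunks 0, 2, 4 and splits[1] gets chunks 1, 3
    print(splits)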