sparkkflow 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. sparkkflow-0.1.0/PKG-INFO +100 -0
  2. sparkkflow-0.1.0/README.md +72 -0
  3. sparkkflow-0.1.0/pyproject.toml +45 -0
  4. sparkkflow-0.1.0/setup.cfg +4 -0
  5. sparkkflow-0.1.0/sparkkflow/__init__.py +6 -0
  6. sparkkflow-0.1.0/sparkkflow/atom_run.py +13 -0
  7. sparkkflow-0.1.0/sparkkflow/atom_scan.py +73 -0
  8. sparkkflow-0.1.0/sparkkflow/calc_arpes.py +49 -0
  9. sparkkflow-0.1.0/sparkkflow/calc_ni2fega.py +61 -0
  10. sparkkflow-0.1.0/sparkkflow/cli.py +261 -0
  11. sparkkflow-0.1.0/sparkkflow/config.py +452 -0
  12. sparkkflow-0.1.0/sparkkflow/jobs.py +465 -0
  13. sparkkflow-0.1.0/sparkkflow/log.py +505 -0
  14. sparkkflow-0.1.0/sparkkflow/ml.py +804 -0
  15. sparkkflow-0.1.0/sparkkflow/monitor.py +172 -0
  16. sparkkflow-0.1.0/sparkkflow/plot.py +449 -0
  17. sparkkflow-0.1.0/sparkkflow/plot_tr.py +100 -0
  18. sparkkflow-0.1.0/sparkkflow/run.py +43 -0
  19. sparkkflow-0.1.0/sparkkflow/scans.py +288 -0
  20. sparkkflow-0.1.0/sparkkflow/search.py +61 -0
  21. sparkkflow-0.1.0/sparkkflow/state.py +97 -0
  22. sparkkflow-0.1.0/sparkkflow/trcdad.py +27 -0
  23. sparkkflow-0.1.0/sparkkflow/trcdad_create.py +47 -0
  24. sparkkflow-0.1.0/sparkkflow/trcdad_pot.py +57 -0
  25. sparkkflow-0.1.0/sparkkflow.egg-info/PKG-INFO +100 -0
  26. sparkkflow-0.1.0/sparkkflow.egg-info/SOURCES.txt +34 -0
  27. sparkkflow-0.1.0/sparkkflow.egg-info/dependency_links.txt +1 -0
  28. sparkkflow-0.1.0/sparkkflow.egg-info/entry_points.txt +2 -0
  29. sparkkflow-0.1.0/sparkkflow.egg-info/requires.txt +23 -0
  30. sparkkflow-0.1.0/sparkkflow.egg-info/top_level.txt +1 -0
  31. sparkkflow-0.1.0/tests/test_arrays.py +96 -0
  32. sparkkflow-0.1.0/tests/test_config.py +50 -0
  33. sparkkflow-0.1.0/tests/test_imports.py +21 -0
  34. sparkkflow-0.1.0/tests/test_jobs.py +48 -0
  35. sparkkflow-0.1.0/tests/test_naming.py +32 -0
  36. sparkkflow-0.1.0/tests/test_state.py +55 -0
@@ -0,0 +1,100 @@
1
+ Metadata-Version: 2.4
2
+ Name: sparkkflow
3
+ Version: 0.1.0
4
+ Summary: ARPES workflow engine with SPR-KKR, OSCARpes and ML polarization calibration
5
+ Author: Ridha Eddhib
6
+ License: MIT
7
+ Requires-Python: >=3.9
8
+ Description-Content-Type: text/markdown
9
+ Requires-Dist: numpy
10
+ Requires-Dist: scipy
11
+ Requires-Dist: matplotlib
12
+ Requires-Dist: pyyaml
13
+ Requires-Dist: pandas
14
+ Provides-Extra: ml
15
+ Requires-Dist: torch; extra == "ml"
16
+ Requires-Dist: scikit-learn; extra == "ml"
17
+ Provides-Extra: oscarpes
18
+ Requires-Dist: duckdb; extra == "oscarpes"
19
+ Requires-Dist: pyarrow; extra == "oscarpes"
20
+ Requires-Dist: fsspec; extra == "oscarpes"
21
+ Requires-Dist: oscarpes; extra == "oscarpes"
22
+ Provides-Extra: remote
23
+ Requires-Dist: paramiko; extra == "remote"
24
+ Provides-Extra: dev
25
+ Requires-Dist: pytest; extra == "dev"
26
+ Requires-Dist: ruff; extra == "dev"
27
+ Requires-Dist: mypy; extra == "dev"
28
+
29
+ # Sparkkflow
30
+
31
+ ARPES workflow engine with SPR-KKR, OSCARpes ingestion and ML polarization calibration.
32
+
33
+ ## Install
34
+
35
+ ```bash
36
+ pip install -e . # core
37
+ pip install -e ".[ml]" # + torch / scikit-learn
38
+ pip install -e ".[oscarpes]" # + OSCARpes ingestion
39
+ pip install -e ".[dev]" # + pytest / ruff / mypy
40
+ ```
41
+
42
+ ## CLI
43
+
44
+ ```bash
45
+ sparkkflow doctor # check tooling
46
+ sparkkflow scan --energy-range 20 50 --dicho # run an ARPES scan
47
+ sparkkflow generate --energy-range 20 100 --fine-grid
48
+ sparkkflow train --model-path pol.pth
49
+ sparkkflow predict --model-path pol.pth --energy 30
50
+ sparkkflow monitor --job-ids 12,34 --continuous
51
+ ```
52
+
53
+ ## Library
54
+
55
+ ```python
56
+ from sparkkflow.config import manager
57
+ from sparkkflow.scans import scans
58
+ from sparkkflow.jobs import submitter
59
+ from sparkkflow.ml import pipeline, train, predict
60
+ from sparkkflow.monitor import watch, status
61
+
62
+ cfg = manager("config.yaml").config
63
+ runner = scans(scheduler_type=cfg.scheduler.default_type, ml_mode=False)
64
+ runner.run(energy_range=(20, 50), dicho=True)
65
+ ```
66
+
67
+ ## Layout
68
+
69
+ ```
70
+ sparkkflow/
71
+ __init__.py # public API surface
72
+ cli.py # `sparkkflow` console entry point
73
+ config.py # YAML loader, env overrides, validation
74
+ log.py # structured logging
75
+ jobs.py # SLURM / SGE submitter (`submitter`, `slurm`, `sge`)
76
+ scans.py # ARPES parameter scans (`scans`)
77
+ ml.py # PyTorch pipeline + train / predict
78
+ monitor.py # status polling and resubmission
79
+ plot.py # dichroism plotting
80
+ trcdad*.py # TR+CDAD utilities
81
+ calc_*.py # SPR-KKR calculators
82
+ atom_*.py # atomic-position scans
83
+ config.yaml # configuration template
84
+ examples/ # runnable demos
85
+ tests/ # pytest suite
86
+ ```
87
+
88
+ ## Naming conventions
89
+
90
+ - All modules and public classes are **lowercase, snake_case, ASCII**.
91
+ - No `+`, `-`, spaces, or version words like `enhanced` in filenames.
92
+ - One short responsibility per module name (`config`, `jobs`, `ml`, `scans`).
93
+
94
+ ## Development
95
+
96
+ ```bash
97
+ pytest -q
98
+ ruff check sparkkflow tests
99
+ mypy sparkkflow
100
+ ```
@@ -0,0 +1,72 @@
1
+ # Sparkkflow
2
+
3
+ ARPES workflow engine with SPR-KKR, OSCARpes ingestion and ML polarization calibration.
4
+
5
+ ## Install
6
+
7
+ ```bash
8
+ pip install -e . # core
9
+ pip install -e ".[ml]" # + torch / scikit-learn
10
+ pip install -e ".[oscarpes]" # + OSCARpes ingestion
11
+ pip install -e ".[dev]" # + pytest / ruff / mypy
12
+ ```
13
+
14
+ ## CLI
15
+
16
+ ```bash
17
+ sparkkflow doctor # check tooling
18
+ sparkkflow scan --energy-range 20 50 --dicho # run an ARPES scan
19
+ sparkkflow generate --energy-range 20 100 --fine-grid
20
+ sparkkflow train --model-path pol.pth
21
+ sparkkflow predict --model-path pol.pth --energy 30
22
+ sparkkflow monitor --job-ids 12,34 --continuous
23
+ ```
24
+
25
+ ## Library
26
+
27
+ ```python
28
+ from sparkkflow.config import manager
29
+ from sparkkflow.scans import scans
30
+ from sparkkflow.jobs import submitter
31
+ from sparkkflow.ml import pipeline, train, predict
32
+ from sparkkflow.monitor import watch, status
33
+
34
+ cfg = manager("config.yaml").config
35
+ runner = scans(scheduler_type=cfg.scheduler.default_type, ml_mode=False)
36
+ runner.run(energy_range=(20, 50), dicho=True)
37
+ ```
38
+
39
+ ## Layout
40
+
41
+ ```
42
+ sparkkflow/
43
+ __init__.py # public API surface
44
+ cli.py # `sparkkflow` console entry point
45
+ config.py # YAML loader, env overrides, validation
46
+ log.py # structured logging
47
+ jobs.py # SLURM / SGE submitter (`submitter`, `slurm`, `sge`)
48
+ scans.py # ARPES parameter scans (`scans`)
49
+ ml.py # PyTorch pipeline + train / predict
50
+ monitor.py # status polling and resubmission
51
+ plot.py # dichroism plotting
52
+ trcdad*.py # TR+CDAD utilities
53
+ calc_*.py # SPR-KKR calculators
54
+ atom_*.py # atomic-position scans
55
+ config.yaml # configuration template
56
+ examples/ # runnable demos
57
+ tests/ # pytest suite
58
+ ```
59
+
60
+ ## Naming conventions
61
+
62
+ - All modules and public classes are **lowercase, snake_case, ASCII**.
63
+ - No `+`, `-`, spaces, or version words like `enhanced` in filenames.
64
+ - One short responsibility per module name (`config`, `jobs`, `ml`, `scans`).
65
+
66
+ ## Development
67
+
68
+ ```bash
69
+ pytest -q
70
+ ruff check sparkkflow tests
71
+ mypy sparkkflow
72
+ ```
@@ -0,0 +1,45 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "sparkkflow"
7
+ version = "0.1.0"
8
+ description = "ARPES workflow engine with SPR-KKR, OSCARpes and ML polarization calibration"
9
+ readme = "README.md"
10
+ requires-python = ">=3.9"
11
+ authors = [{ name = "Ridha Eddhib" }]
12
+ license = { text = "MIT" }
13
+ dependencies = [
14
+ "numpy",
15
+ "scipy",
16
+ "matplotlib",
17
+ "pyyaml",
18
+ "pandas",
19
+ ]
20
+
21
+ [project.optional-dependencies]
22
+ ml = ["torch", "scikit-learn"]
23
+ oscarpes = ["duckdb", "pyarrow", "fsspec", "oscarpes"]
24
+ remote = ["paramiko"]
25
+ dev = ["pytest", "ruff", "mypy"]
26
+
27
+ [project.scripts]
28
+ sparkkflow = "sparkkflow.cli:main"
29
+
30
+ [tool.setuptools.packages.find]
31
+ include = ["sparkkflow*"]
32
+ exclude = ["tests*", "examples*"]
33
+
34
+ [tool.ruff]
35
+ line-length = 100
36
+ target-version = "py39"
37
+
38
+ [tool.ruff.lint]
39
+ select = ["E", "F", "W", "I", "UP", "B"]
40
+ ignore = ["E501"]
41
+
42
+ [tool.mypy]
43
+ python_version = "3.9"
44
+ ignore_missing_imports = true
45
+ warn_unused_ignores = true
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,6 @@
1
+ """Sparkkflow — ARPES workflow engine with SPR-KKR, OSCARpes and ML."""
2
+
3
+ from . import config, jobs, log, ml, monitor, scans
4
+
5
+ __all__ = ['config', 'jobs', 'log', 'ml', 'monitor', 'scans']
6
+ __version__ = "0.1.0"
@@ -0,0 +1,13 @@
1
+ """Entry point for an atomic-position scan."""
2
+
3
+ from .atom_scan import atom_scan
4
+
5
+
6
def main() -> None:
    """Submit a single Ni2FeGa atomic-position scan and print the job summary."""
    scan = atom_scan(
        task='Ni2FeGa',
        cpus=32,
        runtime='24:00:00',
        a=[5.77],
        b=[5.77],
        c=[5.77],
    )
    print(scan.run())


if __name__ == "__main__":
    main()
@@ -0,0 +1,73 @@
1
+ """Atomic-position parameter scan."""
2
+
3
+ import logging
4
+ from typing import List, Optional, Sequence
5
+
6
+ import numpy as np
7
+
8
+ from .jobs import submitter
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
class atom_scan:
    """Sweep lattice parameters (a, b, c) and submit one job per combination.

    Each (a, b, c) combination from the expanded ranges is submitted as a
    separate SGE job through :class:`submitter`; the accumulated job ids,
    names and paths are returned by :meth:`run`.
    """

    def __init__(self,
                 task: Optional[str] = None,
                 a: Optional[Sequence[float]] = None,
                 b: Optional[Sequence[float]] = None,
                 c: Optional[Sequence[float]] = None,
                 cpus: Optional[int] = None,
                 email: str = '',
                 email_notify: str = '',
                 runtime: str = '') -> None:
        """Validate the (a, b, c) ranges and prepare the SGE job submitter.

        Args:
            task: Name of the task passed through to ``submitter.submit``.
            a, b, c: Lattice-parameter ranges; each is either
                ``[value]``, ``[start, stop]`` (step defaults to 0.01) or
                ``[start, stop, step]``.  Required — ``None``/empty raises.
            cpus: Number of CPUs requested per job.
            email: Notification address (forwarded to the scheduler).
            email_notify: Scheduler notification mode string.
            runtime: Wall-clock limit string (e.g. ``'24:00:00'``).

        Raises:
            ValueError: If any of ``a``, ``b``, ``c`` is missing or empty.
        """
        self.task = task
        self.job_ids: List[str] = []
        self.job_names: List[str] = []
        self.job_paths: List[str] = []
        # NOTE: 'email_adress' (sic) is the keyword spelling the submitter
        # API expects; do not "fix" it here without changing jobs.submitter.
        self.js = submitter(
            scheduler_type='sge',
            number_of_requested_cpus=cpus,
            email_adress=email,
            email_notification=email_notify,
            runtime=runtime,
        )
        self.a = self._check(a)
        self.b = self._check(b)
        self.c = self._check(c)

    @staticmethod
    def _check(rng: Optional[Sequence[float]]) -> Sequence[float]:
        """Return *rng* unchanged, raising ValueError when it is empty/None."""
        if not rng:
            raise ValueError("Atomic position range is required (a, b, c).")
        return rng

    @staticmethod
    def _expand(rng: Sequence[float]) -> List[float]:
        """Expand a range spec into concrete values.

        ``[v]`` -> ``[v]``; ``[start, stop]`` -> arange with step 0.01;
        ``[start, stop, step]`` -> arange with the given step.  As with
        ``numpy.arange``, ``stop`` is exclusive.
        """
        if len(rng) == 1:
            logger.warning("Only one atomic position provided.")
            return [rng[0]]
        if len(rng) == 2:
            logger.warning("No range step provided; defaulting to 0.01.")
            return list(np.arange(rng[0], rng[1], 0.01))
        return list(np.arange(rng[0], rng[1], rng[2]))

    def run(self) -> dict:
        """Submit one job per (a, b, c) combination.

        Returns:
            dict with keys ``'job_ids'``, ``'job_names'``, ``'job_paths'``,
            each the accumulated list across all submitted combinations.
        """
        # Fix: expand each range once up front. The previous version called
        # _expand(self.b) and _expand(self.c) inside the outer loops,
        # recomputing the same arrays (and re-logging warnings) per iteration.
        a_vals = self._expand(self.a)
        b_vals = self._expand(self.b)
        c_vals = self._expand(self.c)
        for pa in a_vals:
            for pb in b_vals:
                for pc in c_vals:
                    # Lazy %-style args: no formatting cost when INFO is off.
                    logger.info("Submitting position (%s, %s, %s)", pa, pb, pc)
                    res = self.js.submit(
                        task=self.task,
                        input_sets=[(pa, pb, pc)],
                        files=[],
                    )
                    self.job_ids.extend(res['job_ids'])
                    self.job_names.extend(res['job_names'])
                    self.job_paths.extend(res['job_paths'])
        return {
            'job_ids': self.job_ids,
            'job_names': self.job_names,
            'job_paths': self.job_paths,
        }
@@ -0,0 +1,49 @@
1
"""Single SPR-KKR ARPES run, parameterised from the command line.

Usage: python calc_arpes.py <polarization> <i> <j> <l>
  polarization -> SPEC_PH.POL_P
  i            -> added to SPEC_PH.EPHOT (0.0 + i) and used as the argument
                  to the Fitkrasovskii_rpa final-state damping fit
  j            -> added to SPEC_PH.PHI (0.0 + j)
  l            -> NOTE(review): parsed but never used in this script
"""
import os
import sys
from ase2sprkkr import SPRKKR
import shutil
import glob
from FinalStateDamping.ImFinal import Fitkrasovskii_rpa
import logging

logging.basicConfig(level=logging.DEBUG)

#executable_suffix='9.2devnrep1')
###################################
# Command-line parameters.
task = 'ARPES'
polarization = sys.argv[1]
i = int(sys.argv[2])  # offset applied to SPEC_PH.EPHOT below
j = int(sys.argv[3])  # offset applied to SPEC_PH.PHI below
l = int(sys.argv[4])  # NOTE(review): unused — kept for CLI compatibility?
# Imaginary part of the final-state potential from the Krasovskii RPA fit
# (FinalStateDamping package), evaluated at i.
IM_FINAL_EV=Fitkrasovskii_rpa( int(sys.argv[2]))
print('IM_FINAL_EV',IM_FINAL_EV)
####################################

# Configure the SPR-KKR ARPES task via ase2sprkkr.  Parameter names follow
# SPR-KKR input-file conventions; the specific values are presumably
# geometry/material choices for this study — confirm against the SPR-KKR
# manual before reusing elsewhere.
calculator = SPRKKR(mpi=True,executable_suffix='9.2CMOM')
calculator.change_task('arpes')
calculator.input_parameters.CONTROL.PRINT = 0
calculator.input_parameters.CONTROL.NOHFF=True
calculator.input_parameters.CONTROL.KRMT=0
calculator.input_parameters.CONTROL.KRWS=1
calculator.input_parameters.TASK.STRVER = 0
calculator.input_parameters.TASK.IQ_AT_SURF=9
calculator.input_parameters.TAU.NKTAB=1000
# Imaginary potentials: final state from the fit above, initial state fixed.
calculator.input_parameters.ENERGY.IMV_FIN_EV=IM_FINAL_EV
calculator.input_parameters.ENERGY.IMV_INI_EV=0.03
calculator.input_parameters.ENERGY.EWORK_EV=4.5
# Energy window (eV) sampled with NE points.
calculator.input_parameters.ENERGY.EMINEV=-3.0
calculator.input_parameters.ENERGY.EMAXEV=-1.1
calculator.input_parameters.ENERGY.NE=100
# Electron k-path settings (SPEC_EL), 100 points.
calculator.input_parameters.SPEC_EL.KA = [2.0, 0.0]
calculator.input_parameters.SPEC_EL.K1 = [-4.0, 0.0]
calculator.input_parameters.SPEC_EL.NK1 = 100
calculator.input_parameters.SPEC_STR.N_LAYER = 20
calculator.input_parameters.SPEC_STR.NLAT_G_VEC=55
# Photon parameters: energy and azimuth shifted by the CLI offsets i and j.
calculator.input_parameters.SPEC_PH.EPHOT = 0.0 + i
calculator.input_parameters.SPEC_PH.POL_P = polarization
calculator.input_parameters.SPEC_PH.PHI=0.0+j
calculator.input_parameters.SPEC_PH.THETA=45.0
calculator.input_parameters.SPEC_STR.SURF_BAR= [0.35,0.35]
#calculator.input_parameters.SPEC.FEGFINAL=True
calculator.input_parameters.SPEC_STR.TRANSP_BAR=True
# Run against the WSe2 potential file expected in the working directory.
calculator.calculate(potential='WSe2.pot_new')
@@ -0,0 +1,61 @@
1
"""SCF ground-state calculation for Ni2FeGa with SPR-KKR via ase2sprkkr.

Usage: python calc_ni2fega.py <a> <b> <c> <task>
  a, b, c -> lattice parameters (presumably Angstrom — confirm units)
  task    -> NOTE(review): read from argv but never used below
"""
from ase.spacegroup import crystal
import ase
from ase.io import write
from ase.visualize import view
from ase2sprkkr import SPRKKR
from ase2sprkkr.sprkkr.sprkkr_atoms import SPRKKRAtoms
import os
import sys
import shutil
from ase.build import bulk
from ase import Atoms
# Define lattice parameters and basis

a = float(sys.argv[1])
b = float(sys.argv[2])
c = float(sys.argv[3])

# Rest of your script...


cell=[]  # NOTE(review): immediately overwritten by the next line
cell=[a,b,c]
## define the task for the submitter

task=sys.argv[4]  # NOTE(review): parsed but unused in this script


# Create crystal and determine the spacegroup
# Ni2FeGa in spacegroup 225 (cubic), four-atom basis; the primitive cell
# is requested from ASE.
Ni2FeGa = crystal(symbols=['Ni','Ni','Fe','Ga'],
                  basis=[[0.25, 0.25, 0.25], [0.75, 0.75, 0.75],[0.5,0.5,0.5],[0.0,0.0,0.0]], spacegroup=225,
                  cellpar=[a, b, c, 90., 90., 90.],pbc=True,primitive_cell=True)
# Promote the ASE atoms to SPRKKRAtoms
Ni2FeGa = SPRKKRAtoms.promote_ase_atoms(Ni2FeGa)

# Options for input file.  Keys follow SPR-KKR input-file conventions
# (SCF mixing/tolerance, k-mesh size, exchange-correlation functional, ...);
# confirm individual meanings against the SPR-KKR manual before changing.
opts = {
    'CONTROL.KRMT': 4,
    'CONTROL.KRWS': 1,
    'ENERGY.EMIN':-0.52,
    'SITES.NL': 4,
    'MODE.LLOYD': True,
    'TAU.BZINT': 'POINTS',
    'TAU.NKTAB': 1000,
    'SCF.VXC': 'VWN',
    'SCF.NITER': 700,
    'SCF.MIX': 0.1,
    'SCF.TOL': 1E-5,
    'SCF.ISTBRY': 1,
}

# Write starting potential and input file
calculator = SPRKKR(atoms=Ni2FeGa)
#calculator.save_input(input_file='Ni2FeGa.inp', potential_file='Ni2FeGa.pot')

#perform scf calculations
# Run the SCF loop with 32 MPI ranks using the given mpirun binary.
out=calculator.calculate(mpi=['/opt/openmpi/bin/mpirun','-np','32'], options=opts)
@@ -0,0 +1,261 @@
1
+ """Sparkkflow command line interface."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import json
7
+ import shutil
8
+ import sys
9
+ import time
10
+ from datetime import datetime
11
+ from typing import Any
12
+
13
+ from . import config as cfg
14
+ from . import log as logmod
15
+ from . import ml
16
+ from . import monitor as mon
17
+ from .jobs import submitter
18
+ from .scans import scans
19
+
20
+ logger = logmod.get_logger(__name__)
21
+
22
+
23
def parser() -> argparse.ArgumentParser:
    """Build the top-level argument parser for the ``sparkkflow`` CLI.

    Defines the global options plus the ``scan``, ``generate``, ``train``,
    ``predict``, ``monitor``, ``ingest`` and ``doctor`` subcommands
    (dispatched via ``args.command``).

    Returns:
        argparse.ArgumentParser: the fully configured parser.
    """
    p = argparse.ArgumentParser(
        prog='sparkkflow',
        description="Sparkkflow — ARPES workflow with ML polarization calibration",
    )
    # Global options, valid before any subcommand.
    p.add_argument('--config', '-c', default='config.yaml')
    p.add_argument('--debug', '-d', action='store_true')
    p.add_argument('--scheduler', choices=['slurm', 'sge'])
    p.add_argument('--oscar-db')

    sub = p.add_subparsers(dest='command')

    s = sub.add_parser('scan', help='Run ARPES scan')
    s.add_argument('--energy-range', nargs=2, type=int, metavar=('START', 'END'))
    s.add_argument('--energy-step', type=int, default=1)
    s.add_argument('--polarization', nargs='+')
    s.add_argument('--dicho', action='store_true')
    s.add_argument('--theta-analyzer', type=float)
    s.add_argument('--theta-photon', type=float)
    s.add_argument('--rotate', type=float)
    s.add_argument('--auto-ingest', action='store_true')
    s.add_argument('--wait', action='store_true', help='Wait for completion and ingest')
    s.add_argument('--no-array', action='store_true', help='Disable job array submission')
    s.add_argument('--poll-interval', type=int, default=60, help='Seconds between polls when waiting')

    g = sub.add_parser('generate', help='Generate ML training data')
    g.add_argument('--energy-range', nargs=2, type=int, metavar=('START', 'END'))
    g.add_argument('--fine-grid', action='store_true')
    g.add_argument('--wait', action='store_true', help='Wait for completion and ingest')
    g.add_argument('--no-array', action='store_true', help='Disable job array submission')
    # Fix: cmd_generate reads args.poll_interval (previously only via a
    # getattr fallback because the flag was never defined here). Default 60
    # matches the old fallback, so behavior is unchanged when omitted.
    g.add_argument('--poll-interval', type=int, default=60, help='Seconds between polls when waiting')

    t = sub.add_parser('train', help='Train polarization model')
    t.add_argument('--model-path', required=True)
    t.add_argument('--query-filter')
    t.add_argument('--limit', type=int)

    pr = sub.add_parser('predict', help='Predict optimal polarization')
    pr.add_argument('--model-path', required=True)
    pr.add_argument('--energy', type=float, required=True)
    pr.add_argument('--formula')
    pr.add_argument('--task', default='ARPES')
    pr.add_argument('--energy-tolerance', type=float, default=5.0)

    m = sub.add_parser('monitor', help='Monitor job status')
    m.add_argument('--job-ids', help='Comma-separated job IDs')
    m.add_argument('--scripts', help='Comma-separated script paths (for resubmit)')
    m.add_argument('--continuous', action='store_true')
    m.add_argument('--interval', type=int, default=60)

    i = sub.add_parser('ingest', help='Ingest completed jobs into OSCARpes')
    i.add_argument('--dry-run', action='store_true', help='Show what would be ingested without doing it')

    sub.add_parser('doctor', help='Check environment and tooling')

    return p
78
+
79
+
80
+ def _sched_params(c: Any, kind: str) -> dict:
81
+ return dict(c.scheduler.sge if kind == 'sge' else c.scheduler.slurm)
82
+
83
+
84
def _runner(args, c: Any, ml_mode: bool = False) -> scans:
    """Construct a ``scans`` runner from CLI args and config.

    The CLI ``--scheduler`` flag wins over the configured default; the
    OSCARpes database path is passed only when OSCARpes is enabled.
    """
    kind = args.scheduler or c.scheduler.default_type
    db_path = c.oscarpes.database_path if c.oscarpes.enabled else None
    # 'generate' has no --auto-ingest flag, hence the getattr fallback.
    ingest = getattr(args, 'auto_ingest', False) or c.oscarpes.auto_ingest
    return scans(
        oscar_db_path=db_path,
        scheduler_type=kind,
        auto_ingest=ingest,
        ml_mode=ml_mode,
        **_sched_params(c, kind),
    )
93
+
94
+
95
def cmd_scan(args, c: Any) -> None:
    """Handle the ``scan`` subcommand: submit an ARPES parameter scan."""
    logger.info("Starting ARPES scan")
    runner = _runner(args, c)
    run_kwargs = {
        'energy_range': tuple(args.energy_range) if args.energy_range else None,
        'energy_step': args.energy_step,
        'dicho': args.dicho,
        'pol': args.polarization,
        'rotate': args.rotate,
        'theta_analyzer': args.theta_analyzer,
        'theta_photon': args.theta_photon,
        'wait': args.wait,
        'poll_interval': args.poll_interval,
    }
    result = runner.run(**run_kwargs)
    logger.info(f"ARPES scan submitted: {len(result['job_ids'])} jobs")
110
+
111
+
112
def cmd_generate(args, c: Any) -> None:
    """Handle the ``generate`` subcommand: produce ML training data."""
    logger.info("Generating ML training data")
    runner = _runner(args, c, ml_mode=True)
    result = runner.run(
        energy_range=tuple(args.energy_range) if args.energy_range else None,
        ml_mode=True,
        wait=args.wait,
        # The generate subparser may not define --poll-interval; fall back.
        poll_interval=getattr(args, 'poll_interval', 60),
    )
    n_jobs = len(result['job_ids'])
    out = f"ml_dataset_{n_jobs}_samples.json"
    runner.export(out)
    logger.info(f"Generated {n_jobs} jobs; dataset → {out}")
124
+
125
+
126
def cmd_train(args, c: Any) -> None:
    """Handle the ``train`` subcommand: fit and save the polarization model."""
    logger.info("Training polarization model")
    query_filter = None
    if args.query_filter:
        # --query-filter is a JSON string on the command line.
        query_filter = json.loads(args.query_filter)
    res = ml.train(config_path=args.config, query_filter=query_filter, save_path=args.model_path)
    logger.info(f"Model saved to {args.model_path}")
    logger.info(f"Best val loss: {res['best_val_loss']:.6f}")
132
+
133
+
134
def cmd_predict(args, c: Any) -> None:
    """Handle the ``predict`` subcommand: report the optimal polarization."""
    logger.info("Predicting optimal polarization")
    conditions: dict = {
        'photon_energy': args.energy,
        'energy_tolerance': args.energy_tolerance,
        'task': args.task,
    }
    if args.formula:
        conditions['formula'] = args.formula
    res = ml.predict(model_path=args.model_path, experimental_conditions=conditions,
                     config_path=args.config)
    # Guard clause: bail out early on failure.
    if res.get('status') != 'success':
        logger.error(f"Prediction failed: {res.get('message')}")
        return
    logger.info(f"Optimal polarization: {res['optimal_polarization']}")
    for component in ('s1', 's2', 's3'):
        logger.info(
            f" {component.upper()}: {res['optimal_stokes'][component]:+.3f} "
            f"± {res['uncertainty'][component]:.3f}"
        )
    logger.info(f"Based on {res['n_similar_calculations']} similar calculations")
155
+
156
+
157
def cmd_monitor(args, c: Any) -> None:
    """Handle the ``monitor`` subcommand: poll scheduler status.

    With ``--scripts`` and ``--continuous`` the watcher takes over (it also
    resubmits); otherwise status is logged once, or repeatedly every
    ``--interval`` seconds when ``--continuous`` is set.
    """
    if not args.job_ids:
        logger.error("No job IDs provided")
        return
    # Fix: tolerate whitespace around commas and trailing commas
    # (e.g. "12, 34," from shell quoting) instead of forwarding padded
    # or empty id strings to the scheduler backend.
    job_ids = [j.strip() for j in args.job_ids.split(',') if j.strip()]
    scripts = [s.strip() for s in args.scripts.split(',') if s.strip()] if args.scripts else []
    if scripts and args.continuous:
        mon.watch(job_ids, scripts, poll_interval=args.interval)
        return
    while True:
        snap = mon.status(job_ids)
        ts = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        logger.info(f"[{ts}] {snap}")
        if not args.continuous:
            return
        time.sleep(args.interval)
173
+
174
+
175
def cmd_ingest(args, c: Any) -> None:
    """Ingest completed jobs into OSCARpes."""
    logger.info("Ingesting completed jobs into OSCARpes")
    db_path = c.oscarpes.database_path if c.oscarpes.enabled else None
    js = submitter(
        scheduler_type=c.scheduler.default_type,
        oscar_db_path=db_path,
        auto_ingest=False,
    )
    ingested = js.ingest_completed(dry_run=args.dry_run)
    count = len(ingested)
    if args.dry_run:
        logger.info(f"[DRY-RUN] Would ingest {count} jobs")
    else:
        logger.info(f"Successfully ingested {count} jobs")
188
+
189
+
190
def cmd_doctor(args, c: Any) -> None:
    """Check environment, scheduler binaries, and OSCARpes reachability."""
    ok = True
    # Report scheduler binaries on PATH. Missing binaries are reported but
    # do not fail the check (only one scheduler family is needed).
    for tool in ('qsub', 'qstat', 'qacct', 'sbatch', 'squeue'):
        path = shutil.which(tool)
        location = 'found at ' + path if path else 'NOT FOUND'
        logger.info(f"{tool}: {location}")
    # Optional-dependency probes; each missing module degrades a feature
    # and flips the exit status to non-zero.
    probes = (
        ('oscarpes', 'ML ingest disabled'),
        ('torch', 'ML pipeline disabled'),
    )
    for module, consequence in probes:
        try:
            __import__(module)
            logger.info(f"{module}: importable")
        except ImportError:
            logger.warning(f"{module}: not installed ({consequence})")
            ok = False
    db = c.oscarpes.database_path
    logger.info(f"OSCARpes db path: {db} (enabled={c.oscarpes.enabled})")
    sys.exit(0 if ok else 1)
211
+
212
+
213
# Dispatch table: maps each subcommand name registered in parser() to its
# handler function; main() looks the handler up via args.command.
COMMANDS = {
    'scan': cmd_scan,
    'generate': cmd_generate,
    'train': cmd_train,
    'predict': cmd_predict,
    'monitor': cmd_monitor,
    'ingest': cmd_ingest,
    'doctor': cmd_doctor,
}
222
+
223
+
224
def main() -> None:
    """CLI entry point: parse args, load config, set up logging, dispatch.

    Exits with status 1 when no subcommand is given or the command raises.
    """
    # Fix: build the parser once and reuse it — the previous version
    # constructed a second, identical parser just to print help.
    p = parser()
    args = p.parse_args()
    if not args.command:
        p.print_help()
        sys.exit(1)

    mgr = cfg.get_config_manager(args.config)
    c = mgr.config

    # Command-line flags override the loaded configuration.
    if args.debug:
        c.general['debug_mode'] = True
        c.general['log_level'] = 'DEBUG'
    if args.scheduler:
        c.scheduler.default_type = args.scheduler
    if args.oscar_db:
        c.oscarpes.database_path = args.oscar_db

    logmod.setup_logging({
        'log_level': c.general.get('log_level', 'INFO'),
        'log_dir': c.general.get('log_dir', 'logs'),
        'console_output': True,
        'file_output': True,
        'json_output': c.general.get('debug_mode', False),
    })

    logger.info(f"Sparkkflow command: {args.command}")
    try:
        COMMANDS[args.command](args, c)
    except Exception as e:
        logger.error(f"Command failed: {e}")
        if c.general.get('debug_mode', False):
            import traceback
            traceback.print_exc()
        sys.exit(1)


if __name__ == '__main__':
    main()