vyas_group_scripts-0.3.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vyas_group_scripts/__init__.py +4 -0
- vyas_group_scripts/gen_batch.py +161 -0
- vyas_group_scripts/run_batch.py +35 -0
- vyas_group_scripts/templates/g16_batch_inputs.py +180 -0
- vyas_group_scripts/templates/orca_batch_inputs.py +315 -0
- vyas_group_scripts-0.3.0.dist-info/METADATA +11 -0
- vyas_group_scripts-0.3.0.dist-info/RECORD +9 -0
- vyas_group_scripts-0.3.0.dist-info/WHEEL +4 -0
- vyas_group_scripts-0.3.0.dist-info/entry_points.txt +4 -0
@@ -0,0 +1,161 @@
from glob import glob
from .templates.g16_batch_inputs import simple_g16_template, complex_g16_template
from .templates.orca_batch_inputs import orca_template, orca_restart_template
import os
import argparse


def process_g16_files(file_pattern="*.gjf", complex=False, time=os.environ.get("DEFAULT_DFT_TIME"),
                      num_nodes=os.environ.get("NUM_NODES"), benchmarking=False):

    benchmarking_line = ""
    batch_file = simple_g16_template
    if complex:
        batch_file = complex_g16_template
    if benchmarking:
        benchmarking_line = "\n#SBATCH --exclusive"

    for gjf_file in glob(file_pattern):
        if ".gjf" != gjf_file[-4:]:
            continue

        bash_file_name = gjf_file[:-4] + ".s"
        print(bash_file_name)
        if os.path.exists(bash_file_name):
            print(f"{bash_file_name} already exists, skipping.")
            continue

        this_file_string = batch_file.format(TIME=time,
                                             ACTNUM=os.environ.get("ACTNUM"),
                                             JOB_NAME=gjf_file[:-4],
                                             TASKS_PER_NODE=1,
                                             N_TASKS=num_nodes,
                                             # keep the extension; the template strips it when naming the .log
                                             INPUT_FILE_NAME=gjf_file.split("/")[-1],
                                             NUM_NODES=num_nodes,
                                             BENCHMARKING_LINE=benchmarking_line)
        with open(bash_file_name, "w") as bash_file:
            bash_file.write(this_file_string)

        apply_parallelization(gjf_file, num_nodes)


def process_inp_files(file_pattern="*.inp", complex=False, time=os.environ.get("DEFAULT_DFT_TIME"),
                      num_nodes=os.environ.get("NUM_NODES"), benchmarking=False, restart=False):
    # NOTE: restart (and the imported orca_restart_template) is accepted but not yet wired up here.

    # Select the bash template
    batch_file = orca_template

    # Set the benchmarking line
    benchmarking_line = ""
    if benchmarking:
        benchmarking_line = "\n#SBATCH --exclusive"

    # Loop through the input files
    for inp_file in glob(file_pattern):
        if ".inp" != inp_file[-4:]:
            continue
        bash_file_name = inp_file[:-4] + ".s"

        if os.path.exists(bash_file_name):
            print(f"{bash_file_name} already exists, skipping.")
            continue

        # Fill in the bash script template
        this_file_string = batch_file.format(TIME=time,
                                             ACTNUM=os.environ.get("ACTNUM"),
                                             JOB_NAME=inp_file[:-4],
                                             TASKS_PER_NODE=1,
                                             N_TASKS=num_nodes,
                                             # keep the extension; the template strips it when naming the .out
                                             INPUT_FILE_NAME=inp_file.split("/")[-1],
                                             NUM_NODES=num_nodes,
                                             BENCHMARKING_LINE=benchmarking_line)
        with open(bash_file_name, "w") as bash_file:
            bash_file.write(this_file_string)

        # Add parallelization directives to the input file
        apply_parallelization(inp_file, num_nodes)


def apply_parallelization(input_file, nprocs=36):

    # env-var defaults arrive as strings (or None when unset); normalize before comparing
    nprocs = int(nprocs or 0)

    if ".gjf" == input_file[-4:]:
        if nprocs != 0:
            print(f"Adding parallelization to {input_file}.")
            with open(input_file, "r") as file:
                file_string = file.read()
            file_string = f"%mem=5GB\n%nprocshared={nprocs}\n" + file_string

            with open(input_file, "w") as file:
                file.write(file_string)

    if ".inp" == input_file[-4:]:
        if nprocs != 0:
            if nprocs not in [6, 10, 30, 36]:
                print("Invalid nprocs selected.")
                return None
            # split the node's 144,000 MB memory pool evenly across the cores (MB per core)
            memory = int(144000 / nprocs)
            file_string = f"%maxcore {memory}\n%pal nprocs {nprocs} end\n"
            with open(input_file, "r") as file:
                file_list = file.readlines()
            # insert the directives just after the last "!" keyword line
            lastinput_line = 1
            for index, line in enumerate(file_list):
                if len(line.strip()) > 0 and line.strip()[0] == "!":
                    lastinput_line = index
            file_list.insert(lastinput_line + 1, file_string)
            with open(input_file, "w") as file:
                file.writelines(file_list)
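
The ORCA memory directive is a plain split of a node's 144,000 MB pool across the requested cores, so nprocs=36 yields %maxcore 4000 (int(144000 / 36) MB per core). A minimal sketch of the before/after effect, using a hypothetical water.inp that is not part of the package:

# Hypothetical demo of apply_parallelization on a two-line ORCA input.
from vyas_group_scripts.gen_batch import apply_parallelization

with open("water.inp", "w") as f:
    f.write("! B3LYP def2-SVP Opt\n* xyzfile 0 1 water.xyz\n")

apply_parallelization("water.inp", nprocs=36)

# water.inp now reads:
#   ! B3LYP def2-SVP Opt
#   %maxcore 4000
#   %pal nprocs 36 end
#   * xyzfile 0 1 water.xyz
# A .gjf input would instead get "%mem=5GB\n%nprocshared=36\n" prepended.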


def gen_all_batch():

    parser = argparse.ArgumentParser(prog="gab")
    parser.add_argument("-t", default=os.environ.get("DEFAULT_DFT_TIME"),
                        help="Set the time limit for the batch script (format: HH:MM:SS)."
                             " If not specified, the default from the DEFAULT_DFT_TIME environment variable is used.")
    parser.add_argument("-b", action="store_true",
                        help="Enable benchmarking mode. This sets the node allocation"
                             " to exclusive to ensure consistent performance for benchmarking runs.")
    parser.add_argument("-k", default="*",
                        help="Provide a comma-separated list of glob patterns to match input files."
                             " Batch scripts are generated only for matching input files.\nExample: -k 'xyz,*mol' (quotes recommended)")
    parser.add_argument("-p", default=-1, type=int, choices=[0, 6, 10, 30, 36],
                        help="Specify the number of processors to use."
                             " Acceptable values: 36, 30, 10, 6, or 0."
                             " Memory settings are adjusted based on this value. Use 0 to skip adding parallelization directives entirely."
                             " When the -p flag is not given, the default from the DEFAULT_DFT_NUM_NODES environment variable"
                             " is used for all calculations.\n\tORCA jobs:\n\t\tInserts the directives after the last '!' keyword line of the input file.\n\tGaussian jobs:\n\t\tPrepends"
                             " parallelization options to the top of the input file.\n\t\tOnly the '36' and '0' options are available for Gaussian jobs.")
    parser.add_argument("-r", default="*",
                        help="Restart ORCA jobs from one or more specified folders. Provide a comma-separated list of"
                             " folder names.\n\tExample: -r 'job1_TIMEOUT,job2_TIMEOUT'")
    parser.add_argument("-v", default=None,
                        help="Specify the quantum chemistry software version to use."
                             " Currently only downgrading to ORCA 5 is supported.")
    parser.add_argument("-c", action="store_false",
                        help="Use the simpler .s template for Gaussian jobs, which does not create per-job subdirectories."
                             " By default the more complex template is used.")
    args = parser.parse_args()

    patterns_to_search = args.k.split(",")
    complex_choice = args.c
    time_choice = args.t
    processors_choice = args.p
    benchmarking_choice = args.b
    orca_version = args.v  # parsed but not yet used below
    restarts = args.r      # parsed but not yet used below
    if processors_choice == -1:
        # environment variables come back as strings; coerce before the arithmetic in apply_parallelization
        processors_choice = int(os.environ.get("DEFAULT_DFT_NUM_NODES", 0))

    for file_pattern in patterns_to_search:

        for file in glob(file_pattern):

            process_g16_files(file, complex_choice, time_choice, processors_choice, benchmarking_choice)
            process_inp_files(file, complex_choice, time_choice, processors_choice, benchmarking_choice)


if __name__ == "__main__":
    gen_all_batch()
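
Taken together: gab renders one .s script per matching input and patches parallelization directives into the input itself; rab (run_batch.py, next hunk) then submits the scripts. A minimal sketch of driving the ORCA path directly from Python, with a hypothetical Slurm account:

# Hedged sketch; roughly what `gab -k '*.inp' -t 02:00:00 -p 36` does.
import os
from vyas_group_scripts.gen_batch import process_inp_files

os.environ.setdefault("ACTNUM", "hpc-account-123")  # hypothetical account number
process_inp_files(file_pattern="*.inp", time="02:00:00", num_nodes=36)
# Each matching foo.inp now has a foo.s Slurm script beside it.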

@@ -0,0 +1,35 @@
import os
from glob import glob
from rich import print
import argparse


def check_file_has_pair(s_file: str):
    print(f"Checking {s_file}")
    if os.path.exists(s_file[:-1] + "inp"):
        return True
    if os.path.exists(s_file[:-1] + "gjf"):
        return True
    print(f"[red]Matching file not found for {s_file}")
    return False


def submit_job_pattern(sfile_pattern: str = "*.s"):
    for s_file in glob(sfile_pattern):
        if check_file_has_pair(s_file):
            os.system(f"sbatch {s_file}")


def run_all_batch():

    parser = argparse.ArgumentParser(prog="rab")
    parser.add_argument("-k", default="*.s", help="Provide a glob pattern for the .s files to submit.")

    args = parser.parse_args()

    patterns_to_submit = args.k

    submit_job_pattern(patterns_to_submit)


if __name__ == "__main__":
    run_all_batch()
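
The pairing check exists because the complex templates move each input file into its job directory at run time: a .s script whose input has already been consumed is stale and is skipped rather than resubmitted. A minimal sketch with hypothetical file names:

# Hedged sketch: only the script with a surviving input pair would be submitted.
from vyas_group_scripts.run_batch import check_file_has_pair

open("water.inp", "w").close()
open("water.s", "w").close()
open("orphan.s", "w").close()

print(check_file_has_pair("water.s"))   # True  -> sbatch water.s
print(check_file_has_pair("orphan.s"))  # False -> skipped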

@@ -0,0 +1,180 @@

##################################################
# This is the simple g16 batch file template.
# Python is responsible for feeding in the
# following information:
# JOB_NAME
# ACTNUM
# NUM_NODES
# TASKS_PER_NODE
# N_TASKS
# INPUT_FILE_NAME
# TIME
# BENCHMARKING_LINE
##################################################
simple_g16_template = """#!/bin/bash -x

#SBATCH --job-name={JOB_NAME}
#SBATCH --account={ACTNUM} # you can find the account number by running $ sacctmgr show Account
#SBATCH --nodes={NUM_NODES}
#SBATCH --ntasks-per-node={TASKS_PER_NODE}
#SBATCH --ntasks={N_TASKS}
#SBATCH --export=ALL
#SBATCH --time={TIME} # time when the job will automatically terminate - want the smallest possible overestimate{BENCHMARKING_LINE}

export KMP_AFFINITY=respect,verbose

module load apps/gaussian16/c01

cd $SLURM_SUBMIT_DIR

eval "$VGS_LOAD_G16"
INPUT_FILE="{INPUT_FILE_NAME}"

OUTPUT_FILE="${{INPUT_FILE: 0:-4}}.log"

JOBID=`echo $SLURM_JOBID`

#export OMP_NUM_THREADS=1
#export GAUSS_EXEDIR=/opt/g09/g09

echo "Running Job"


g16 <"${{INPUT_FILE}}">"${{OUTPUT_FILE}}"


echo "job has finished" """

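Note the two brace conventions in these templates: single-brace fields ({JOB_NAME}, {TIME}, ...) are filled by str.format in gen_batch.py, while doubled braces ({{INPUT_FILE}}) are format escapes that survive as literal bash ${INPUT_FILE} expansions. A minimal sketch of rendering the simple template by hand, with hypothetical values:

# Hedged sketch; gen_batch.py performs the same .format() call.
from vyas_group_scripts.templates.g16_batch_inputs import simple_g16_template

script = simple_g16_template.format(
    JOB_NAME="water_opt",
    ACTNUM="hpc-account-123",   # hypothetical Slurm account
    NUM_NODES=1,
    TASKS_PER_NODE=1,
    N_TASKS=1,
    INPUT_FILE_NAME="water_opt.gjf",
    TIME="01:00:00",
    BENCHMARKING_LINE="",
)
with open("water_opt.s", "w") as f:
    f.write(script)             # rendered script contains ${INPUT_FILE} verbatim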

##################################################
# This is the more complex g16 batch file template.
# Python is responsible for feeding in the
# following information:
# JOB_NAME
# ACTNUM
# NUM_NODES
# TASKS_PER_NODE
# N_TASKS
# INPUT_FILE_NAME
# TIME
# BENCHMARKING_LINE
##################################################

complex_g16_template = """#!/bin/bash -x

# --------------------------------------------------------------------------------------------------------------
#
# This script will submit the input file specified to Gaussian 16 via the slurm scheduler. The script will
# automatically create a new output directory with the same title as the input file and send all files there.
# Script timing will be recorded in the slurm.out file, which will be copied to the output directory only when
# the job has completed. Email status updates will be sent if configured in the config.toml.
#
# --------------------------------------------------------------------------------------------------------------

# must set the account number as an env variable manually - gab will automatically use this
#SBATCH --account={ACTNUM} # you can find the account number by running $ sacctmgr show Account
#SBATCH --nodes={NUM_NODES}
#SBATCH --ntasks-per-node={TASKS_PER_NODE}
#SBATCH --ntasks={N_TASKS}
#SBATCH --export=ALL
#SBATCH --time={TIME} # time when the job will automatically terminate - want the smallest possible overestimate{BENCHMARKING_LINE}
maxtime=3590 # buffer time to allow cleanup, should be ~10 seconds less than the auto termination time

# --------------------------------------------------------------------------------------------------------------

INPUT_FILE="{INPUT_FILE_NAME}"

# --------------------------------------------------------------------------------------------------------------

# record the submission time
mystart=$(date "+%Y-%m-%d %H:%M:%S")

# make a new output folder with the job name as the title and direct the output there
OUTPUT_DIR="${{SLURM_SUBMIT_DIR}}/${{SLURM_JOB_NAME: 0:-2}}"
mkdir -p "${{OUTPUT_DIR}}_IN-PROGRESS"

# copy the input file to the output folder and delete the copy in the submit directory
cp "${{SLURM_SUBMIT_DIR}}/${{INPUT_FILE}}" "${{OUTPUT_DIR}}_IN-PROGRESS/${{INPUT_FILE}}"
rm "${{SLURM_SUBMIT_DIR}}/${{INPUT_FILE}}"

# copy the batch script to the output folder and delete the copy in the submit directory
cp "${{SLURM_SUBMIT_DIR}}/${{SLURM_JOB_NAME}}" "${{OUTPUT_DIR}}_IN-PROGRESS/${{SLURM_JOB_NAME}}"
rm "${{SLURM_SUBMIT_DIR}}/${{SLURM_JOB_NAME}}"


# configure KMP_AFFINITY to communicate hardware threads to the OMP parallelizer
export KMP_AFFINITY=respect,verbose

# load the gaussian module and print the JOBID to the slurm out for debugging
eval "$VGS_LOAD_G16"
JOBID=`echo $SLURM_JOBID`

# configure and pass OMP parameters
#export OMP_NUM_THREADS=1
#export GAUSS_EXEDIR=/opt/g09/g09

# go to the output folder
cd "${{OUTPUT_DIR}}_IN-PROGRESS"

# run the input file and generate an output file of the same name
# if the timeout is reached it will return exit 124, otherwise it returns the calc exit status
start=`date +%s.%N`
OUTPUT_FILE="${{INPUT_FILE: 0:-4}}.log"
timeout -s SIGTERM $maxtime g16 <"${{OUTPUT_DIR}}_IN-PROGRESS/${{INPUT_FILE}}">"${{OUTPUT_FILE}}"
CALC_STATUS=$?
end=`date +%s.%N`

# get the job status
if [[ $CALC_STATUS == 124 ]]; then
    status="TIMEOUT"
elif [[ $CALC_STATUS != 0 ]]; then
    status="ERROR"
elif [[ $CALC_STATUS == 0 ]]; then
    status="NORMAL"
fi

# log the time for benchmarking in the output file
runtime=$( echo "$end - $start" | bc -l )
echo $runtime
exec 3>>"${{OUTPUT_FILE}}"
echo "">&3
echo "slurmID: ${{SLURM_JOBID}}">&3
echo "totalRuntime[s]: ${{runtime}}">&3
exec 3>&-

# get the number of basis functions used in the first calculation
myBasis=$(grep -o -m 1 '[0-9]\+ basis functions' "${{OUTPUT_FILE}}" | cut -d ' ' -f 1)
myBasis=($myBasis)

# record the completion time
myend=$(date "+%Y-%m-%d %H:%M:%S")

# write the total job timing to the job_timings file in the submit directory as a CSV
cd "${{SLURM_SUBMIT_DIR}}"
if [ ! -f job_timings.csv ]; then
    echo "filename,slurmID,nbasisfuncs,start,end,runtime[s],jobstatus" > job_timings.csv
fi
exec 3>>job_timings.csv
echo "${{INPUT_FILE}},${{SLURM_JOBID}},${{myBasis}},${{mystart}},${{myend}},${{runtime}},${{status}}">&3
exec 3>&-

# copy the slurm output to the output folder and delete it from the submit directory
cp "${{SLURM_SUBMIT_DIR}}/slurm-${{SLURM_JOBID}}.out" "${{OUTPUT_DIR}}_IN-PROGRESS/slurm-${{SLURM_JOBID}}-${{INPUT_FILE: 0:-4}}.out"
rm "${{SLURM_SUBMIT_DIR}}/slurm-${{SLURM_JOBID}}.out"

# rename the output directory appropriately
if [[ $CALC_STATUS == 124 ]]; then

    mv "${{OUTPUT_DIR}}_IN-PROGRESS" "${{OUTPUT_DIR}}_TIMEOUT"
    exit 124

elif [[ $CALC_STATUS != 0 ]]; then

    mv "${{OUTPUT_DIR}}_IN-PROGRESS" "${{OUTPUT_DIR}}_ERROR"
    exit 2

elif [[ $CALC_STATUS == 0 ]]; then

    mv "${{OUTPUT_DIR}}_IN-PROGRESS" "${{OUTPUT_DIR}}"
    exit 0
fi"""

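Both this complex g16 template and the ORCA templates below append one row per job to job_timings.csv in the submit directory, using the header written above. A minimal sketch of reading those rows back:

# Hedged sketch: summarizing job_timings.csv after a batch has drained.
import csv

with open("job_timings.csv", newline="") as f:
    for row in csv.DictReader(f):
        print(f"{row['filename']}: {row['jobstatus']} "
              f"in {float(row['runtime[s]']):.1f} s ({row['nbasisfuncs']} basis functions)")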

@@ -0,0 +1,315 @@
orca_template = """#!/bin/bash -x

# --------------------------------------------------------------------------------------------------------------
#
# This script will submit the input file specified to ORCA via the slurm scheduler. The script will automatically
# create a new output directory with the same title as the input file and send all files there. Script timing
# will be recorded in the slurm.out file, which will be copied to the output directory only when the job has
# completed. Email status updates will be sent if configured in the config.toml.
#
# --------------------------------------------------------------------------------------------------------------

# must set the account number as an env variable manually - gab will automatically use this
#SBATCH --account={ACTNUM} # you can find the account number by running $ sacctmgr show Account
#SBATCH --nodes={NUM_NODES}
#SBATCH --ntasks-per-node={TASKS_PER_NODE}
#SBATCH --ntasks={N_TASKS}
#SBATCH --export=ALL
#SBATCH --time={TIME} # time when the job will automatically terminate - want the smallest possible overestimate{BENCHMARKING_LINE}
maxtime=3590 # buffer time to allow cleanup, should be ~10 seconds less than the auto termination time

# --------------------------------------------------------------------------------------------------------------

INPUT_FILE="{INPUT_FILE_NAME}"

# --------------------------------------------------------------------------------------------------------------

ml purge

# record the submission time
mystart=$(date "+%Y-%m-%d %H:%M:%S")

# make a new output folder with the job name as the title and direct the output there
OUTPUT_DIR="${{SLURM_SUBMIT_DIR}}/${{SLURM_JOB_NAME: 0:-2}}"
mkdir -p "${{OUTPUT_DIR}}_IN-PROGRESS"

# copy the input orca file to the output folder and delete the copy in the submit directory
cp "${{SLURM_SUBMIT_DIR}}/${{INPUT_FILE}}" "${{OUTPUT_DIR}}_IN-PROGRESS/${{INPUT_FILE}}"
rm "${{SLURM_SUBMIT_DIR}}/${{INPUT_FILE}}"

# copy the input xyz file to the output folder and delete the copy in the submit directory
cp "${{SLURM_SUBMIT_DIR}}/${{SLURM_JOB_NAME: 0:-2}}.xyz" "${{OUTPUT_DIR}}_IN-PROGRESS/${{SLURM_JOB_NAME: 0:-2}}.xyz" || echo "NO XYZ INPUT FOR THIS JOB"
rm "${{SLURM_SUBMIT_DIR}}/${{SLURM_JOB_NAME: 0:-2}}.xyz" || echo "NO XYZ FILE TO REMOVE"

# copy the batch script to the output folder and delete the copy in the submit directory
cp "${{SLURM_SUBMIT_DIR}}/${{SLURM_JOB_NAME}}" "${{OUTPUT_DIR}}_IN-PROGRESS/${{SLURM_JOB_NAME}}"
rm "${{SLURM_SUBMIT_DIR}}/${{SLURM_JOB_NAME}}"

# load and export the NBO path
eval "$VGS_LOAD_NBO"

# load the ORCA path
eval "$VGS_LOAD_ORCA"

# load the mpi module and print the JOBID to the slurm out for debugging
eval "$VGS_LOAD_MPI"
JOBID=`echo $SLURM_JOBID`

# configure and pass OMP parameters
#export OMP_NUM_THREADS=1

# go to the output folder
cd "${{OUTPUT_DIR}}_IN-PROGRESS"


#################################################################################################
# run the input file and generate an output file of the same name
# if the timeout is reached it will return exit 124, otherwise it returns the calc exit status
start=`date +%s.%N`
OUTPUT_FILE="${{INPUT_FILE: 0:-4}}.out"
timeout -s SIGTERM $maxtime $VGS_ORCA_PATH "${{OUTPUT_DIR}}_IN-PROGRESS/${{INPUT_FILE}}">"${{OUTPUT_FILE}}"
CALC_STATUS=$?
end=`date +%s.%N`
#################################################################################################


# try to generate the orbital *.cube files and *.html files if the ORCA job was successful
# also try to extract the NBO output (not always called, will pass if no NBO)
if [[ $CALC_STATUS == 0 ]]; then
    cubegen="${{VGSPATH}}/orca_orbital_cubegen.s"
    chmod +x "${{cubegen}}"
    "${{cubegen}}" -f "${{INPUT_FILE: 0:-4}}.gbw" || echo "MO VISUALIZATION FAILED"

    sed -n '/Now starting NBO\.\.\./,/returned from NBO program/p' "${{OUTPUT_FILE}}" | tail -n +2 | head -n -1 > "${{INPUT_FILE: 0:-4}}.nbout" || echo "NBO SCRAPING FAILED"
fi


# get the job status
if [[ $CALC_STATUS == 124 ]]; then
    status="TIMEOUT"
elif [[ $CALC_STATUS != 0 ]]; then
    status="ERROR"
elif [[ $CALC_STATUS == 0 ]]; then
    # check whether "ORCA TERMINATED NORMALLY" appears in the last 10 lines of the output file
    if tail -n 10 "$OUTPUT_FILE" | grep -q "****ORCA TERMINATED NORMALLY****"; then
        status="NORMAL"
    else
        CALC_STATUS=2
        status="INCOMPLETE"
    fi
fi

# log the time for benchmarking in the output file
runtime=$( echo "$end - $start" | bc -l )
echo $runtime
exec 3>>"${{OUTPUT_FILE}}"
echo "">&3
echo "slurmID: ${{SLURM_JOBID}}">&3
echo "totalRuntime[s]: ${{runtime}}">&3
exec 3>&-

# get the number of basis functions used in the first calculation by the SHARK package
myBasis=$(grep -m 1 "Number of basis functions" "${{OUTPUT_FILE}}" | awk '{{print $NF}}' | tr -d '[:space:]')
myBasis=($myBasis)

# record the completion time
myend=$(date "+%Y-%m-%d %H:%M:%S")

# write the total job timing to the job_timings file in the submit directory as a CSV
cd "${{SLURM_SUBMIT_DIR}}"
if [ ! -f job_timings.csv ]; then
    echo "filename,slurmID,nbasisfuncs,start,end,runtime[s],jobstatus" > job_timings.csv
fi
exec 3>>job_timings.csv
echo "${{INPUT_FILE}},${{SLURM_JOBID}},${{myBasis}},${{mystart}},${{myend}},${{runtime}},${{status}}">&3
exec 3>&-


# copy the slurm output to the output folder and delete it from the submit directory
cp "${{SLURM_SUBMIT_DIR}}/slurm-${{SLURM_JOBID}}.out" "${{OUTPUT_DIR}}_IN-PROGRESS/slurm-${{SLURM_JOBID}}-${{INPUT_FILE: 0:-4}}.out"
rm "${{SLURM_SUBMIT_DIR}}/slurm-${{SLURM_JOBID}}.out"


# rename the output directory appropriately
if [[ $status == "TIMEOUT" ]]; then

    mv "${{OUTPUT_DIR}}_IN-PROGRESS" "${{OUTPUT_DIR}}_TIMEOUT"
    exit 124

elif [[ $status == "ERROR" ]]; then

    mv "${{OUTPUT_DIR}}_IN-PROGRESS" "${{OUTPUT_DIR}}_ERROR"
    exit 2

elif [[ $status == "INCOMPLETE" ]]; then

    mv "${{OUTPUT_DIR}}_IN-PROGRESS" "${{OUTPUT_DIR}}_INCOMPLETE"
    exit 2

elif [[ $status == "NORMAL" ]]; then

    mv "${{OUTPUT_DIR}}_IN-PROGRESS" "${{OUTPUT_DIR}}"
    cd "${{OUTPUT_DIR}}"
    # clean up empty files
    if [[ -e "2" && ! -s "2" ]]; then
        rm "2"
    fi
    for nbout_file in *.nbout; do
        if [[ -e "$nbout_file" && ! -s "$nbout_file" ]]; then
            rm "${{nbout_file}}"
        fi
    done
    cd ..

    exit 0
fi"""

orca_restart_template = """#!/bin/bash -x

# --------------------------------------------------------------------------------------------------------------
#
# This script will re-submit the input file specified to ORCA via the slurm scheduler. This script automatically
# edits the last input file to ensure that the previous geometry and wavefunction are read. All output will be
# generated in the same directory as the original calculation that timed out. Script timing will be recorded in
# the slurm.out file, which will be copied to the original output directory only when the job has completed.
# Email status updates will be sent if configured in the config.toml.
#
# --------------------------------------------------------------------------------------------------------------

# must set the account number as an env variable manually - gab will automatically use this
#SBATCH --account={ACTNUM} # you can find the account number by running $ sacctmgr show Account
#SBATCH --nodes={NUM_NODES}
#SBATCH --ntasks-per-node={TASKS_PER_NODE}
#SBATCH --ntasks={N_TASKS}
#SBATCH --export=ALL
#SBATCH --time={TIME} # time when the job will automatically terminate - want the smallest possible overestimate{BENCHMARKING_LINE}
maxtime=3590 # buffer time to allow cleanup, should be ~10 seconds less than the auto termination time

# --------------------------------------------------------------------------------------------------------------

INPUT_FILE="{INPUT_FILE_NAME}"
OLD_JOB_DIR="my_old_job"

# --------------------------------------------------------------------------------------------------------------

# record the submission time
mystart=$(date "+%Y-%m-%d %H:%M:%S")

# rename the old output folder and direct all new output there
OUTPUT_DIR="${{SLURM_SUBMIT_DIR}}/${{OLD_JOB_DIR}}"
mv "${{OUTPUT_DIR}}" "${{OUTPUT_DIR}}_RESTARTED"

# copy the batch script and input file to the output folder and delete the copies in the submit directory
cp "${{SLURM_SUBMIT_DIR}}/${{SLURM_JOB_NAME}}" "${{OUTPUT_DIR}}_RESTARTED/${{SLURM_JOB_NAME}}"
rm "${{SLURM_SUBMIT_DIR}}/${{SLURM_JOB_NAME}}"
cp "${{SLURM_SUBMIT_DIR}}/${{INPUT_FILE}}" "${{OUTPUT_DIR}}_RESTARTED/${{INPUT_FILE}}"
rm "${{SLURM_SUBMIT_DIR}}/${{INPUT_FILE}}"

# load and export the NBO path
eval "$VGS_LOAD_NBO"

# load the mpi module and print the JOBID to the slurm out for debugging
eval "$VGS_LOAD_MPI"
JOBID=`echo $SLURM_JOBID`

# go to the output folder
cd "${{OUTPUT_DIR}}_RESTARTED"


#################################################################################################
# run the input file and generate an output file of the same name
# if the timeout is reached it will return exit 124, otherwise it returns the calc exit status
start=`date +%s.%N`
OUTPUT_FILE="${{INPUT_FILE: 0:-4}}.out"
timeout -s SIGTERM $maxtime $VGS_ORCA_PATH "${{OUTPUT_DIR}}_RESTARTED/${{INPUT_FILE}}">"${{OUTPUT_FILE}}"
CALC_STATUS=$?
end=`date +%s.%N`
#################################################################################################


# try to generate the orbital *.cube files and *.html files if the ORCA job was successful
# also try to extract the NBO output (not always called, will pass if no NBO)
if [[ $CALC_STATUS == 0 ]]; then
    cubegen="${{VGSPATH}}/orca_orbital_cubegen.s"
    chmod +x "${{cubegen}}"
    "${{cubegen}}" -f "${{INPUT_FILE: 0:-4}}.gbw" || echo "MO VISUALIZATION FAILED"

    sed -n '/Now starting NBO\.\.\./,/returned from NBO program/p' "${{OUTPUT_FILE}}" | tail -n +2 | head -n -1 > "${{INPUT_FILE: 0:-4}}.nbout" || echo "NBO SCRAPING FAILED"
fi


# get the job status
if [[ $CALC_STATUS == 124 ]]; then
    status="TIMEOUT"
elif [[ $CALC_STATUS != 0 ]]; then
    status="ERROR"
elif [[ $CALC_STATUS == 0 ]]; then
    status="NORMAL"
fi

# log the time for benchmarking in the output file
runtime=$( echo "$end - $start" | bc -l )
echo $runtime
exec 3>>"${{OUTPUT_FILE}}"
echo "">&3
echo "slurmID: ${{SLURM_JOBID}}">&3
echo "totalRuntime[s]: ${{runtime}}">&3
exec 3>&-

# get the number of basis functions used in the first calculation by the SHARK package
myBasis=$(grep -m 1 "Number of basis functions" "${{OUTPUT_FILE}}" | awk '{{print $NF}}' | tr -d '[:space:]')
myBasis=($myBasis)

# record the completion time
myend=$(date "+%Y-%m-%d %H:%M:%S")

# write the total job timing to the job_timings file in the submit directory as a CSV
cd "${{SLURM_SUBMIT_DIR}}"
if [ ! -f job_timings.csv ]; then
    echo "filename,slurmID,nbasisfuncs,start,end,runtime[s],jobstatus" > job_timings.csv
fi
exec 3>>job_timings.csv
echo "${{INPUT_FILE}},${{SLURM_JOBID}},${{myBasis}},${{mystart}},${{myend}},${{runtime}},${{status}}">&3
exec 3>&-


# copy the slurm output to the output folder and delete it from the submit directory
cp "${{SLURM_SUBMIT_DIR}}/slurm-${{SLURM_JOBID}}.out" "${{OUTPUT_DIR}}_RESTARTED/slurm-${{SLURM_JOBID}}-${{INPUT_FILE: 0:-4}}.out"
rm "${{SLURM_SUBMIT_DIR}}/slurm-${{SLURM_JOBID}}.out"


# rename the output directory appropriately
if [[ $CALC_STATUS == 124 ]]; then

    mv "${{OUTPUT_DIR}}_RESTARTED" "${{OUTPUT_DIR}}_TIMEOUT"
    exit 124

elif [[ $CALC_STATUS != 0 ]]; then

    mv "${{OUTPUT_DIR}}_RESTARTED" "${{OUTPUT_DIR}}_ERROR"
    exit 2

elif [[ $CALC_STATUS == 0 ]]; then

    FINAL_OUTPUT="${{OUTPUT_DIR%_TIMEOUT}}"
    mv "${{OUTPUT_DIR}}_RESTARTED" "${{FINAL_OUTPUT}}"
    cd "${{FINAL_OUTPUT}}"
    # clean up empty files
    if [[ -e "2" && ! -s "2" ]]; then
        rm "2"
    fi
    for nbout_file in *.nbout; do
        if [[ -e "$nbout_file" && ! -s "$nbout_file" ]]; then
            rm "${{nbout_file}}"
        fi
    done
    cd ..

    exit 0
fi"""

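The sed range used in both ORCA templates keeps only the lines strictly between the "Now starting NBO..." and "returned from NBO program" markers (the tail/head pair trims the marker lines themselves). A rough Python equivalent, for a hypothetical output file:

# Hedged sketch of the NBO scrape; water.out is a hypothetical ORCA output.
import re

with open("water.out") as f:
    text = f.read()

match = re.search(r"Now starting NBO\.\.\.\n(.*?)\nreturned from NBO program",
                  text, flags=re.DOTALL)
nbo_section = match.group(1) if match else ""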

@@ -0,0 +1,11 @@
Metadata-Version: 2.3
Name: vyas-group-scripts
Version: 0.3.0
Summary: Add your description here
Author: Ben Payton
Author-email: Ben Payton <bennyp2494@gmail.com>
Requires-Dist: argparse>=1.4.0
Requires-Dist: rich>=14.2.0
Requires-Python: >=3.9
Description-Content-Type: text/markdown

@@ -0,0 +1,9 @@
vyas_group_scripts/__init__.py,sha256=PZJwQzDvN8PbfBDMCXOb0ciJKMir9J35nU1EXRqgARg,131
vyas_group_scripts/gen_batch.py,sha256=WHaw2DsC0gq9FSrkOpnCvwX8LqC-3bHJ-9x1OaGGhOc,7025
vyas_group_scripts/run_batch.py,sha256=RUqEM2UATNq1KOorfszeCfNcdABhwiSlQf6MHnXdENU,881
vyas_group_scripts/templates/g16_batch_inputs.py,sha256=3BbLCZaPOeiivJJlLNc5lL-8bRDcOkAyGIuChqsdUEY,6138
vyas_group_scripts/templates/orca_batch_inputs.py,sha256=moYHIJmhX8NRH5J6NK_TcGLSh_nzWoQVQ1HiDA25yfg,12087
vyas_group_scripts-0.3.0.dist-info/WHEEL,sha256=eh7sammvW2TypMMMGKgsM83HyA_3qQ5Lgg3ynoecH3M,79
vyas_group_scripts-0.3.0.dist-info/entry_points.txt,sha256=7cL-_aZU96ATik2TKSKoUHO0Ar4b7Rhoa3NqoTG8Ki0,121
vyas_group_scripts-0.3.0.dist-info/METADATA,sha256=1qBK7c-hiHuDf2yQdH6-dzDP_Ts_1kIj6niDoX8TTcY,287
vyas_group_scripts-0.3.0.dist-info/RECORD,,