Skip to content

Commit 5f0cfee

Browse files
authored
Update checks for MOM6 restarts when performing a re-run on failure (#4179)
Resolves #3822: employ additional checks on MOM6 state when restarting upon re-run, for robustness. These checks are skipped when `FHOUT_OCN > 6`, such as in the `SFS` runs where `FHOUT_OCN=24`, because that setting causes the problems noted in the issue comment [here](#3822 (comment)). This also: - has a minor fix in `ush/preamble.sh` that prints the `Begin` date time in human readable format similar to `End` date time message.
1 parent 769e3ba commit 5f0cfee

File tree

2 files changed

+33
-12
lines changed

2 files changed

+33
-12
lines changed

ush/forecast_det.sh

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ UFS_det(){
2626
IAU_OFFSET=0
2727
model_start_date_current_cycle=${current_cycle}
2828

29-
DO_LAND_IAU=".false."
29+
DO_LAND_IAU=".false."
3030

3131
# It is still possible that a restart is available from a previous forecast attempt
3232
# So we have to continue checking for restarts
@@ -35,7 +35,7 @@ UFS_det(){
3535
# Let's assume this was not run before and hence this is not a RERUN
3636
RERUN="NO"
3737

38-
# RERUN is only available for RUN=gfs|gefs It is not available for RUN=gdas|enkfgdas|enkfgfs
38+
# RERUN is only available for RUN=gfs|gefs. It is not available for RUN=gdas|enkfgdas|enkfgfs
3939
if [[ "${RUN}" =~ "gdas" ]] || [[ "${RUN}" == "enkfgfs" ]]; then
4040
echo "RERUN is not available for RUN='${RUN}'"
4141
return 0
@@ -46,15 +46,19 @@ UFS_det(){
4646
# shellcheck disable=SC2312
4747
mapfile -t file_array < <(find "${DATArestart}/FV3_RESTART" -name "????????.??0000.coupler.res" | sort)
4848
nrestarts=${#file_array[@]}
49-
if (( nrestarts == 0 )); then
49+
if [[ ${nrestarts} -eq 0 ]]; then
5050
echo "No restarts found in '${DATArestart}/FV3_RESTART', RERUN='${RERUN}'"
5151
return 0
52+
else
53+
echo "Found ${nrestarts} restarts in '${DATArestart}/FV3_RESTART' to check for RERUN"
54+
ls -1 "${DATArestart}/FV3_RESTART/"????????.??0000.coupler.res
5255
fi
5356

5457
# Look in reverse order of file_array to determine available restart times
5558
local ii filepath filename
5659
local rdate seconds
5760
local fv3_rst_ok cmeps_rst_ok mom6_rst_ok cice6_rst_ok ww3_rst_ok
61+
local hdate hdatep1 fhout_ocn_by_2
5862
for (( ii=nrestarts-1; ii>=0; ii-- )); do
5963

6064
filepath="${file_array[ii]}"
@@ -80,9 +84,26 @@ UFS_det(){
8084
if [[ ! -f "${DATArestart}/CMEPS_RESTART/ufs.cpld.cpl.r.${rdate:0:4}-${rdate:4:2}-${rdate:6:2}-${seconds}.nc" ]]; then
8185
cmeps_rst_ok="NO"
8286
fi
83-
if [[ ! -f "${DATArestart}/MOM6_RESTART/${rdate:0:8}.${rdate:8:2}0000.MOM.res.nc" ]]; then
8487
# TODO: add checks for other MOM6 restarts as well
88+
if [[ ! -f "${DATArestart}/MOM6_RESTART/${rdate:0:8}.${rdate:8:2}0000.MOM.res.nc" ]]; then
8589
mom6_rst_ok="NO"
90+
else
91+
# Also check for MOM6 history file availability
92+
# TODO: SFS runs with 24-hr averaging of ocean output, which causes issues with restart checks,
93+
# TODO: so we will skip them for now, and revisit this logic later
94+
if [[ ${FHOUT_OCN} -le 6 ]]; then
95+
fhout_ocn_by_2=$((FHOUT_OCN / 2))
96+
hdate=$(date -u -d "${rdate:0:8} ${rdate:8:2} + ${fhout_ocn_by_2} hours" +"%Y%m%d%H")
97+
if [[ ! -f "${DATAoutput}/MOM6_OUTPUT/ocn_${hdate:0:4}_${hdate:4:2}_${hdate:6:2}_${hdate:8:2}.nc" ]]; then
98+
mom6_rst_ok="NO"
99+
else
100+
# Also check for the next MOM6 history file (hdate + FHOUT_OCN hours)
101+
hdatep1=$(date -u -d "${hdate:0:8} ${hdate:8:2} + ${FHOUT_OCN} hours" +"%Y%m%d%H")
102+
if [[ ! -f "${DATAoutput}/MOM6_OUTPUT/ocn_${hdatep1:0:4}_${hdatep1:4:2}_${hdatep1:6:2}_${hdatep1:8:2}.nc" ]]; then
103+
mom6_rst_ok="NO"
104+
fi
105+
fi
106+
fi
86107
fi
87108
MOM6_RESTART_SETTING='r'
88109
MOM6_INIT_FROM_Z=True

ush/preamble.sh

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
#! /usr/bin/env bash
22

33
#######
4-
# Preamble script to be SOURCED at the beginning of every script. Sets
5-
# useful PS4 and optionally turns on set -x and set -eu. Also sets up
4+
# Preamble script to be SOURCED at the beginning of every script. Sets
5+
# useful PS4 and optionally turns on set -x and set -eu. Also sets up
66
# crude script timing and provides a postamble that runs on exit.
77
#
88
# Syntax:
99
# preamble.sh
10-
#
10+
#
1111
# Input environment variables:
1212
# TRACE (YES/NO): Whether to echo every command (set -x) [default: "YES"]
1313
# STRICT (YES/NO): Whether to exit immediately on error or undefined variable
@@ -26,7 +26,7 @@ start_time=$(date +%s)
2626
_calling_script=${_calling_script:-$(basename "${BASH_SOURCE[1]}")}
2727

2828
# Announce the script has begun
29-
start_time_human=$(date -d"@${start_time}" -u)
29+
start_time_human=$(date -d"@${start_time}" -u +%H:%M:%S)
3030
echo "Begin ${_calling_script} at ${start_time_human}"
3131

3232
declare -x PS4='+ $(basename ${BASH_SOURCE[0]:-${FUNCNAME[0]:-"Unknown"}})[${LINENO}]'
@@ -39,16 +39,16 @@ set_strict() {
3939
}
4040

4141
set_trace() {
42-
# Print the script name and line number of each command as it is
43-
# executed when using trace.
42+
# Print the script name and line number of each command as it is
43+
# executed when using trace.
4444
if [[ ${TRACE:-"YES"} == "YES" ]]; then
4545
set -x
4646
fi
4747
}
4848

4949
postamble() {
5050
#
51-
# Commands to execute when a script ends.
51+
# Commands to execute when a script ends.
5252
#
5353
# Syntax:
5454
# postamble script start_time rc
@@ -98,7 +98,7 @@ function err_exit() {
9898
# Taken from NCO prod_util v2.1.0
9999
# SCRIPT NAME: err_exit
100100
#
101-
# ABSTRACT: This script is to be used when a fatal error or condition
101+
# ABSTRACT: This script is to be used when a fatal error or condition
102102
# has been reached and you want to terminate the job.
103103
#
104104
# USAGE: To use this script one must export the following variables to the

0 commit comments

Comments
 (0)