/usr/bin/dmtcp_rm_loclaunch is in dmtcp 2.3.1-6.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
#!/bin/bash
# ****************************************************************************
# *  Copyright (C) 2012-2014 by Artem Y. Polyakov <artpol84@gmail.com>       *
# *                                                                          *
# *  This file is part of the RM plugin for DMTCP                            *
# *                                                                          *
# *  RM plugin is free software: you can redistribute it and/or              *
# *  modify it under the terms of the GNU Lesser General Public License as   *
# *  published by the Free Software Foundation, either version 3 of the      *
# *  License, or (at your option) any later version.                         *
# *                                                                          *
# *  RM plugin is distributed in the hope that it will be useful,            *
# *  but WITHOUT ANY WARRANTY; without even the implied warranty of          *
# *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the           *
# *  GNU Lesser General Public License for more details.                     *
# *                                                                          *
# *  You should have received a copy of the GNU Lesser General Public        *
# *  License along with DMTCP:dmtcp/src.  If not, see                        *
# *  <http://www.gnu.org/licenses/>.                                         *
# ****************************************************************************/
#######################################
# Record the current SLURM srun settings in one file per checkpoint image.
# Globals:
#   TMPDIR                (read)    base temp directory; /tmp when unset/empty
#   SLURM_SRUN_COMM_HOST  (read)
#   SLURM_SRUN_COMM_PORT  (read)
#   SLURMTMPDIR           (read)
#   DMTCP_TMPDIR          (written) directory that receives the files
# Arguments:
#   $1 - whitespace-separated list of checkpoint image names (*.dmtcp)
# Outputs:
#   Writes $DMTCP_TMPDIR/slurm_env_<id> for every image, where <id> is the
#   part of the image name after its last '_' with ".dmtcp" stripped.
# Returns:
#   1 if the directory cannot be created, otherwise the status of the last
#   file write.
#######################################
prepare_SLURM_env()
{
  local ckpt_files="$1"
  local ckpt_file image_id env_file

  # DMTCP_TMPDIR is deliberately left global (not "local"): it is assigned
  # at script scope so later code in this shell sees the same value.
  # Fall back to /tmp so an unset TMPDIR does not resolve to "/dmtcp-...".
  DMTCP_TMPDIR="${TMPDIR:-/tmp}/dmtcp-$(whoami)@$(hostname)"

  # mkdir -p is a no-op when the directory already exists.
  if ! mkdir -p -- "$DMTCP_TMPDIR"; then
    echo "$(hostname): cannot create directory $DMTCP_TMPDIR" >&2
    return 1
  fi

  # $ckpt_files is intentionally unquoted: it is a whitespace-separated list.
  for ckpt_file in $ckpt_files; do
    image_id=${ckpt_file%.dmtcp}
    image_id=${image_id##*_}
    env_file="$DMTCP_TMPDIR/slurm_env_${image_id}"
    # A single quoted redirection keeps paths with spaces intact and
    # truncates the file once before the three lines are written.
    {
      echo "SLURM_SRUN_COMM_HOST=$SLURM_SRUN_COMM_HOST"
      echo "SLURM_SRUN_COMM_PORT=$SLURM_SRUN_COMM_PORT"
      echo "SLURMTMPDIR=$SLURMTMPDIR"
    } > "$env_file"
  done
}
# Restart the checkpoint images assigned to this node with dmtcp_restart.
#
# The work to do is described by these variables:
#   DMTCP_REMLAUNCH_NODES           number of nodes in the restart
#   DMTCP_REMLAUNCH_<node>_SLOTS    number of launch slots on node <node>
#   DMTCP_REMLAUNCH_<node>_<slot>   whitespace-separated checkpoint images
#   DMTCP_HOST, DMTCP_PORT          passed to dmtcp_restart --host/--port
#
# SLURM branch: each task restarts only the images of its own slot
#   (node = SLURM_NODEID, slot = SLURM_LOCALID).
# PBS branch: one invocation restarts the images of every slot of the node
#   (node = PBS_NODENUM); the variables above are supplied as shell code in $1.
#
# NOTE(review): every early exit, including the error paths, uses status 0.
# This looks deliberate (presumably so the resource manager does not treat a
# skipped task as a job failure) -- confirm before changing it.
# The bare "set" before most exits dumps all shell variables for debugging.
if [ -n "$SLURM_JOBID" ] || [ -n "$SLURM_JOB_ID" ]; then
  NODE=$SLURM_NODEID
  if [ -z "$NODE" ]; then
    # Something went wrong; should not happen.
    echo "Cannot determine SLURM_NODEID. Exit."
    set
    exit 0
  fi

  # Determine total number of nodes
  NODES=$DMTCP_REMLAUNCH_NODES
  if [ -z "$NODES" ] || [ "$NODE" -ge "$NODES" ]; then
    # Something went wrong; should not happen.
    echo "No DMTCP environment or bad ID values: ID=$NODE, IDS=$NODES. Exit."
    set
    exit 0
  fi

  # Indirect expansion reads DMTCP_REMLAUNCH_<node>_SLOTS without eval.
  SLOTS_VAR="DMTCP_REMLAUNCH_${NODE}_SLOTS"
  LOCAL_SLOTS=${!SLOTS_VAR}
  if [ "${LOCAL_SLOTS}" = 0 ] || [ -z "${LOCAL_SLOTS}" ]; then
    echo "$(hostname): nothing to launch \${DMTCP_REMLAUNCH_${NODE}_SLOTS} = ${LOCAL_SLOTS}"
    set
    exit 0
  fi
  # Tasks beyond the number of slots on this node have nothing to restart.
  if [ "$SLURM_LOCALID" -ge "$LOCAL_SLOTS" ]; then
    echo "$(hostname): Will not use SLURM_LOCALID=$SLURM_LOCALID for launch, max is $LOCAL_SLOTS"
    exit 0
  fi

  FILES_VAR="DMTCP_REMLAUNCH_${NODE}_${SLURM_LOCALID}"
  LOCAL_FILES=${!FILES_VAR}
  if [ -z "$LOCAL_FILES" ]; then
    echo "$(hostname): Bad LOCAL_FILES variable DMTCP_REMLAUNCH_${NODE}_${SLURM_LOCALID}"
    set
    exit 0
  fi

  prepare_SLURM_env "$LOCAL_FILES"
  # $LOCAL_FILES is intentionally unquoted: one argument per image.
  dmtcp_restart --join --host "$DMTCP_HOST" --port "$DMTCP_PORT" $LOCAL_FILES

  # Accumulate logs from computing nodes (only the first task on each node).
  if [ -d ./LOGS ] && [ "${SLURM_LOCALID}" -eq 0 ]; then
    TDIR=${SLURMTMPDIR:-$TMPDIR}
    if [ -n "$TDIR" ]; then
      # Delete the node-local copies only after they were copied successfully,
      # so a failed copy does not lose the logs.
      cp -R "$TDIR"/dmtcp* ./LOGS/ && rm -R "$TDIR"/dmtcp*
    fi
  fi
elif [ "$PBS_ENVIRONMENT" = PBS_BATCH ] && [ -n "$PBS_JOBID" ]; then
  # ./LOGS and any relative image paths are resolved against this directory.
  cd "$PBS_O_WORKDIR" \
    || echo "$(hostname): cannot cd to PBS_O_WORKDIR=$PBS_O_WORKDIR" >&2

  NODE=$PBS_NODENUM
  if [ -z "$NODE" ]; then
    # Something went wrong; should not happen.
    echo "Cannot determine number of this node PBS_NODENUM=$PBS_NODENUM. Exit."
    set
    exit 0
  fi
  if [ -z "$1" ]; then
    echo "$0: Not enough parameters: $*. Exit."
    exit 0
  fi
  # NOTE(security): $1 is executed as shell code (it is expected to assign the
  # DMTCP_REMLAUNCH_* variables). It must only ever come from a trusted caller.
  eval "$1"

  # Determine total number of nodes
  NODES=$DMTCP_REMLAUNCH_NODES
  if [ -z "$NODES" ] || [ "$NODE" -ge "$NODES" ]; then
    # Something went wrong; should not happen.
    echo "No DMTCP environment or bad ID values: ID=$NODE, IDS=$NODES. Exit."
    set
    exit 0
  fi

  SLOTS_VAR="DMTCP_REMLAUNCH_${NODE}_SLOTS"
  LOCAL_SLOTS=${!SLOTS_VAR}
  if [ "${LOCAL_SLOTS}" = 0 ] || [ -z "${LOCAL_SLOTS}" ]; then
    echo "$(hostname): nothing to launch \${DMTCP_REMLAUNCH_${NODE}_SLOTS} = ${LOCAL_SLOTS}"
    set
    exit 0
  fi

  # Collect the images of every slot on this node. Empty slots are skipped so
  # that LOCAL_FILES stays empty when no slot names any image.
  MAX_SLOT=$((LOCAL_SLOTS - 1))
  LOCAL_FILES=""
  for ((slot = 0; slot <= MAX_SLOT; slot++)); do
    FILES_VAR="DMTCP_REMLAUNCH_${NODE}_${slot}"
    LOCAL_FILES_TMP=${!FILES_VAR}
    if [ -n "$LOCAL_FILES_TMP" ]; then
      LOCAL_FILES="${LOCAL_FILES:+$LOCAL_FILES }$LOCAL_FILES_TMP"
    fi
    unset LOCAL_FILES_TMP
  done

  if [ -z "$LOCAL_FILES" ]; then
    echo "$(hostname): Bad LOCAL_FILES: DMTCP_REMLAUNCH_${NODE}_0..DMTCP_REMLAUNCH_${NODE}_${MAX_SLOT} are all empty"
    set
    exit 0
  fi

  # $LOCAL_FILES is intentionally unquoted: one argument per image.
  dmtcp_restart --join --host "$DMTCP_HOST" --port "$DMTCP_PORT" $LOCAL_FILES

  # Collect logs; this branch reads them from /tmp rather than TMPDIR.
  if [ -d ./LOGS ]; then
    cp -R /tmp/dmtcp* ./LOGS/
  fi
fi
 |