#!/usr/bin/env python
"""Generate PBS job scripts that run R scripts under DMTCP.

Every file in the current directory whose name contains both
'modelimp1_global' and 'CLUSTER' is treated as one job. Jobs are grouped
into batches of NPROCS, and one executable shell script
(esmglobal_NN.sh) is written per batch into the current directory.
"""
import os

# New version of this script. Now we use DMTCP to launch
# the scripts.

# Shell script header shared by every batch. The four %-placeholders are,
# in order: processors per node, PBS job name, stdout log path, stderr
# log path. The text must not contain any other '%' character.
#
# NOTE(review): the job walltime is 30 minutes while dmtcp_launch is given
# '-i 86400'; if that option is the checkpoint interval in seconds, no
# checkpoint would be written before the job is killed -- TODO confirm.
JOB_TEMPLATE = """#!/bin/bash
#PBS -A eim-670-aa
#PBS -l nodes=1:ppn=%d
#PBS -l walltime=00:30:00
#PBS -V
#PBS -N %s
#PBS -o %s
#PBS -e %s

function rundmtcpjob () {
    jobfile=$1
    port=$2
    jobname=$(basename ${jobfile} .R)
    optdir=/gs/project/eim-670-aa/jmateos/esmglobalfit

    # Create job directory within ${optdir} and copy all files there
    # If it already exists, it might mean the script already run once,
    # so don't do anything.
    scdir=${optdir}/${jobname}
    if [ ! -e ${scdir} ]
    then
        mkdir ${scdir}
        cp -va * ${scdir}
    fi

    # Move to $scdir and run the script using dmtcp_launch, as in the
    # workshop. Will use that directory as the temporary one.
    cd ${scdir}
    if [ -e "dmtcp_restart_script.sh" ]
    then
        ./dmtcp_restart_script.sh -p ${port} -h $(hostname)
    else
        dmtcp_launch -i 86400 -p ${port} R CMD BATCH ${jobfile}
    fi
}

cd /home/jmateos/code/devmodel/devmodelR
"""


def chunks(l, n):
    """Yield successive n-sized chunks from l.

    l must support len() and slicing; n is the chunk size. The last
    chunk is shorter when len(l) is not a multiple of n, and nothing is
    yielded for an empty l.
    """
    # range() instead of xrange() so this runs on Python 2 and Python 3.
    for start in range(0, len(l), n):
        yield l[start:start + n]


## MAIN ##
if __name__ == "__main__":
    NPROCS = 12

    # Get list of scripts to run. They are files with both
    # 'modelimp1_global' and 'CLUSTER' on their names. Each
    # script is one job to launch.
    scripts = sorted(
        name for name in os.listdir('.')
        if 'CLUSTER' in name and 'modelimp1_global' in name
    )

    # We'll save temporary results in the projects directory, so we
    # don't have to worry about quotas on the scratch one. Might need
    # these data for several weeks.
    # Keep in sync with the 'optdir' hard-coded inside JOB_TEMPLATE.
    optdir = "/gs/project/eim-670-aa/jmateos/esmglobalfit"

    # Port list for DMTCP: one port per job slot in a batch
    # (7701..7712 for NPROCS = 12).
    ports = range(7701, 7701 + NPROCS)

    ## MAIN LOOP ##
    for job_id, batch in enumerate(chunks(scripts, NPROCS), 1):
        jobname = "esmglobal_%02d" % job_id
        header = JOB_TEMPLATE % (
            len(batch),
            jobname,
            optdir + '/' + jobname + '.log',
            optdir + '/' + jobname + '.err',
        )

        jobsfile = jobname + '.sh'
        # 'with' closes the file even if a write fails part-way.
        with open(jobsfile, 'w') as f:
            f.write(header)
            # Launch every job of the batch in the background, each with
            # its own port, then wait for all of them.
            for script, port in zip(batch, ports):
                f.write("rundmtcpjob %s %d &\n" % (script, port))
            f.write("wait\n")

        # 0o755 is the octal spelling accepted by Python 2.6+ and 3.
        os.chmod(jobsfile, 0o755)
    # end for loop