util: Delete util/batch, util/pbs, and util qdo.

The util/pbs directory has a set of python scripts which were written to
submit jobs to the PBS pool at the University of Michigan. They aren't
incredibly specialized for that environment, but they do have a little
bit of hard coding which, for instance, uses paths which are only
meaningful there.

The util/batch directory was added alongside a seemingly unrelated
change (perhaps by accident?) and is a slightly updated copy of util/pbs
which also (or instead?) supports OAR.

The qdo script seems to be a script for managing job queues on PBS
and/or OAR, and is also tuned to the UofM environment, for instance
insisting that a path starts with /n/poolfs so that files are available
on an NFS volume shared with the pool.

All three of these scripts could potentially be useful with modification
in a similar environment, but also all three are unmaintained. The
environment in UofM may no longer actually match the expectations of
these scripts, and even if it does/did, gem5 may no longer be 100%
compatible with them.

If these scripts sit in util not being used by anyone, they add clutter
and complexity without adding any value. If someone really needs to know
what was once in them, they can be recovered from revision control.

Change-Id: I0192bd119893f7a41fcb820f4cf408609b03cd27
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/30957
Reviewed-by: Steve Reinhardt <stever@gmail.com>
Reviewed-by: Andreas Sandberg <andreas.sandberg@arm.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
Gabe Black
2020-07-04 16:47:41 -07:00
parent 57a78b2115
commit 7deb34b24d
7 changed files with 0 additions and 1736 deletions

View File

@@ -1,247 +0,0 @@
# Copyright (c) 2006 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import os, popen2, re, sys
class MyPOpen(object):
def __init__(self, cmd, input = None, output = None, bufsize = -1):
self.status = -1
if input is None:
p2c_read, p2c_write = os.pipe()
self.tochild = os.fdopen(p2c_write, 'w', bufsize)
else:
p2c_write = None
if isinstance(input, file):
p2c_read = input.fileno()
elif isinstance(input, str):
input = file(input, 'r')
p2c_read = input.fileno()
elif isinstance(input, int):
p2c_read = input
else:
raise AttributeError
if output is None:
c2p_read, c2p_write = os.pipe()
self.fromchild = os.fdopen(c2p_read, 'r', bufsize)
else:
c2p_read = None
if isinstance(output, file):
c2p_write = output.fileno()
elif isinstance(output, str):
output = file(output, 'w')
c2p_write = output.fileno()
elif isinstance(output, int):
c2p_write = output
else:
raise AttributeError
self.pid = os.fork()
if self.pid == 0:
os.dup2(p2c_read, sys.stdin.fileno())
os.dup2(c2p_write, sys.stdout.fileno())
os.dup2(c2p_write, sys.stderr.fileno())
try:
os.execvp(cmd[0], cmd)
finally:
os._exit(1)
os.close(p2c_read)
os.close(c2p_write)
def poll(self):
if self.status < 0:
pid, status = os.waitpid(self.pid, os.WNOHANG)
if pid == self.pid:
self.status = status
return self.status
def wait(self):
if self.status < 0:
pid, status = os.waitpid(self.pid, 0)
if pid == self.pid:
self.status = status
return self.status
class oarsub:
def __init__(self):
self.walltime = None
self.queue = None
self.properties = None
# OAR 2.0 parameters only!
self.name = None
self.afterok = None
self.notify = None
self.stderr = None
self.stdout = None
self.oarhost = None
self.oarsub = 'oarsub'
self.jobid = re.compile('IdJob = (\S+)')
#self.outfile = open("jobnames.dat", "a+")
def build(self, script, args = []):
self.cmd = [ self.oarsub ]
print "args:", args
print "script:", script
if self.properties:
self.cmd.append('-p"%s"' % self.properties )
if self.queue:
self.cmd.append('-q "%s"' % self.queue)
if self.walltime:
self.cmd.append('-l walltime=%s' % self.walltime)
if script[0] != "/":
self.script = os.getcwd()
else:
self.script = script
self.cmd.extend(args)
self.cmd.append(self.script)
#cmd = [ 'ssh', '-x', self.oarhost, '"cd %s; %s"' % (os.getcwd(), self.command) ]
self.command = ' '.join(self.cmd)
print "command: [%s]" % self.command
def do(self):
oar = MyPOpen(self.cmd)
self.result = oar.fromchild.read()
ec = oar.wait()
if ec != 0 and self.oarhost:
pstdin, pstdout = os.popen4(self.command)
self.result = pstdout.read()
jobid = self.jobid.match(self.result)
if jobid == None:
print "Couldn't get jobid from [%s]" % self.result
sys.exit(1)
else:
#self.outfile.write("%d %s\n" %(int(jobid.group(1)), self.name));
#self.outfile.flush()
self.result = jobid.group(1)
return 0
class qsub:
def __init__(self):
self.afterok = None
self.hold = False
self.join = False
self.keep_stdout = False
self.keep_stderr = False
self.node_type = None
self.mail_abort = False
self.mail_begin = False
self.mail_end = False
self.name = None
self.stdout = None
self.priority = None
self.queue = None
self.pbshost = None
self.qsub = 'qsub'
self.env = {}
def build(self, script, args = []):
self.cmd = [ self.qsub ]
if self.env:
arg = '-v'
arg += ','.join([ '%s=%s' % i for i in self.env.iteritems() ])
self.cmd.append(arg)
if self.hold:
self.cmd.append('-h')
if self.stdout:
self.cmd.append('-olocalhost:' + self.stdout)
if self.keep_stdout and self.keep_stderr:
self.cmd.append('-koe')
elif self.keep_stdout:
self.cmd.append('-ko')
elif self.keep_stderr:
self.cmd.append('-ke')
else:
self.cmd.append('-kn')
if self.join:
self.cmd.append('-joe')
if self.node_type:
self.cmd.append('-lnodes=' + self.node_type)
if self.mail_abort or self.mail_begin or self.mail_end:
flags = ''
if self.mail_abort:
flags.append('a')
if self.mail_begin:
flags.append('b')
if self.mail_end:
flags.append('e')
if len(flags):
self.cmd.append('-m ' + flags)
else:
self.cmd.append('-mn')
if self.name:
self.cmd.append("-N%s" % self.name)
if self.priority:
self.cmd.append('-p' + self.priority)
if self.queue:
self.cmd.append('-q' + self.queue)
if self.afterok:
self.cmd.append('-Wdepend=afterok:%s' % self.afterok)
self.cmd.extend(args)
self.script = script
self.command = ' '.join(self.cmd + [ self.script ])
def do(self):
pbs = MyPOpen(self.cmd + [ self.script ])
self.result = pbs.fromchild.read()
ec = pbs.wait()
if ec != 0 and self.pbshost:
cmd = ' '.join(self.cmd + [ '-' ])
cmd = [ 'ssh', '-x', self.pbshost, cmd ]
self.command = ' '.join(cmd)
ssh = MyPOpen(cmd, input = self.script)
self.result = ssh.fromchild.read()
ec = ssh.wait()
return ec

View File

@@ -1,244 +0,0 @@
#!/usr/bin/env python2.7
# Copyright (c) 2006 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import os, os.path, shutil, signal, socket, sys
from os import environ as env
from os.path import join as joinpath, expanduser
def date():
import time
return time.strftime('%a %b %e %H:%M:%S %Z %Y', time.localtime())
def cleandir(dir):
for root, dirs, files in os.walk(dir, False):
for name in files:
os.remove(joinpath(root, name))
for name in dirs:
os.rmdir(joinpath(root, name))
class rsync:
def __init__(self):
self.sudo = False
self.rsync = 'rsync'
self.compress = False
self.archive = True
self.delete = False
self.options = ''
def do(self, src, dst):
args = []
if self.sudo:
args.append('sudo')
args.append(self.rsync)
if (self.archive):
args.append('-a')
if (self.compress):
args.append('-z')
if (self.delete):
args.append('--delete')
if len(self.options):
args.append(self.options)
args.append(src)
args.append(dst)
return os.spawnvp(os.P_WAIT, args[0], args)
class JobDir(object):
def __init__(self, dir):
self.dir = dir
def file(self, filename):
return joinpath(self.dir, filename)
def create(self):
if os.path.exists(self.dir):
if not os.path.isdir(self.dir):
sys.exit('%s is not a directory. Cannot build job' % self.dir)
else:
os.mkdir(self.dir)
def exists(self):
return os.path.isdir(self.dir)
def clean(self):
cleandir(self.dir)
def hasfile(self, filename):
return os.path.isfile(self.file(filename))
def echofile(self, filename, string):
filename = self.file(filename)
try:
f = file(filename, 'w')
print >>f, string
f.flush()
f.close()
except IOError,e:
sys.exit(e)
def rmfile(self, filename):
filename = self.file(filename)
if os.path.isfile(filename):
os.unlink(filename)
def readval(self, filename):
filename = self.file(filename)
f = file(filename, 'r')
value = f.readline().strip()
f.close()
return value
def setstatus(self, string):
filename = self.file('.status')
try:
f = file(filename, 'a')
print >>f, string
f.flush()
f.close()
except IOError,e:
sys.exit(e)
def getstatus(self):
filename = self.file('.status')
try:
f = file(filename, 'r')
except IOError, e:
return 'none'
# fast forward to the end
for line in f: pass
# the first word on the last line is the status
return line.split(' ')[0]
def __str__(self):
return self.dir
if __name__ == '__main__':
import platform
binaries = { 'i686' : 'm5.i386',
'x86_64' : 'm5.amd64' }
binary = binaries[platform.machine()]
cwd = os.getcwd()
rootdir = env.setdefault('ROOTDIR', os.path.dirname(cwd))
oar_jobid = int(env['OAR_JOBID'])
oar_jobname = os.path.basename(cwd)
#pbs_jobname = env['PBS_JOBNAME']
basedir = joinpath(rootdir, 'Base')
jobname = env.setdefault('JOBNAME', oar_jobname)
jobfile = env.setdefault('JOBFILE', joinpath(rootdir, 'Test.py'))
outdir = env.setdefault('OUTPUT_DIR', cwd)
env['POOLJOB'] = 'True'
if os.path.isdir("/work"):
workbase = "/work"
else:
workbase = "/tmp/"
workdir = joinpath(workbase, '%s.%s' % (env['USER'], oar_jobid))
host = socket.gethostname()
os.umask(0022)
jobdir = JobDir(outdir)
started = date()
jobdir.echofile('.running', started)
jobdir.rmfile('.queued')
jobdir.echofile('.host', host)
jobdir.setstatus('running on %s on %s' % (host, started))
if os.path.isdir(workdir):
cleandir(workdir)
else:
os.mkdir(workdir)
if False and os.path.isdir('/z/dist'):
sync = rsync()
sync.delete = True
sync.sudo = True
sync.do('poolfs::dist/m5/', '/z/dist/m5/')
try:
os.chdir(workdir)
except OSError,e:
sys.exit(e)
os.symlink(jobdir.file('output'), 'status.out')
args = [ joinpath(basedir, binary), joinpath(basedir, 'run.py') ]
if not len(args):
sys.exit("no arguments")
print 'starting job... %s' % started
print ' '.join(args)
print
sys.stdout.flush()
childpid = os.fork()
if not childpid:
# Execute command
sys.stdin.close()
fd = os.open(jobdir.file("output"),
os.O_WRONLY | os.O_CREAT | os.O_TRUNC)
os.dup2(fd, sys.stdout.fileno())
os.dup2(fd, sys.stderr.fileno())
os.execvp(args[0], args)
def handler(signum, frame):
if childpid != 0:
os.kill(childpid, signum)
signal.signal(signal.SIGHUP, handler)
signal.signal(signal.SIGINT, handler)
signal.signal(signal.SIGQUIT, handler)
signal.signal(signal.SIGTERM, handler)
signal.signal(signal.SIGCONT, handler)
signal.signal(signal.SIGUSR1, handler)
signal.signal(signal.SIGUSR2, handler)
done = 0
while not done:
try:
thepid,ec = os.waitpid(childpid, 0)
if ec:
print 'Exit code ', ec
status = 'failure'
else:
status = 'success'
done = 1
except OSError:
pass
complete = date()
print '\njob complete... %s' % complete
jobdir.echofile('.%s' % status, complete)
jobdir.rmfile('.running')
jobdir.setstatus('%s on %s' % (status, complete))

View File

@@ -1,304 +0,0 @@
#!/usr/bin/env python2.7
# Copyright (c) 2006 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import os, os.path, re, socket, sys
from os import environ as env, listdir
from os.path import basename, isdir, isfile, islink, join as joinpath, normpath
from filecmp import cmp as filecmp
from shutil import copy
def nfspath(dir):
if dir.startswith('/.automount/'):
dir = '/n/%s' % dir[12:]
elif not dir.startswith('/n/'):
dir = '/n/%s%s' % (socket.gethostname().split('.')[0], dir)
return dir
def syncdir(srcdir, destdir):
srcdir = normpath(srcdir)
destdir = normpath(destdir)
if not isdir(destdir):
sys.exit('destination directory "%s" does not exist' % destdir)
for root, dirs, files in os.walk(srcdir):
root = normpath(root)
prefix = os.path.commonprefix([root, srcdir])
root = root[len(prefix):]
if root.startswith('/'):
root = root[1:]
for rem in [ d for d in dirs if d.startswith('.') or d == 'SCCS']:
dirs.remove(rem)
for entry in dirs:
newdir = joinpath(destdir, root, entry)
if not isdir(newdir):
os.mkdir(newdir)
print 'mkdir', newdir
for i,d in enumerate(dirs):
if islink(joinpath(srcdir, root, d)):
dirs[i] = joinpath(d, '.')
for entry in files:
dest = normpath(joinpath(destdir, root, entry))
src = normpath(joinpath(srcdir, root, entry))
if not isfile(dest) or not filecmp(src, dest):
print 'copy %s %s' % (dest, src)
copy(src, dest)
progpath = nfspath(sys.path[0])
progname = basename(sys.argv[0])
usage = """\
Usage:
%(progname)s [-c] [-e] [-f] [-j <jobfile>] [-q queue] [-v] <regexp>
-c clean directory if job can be run
-C submit the checkpointing runs
-d Make jobs be dependent on the completion of the checkpoint runs
-e only echo pbs command info, don't actually send the job
-f force the job to run regardless of state
-q <queue> submit job to the named queue
-j <jobfile> specify the jobfile (default is <rootdir>/Test.py)
-v be verbose
%(progname)s [-j <jobfile>] -l [-v] <regexp>
-j <jobfile> specify the jobfile (default is <rootdir>/Test.py)
-l list job names, don't submit
-v be verbose (list job parameters)
%(progname)s -h
-h display this help
""" % locals()
try:
import getopt
opts, args = getopt.getopt(sys.argv[1:], '-Ccdefhj:lnq:Rt:v')
except getopt.GetoptError:
sys.exit(usage)
depend = False
clean = False
onlyecho = False
exprs = []
force = False
listonly = False
queue = ''
verbose = False
jfile = 'Test.py'
docpts = False
doruns = True
runflag = False
node_type = 'FAST'
update = True
for opt,arg in opts:
if opt == '-C':
docpts = True
if opt == '-c':
clean = True
if opt == '-d':
depend = True
if opt == '-e':
onlyecho = True
if opt == '-f':
force = True
if opt == '-h':
print usage
sys.exit(0)
if opt == '-j':
jfile = arg
if opt == '-l':
listonly = True
if opt == '-n':
update = False
if opt == '-q':
queue = arg
if opt == '-R':
runflag = True
if opt == '-t':
node_type = arg
if opt == '-v':
verbose = True
if docpts:
doruns = runflag
for arg in args:
exprs.append(re.compile(arg))
import jobfile, batch
from job import JobDir, date
conf = jobfile.JobFile(jfile)
if update and not listonly and not onlyecho and isdir(conf.linkdir):
if verbose:
print 'Checking for outdated files in Link directory'
if not isdir(conf.basedir):
os.mkdir(conf.basedir)
syncdir(conf.linkdir, conf.basedir)
jobnames = {}
joblist = []
if docpts and doruns:
gen = conf.alljobs()
elif docpts:
gen = conf.checkpoints()
elif doruns:
gen = conf.jobs()
for job in gen:
if job.name in jobnames:
continue
if exprs:
for expr in exprs:
if expr.match(job.name):
joblist.append(job)
break
else:
joblist.append(job)
if listonly:
if verbose:
for job in joblist:
job.printinfo()
else:
for job in joblist:
print job.name
sys.exit(0)
if not onlyecho:
newlist = []
for job in joblist:
jobdir = JobDir(joinpath(conf.rootdir, job.name))
if jobdir.exists():
if not force:
status = jobdir.getstatus()
if status == 'queued':
continue
if status == 'running':
continue
if status == 'success':
continue
if not clean:
sys.exit('job directory %s not clean!' % jobdir)
jobdir.clean()
newlist.append(job)
joblist = newlist
class NameHack(object):
def __init__(self, host='pbs.pool', port=24465):
self.host = host
self.port = port
self.socket = None
def setname(self, jobid, jobname):
try:
jobid = int(jobid)
except ValueError:
jobid = int(jobid.strip().split('.')[0])
jobname = jobname.strip()
# since pbs can handle jobnames of 15 characters or less,
# don't use the raj hack.
if len(jobname) <= 15:
return
if self.socket is None:
import socket
self.socket = socket.socket()
# Connect to pbs.pool and send the jobid/jobname pair to port
# 24465 (Raj didn't realize that there are only 64k ports and
# setup inetd to point to port 90001)
self.socket.connect((self.host, self.port))
self.socket.send("%s %s\n" % (jobid, jobname))
namehack = NameHack()
rootdir = conf.rootdir
script = joinpath(rootdir, 'Base', 'job.py')
for job in joblist:
jobdir = JobDir(joinpath(rootdir, job.name))
if depend:
cptdir = JobDir(joinpath(rootdir, job.checkpoint.name))
path = str(cptdir)
if not isdir(path) or not isfile(joinpath(path, '.success')):
continue
cptjob = cptdir.readval('.batch_jobid')
if not onlyecho:
jobdir.create()
os.chdir(str(jobdir))
os.environ['PWD'] = str(jobdir)
print 'Job name: %s' % job.name
print 'Job directory: %s' % jobdir
qsub = batch.oarsub()
qsub.oarhost = 'poolfs.eecs.umich.edu'
#qsub.stdout = jobdir.file('jobout')
qsub.name = job.name
qsub.walltime = '50'
#qsub.join = True
#qsub.node_type = node_type
#qsub.env['ROOTDIR'] = conf.rootdir
#qsub.env['JOBNAME'] = job.name
#if depend:
# qsub.afterok = cptjob
#if queue:
# qsub.queue = queue
qsub.properties = "64bit = 'Yes' or 64bit = 'No'"
qsub.build(script)
if verbose:
print 'cwd: %s' % qsub.command
print 'PBS Command: %s' % qsub.command
if not onlyecho:
ec = qsub.do()
if ec == 0:
jobid = qsub.result
print 'OAR Jobid: %s' % jobid
#namehack.setname(jobid, job.name)
queued = date()
jobdir.echofile('.batch_jobid', jobid)
jobdir.echofile('.batch_jobname', job.name)
jobdir.echofile('.queued', queued)
jobdir.setstatus('queued on %s' % queued)
else:
print 'OAR Failed'
print
print

View File

@@ -1,237 +0,0 @@
#!/usr/bin/env python2.7
# Copyright (c) 2005 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import os, os.path, shutil, signal, socket, sys
from os import environ as env
from os.path import join as joinpath, expanduser
def date():
import time
return time.strftime('%a %b %e %H:%M:%S %Z %Y', time.localtime())
def cleandir(dir):
for root, dirs, files in os.walk(dir, False):
for name in files:
os.remove(joinpath(root, name))
for name in dirs:
os.rmdir(joinpath(root, name))
class rsync:
def __init__(self):
self.sudo = False
self.rsync = 'rsync'
self.compress = False
self.archive = True
self.delete = False
self.options = ''
def do(self, src, dst):
args = []
if self.sudo:
args.append('sudo')
args.append(self.rsync)
if (self.archive):
args.append('-a')
if (self.compress):
args.append('-z')
if (self.delete):
args.append('--delete')
if len(self.options):
args.append(self.options)
args.append(src)
args.append(dst)
return os.spawnvp(os.P_WAIT, args[0], args)
class JobDir(object):
def __init__(self, dir):
self.dir = dir
def file(self, filename):
return joinpath(self.dir, filename)
def create(self):
if os.path.exists(self.dir):
if not os.path.isdir(self.dir):
sys.exit('%s is not a directory. Cannot build job' % self.dir)
else:
os.mkdir(self.dir)
def exists(self):
return os.path.isdir(self.dir)
def clean(self):
cleandir(self.dir)
def hasfile(self, filename):
return os.path.isfile(self.file(filename))
def echofile(self, filename, string):
filename = self.file(filename)
try:
f = file(filename, 'w')
print >>f, string
f.flush()
f.close()
except IOError,e:
sys.exit(e)
def rmfile(self, filename):
filename = self.file(filename)
if os.path.isfile(filename):
os.unlink(filename)
def readval(self, filename):
filename = self.file(filename)
f = file(filename, 'r')
value = f.readline().strip()
f.close()
return value
def setstatus(self, string):
filename = self.file('.status')
try:
f = file(filename, 'a')
print >>f, string
f.flush()
f.close()
except IOError,e:
sys.exit(e)
def getstatus(self):
filename = self.file('.status')
try:
f = file(filename, 'r')
except IOError, e:
return 'none'
# fast forward to the end
for line in f: pass
# the first word on the last line is the status
return line.split(' ')[0]
def __str__(self):
return self.dir
if __name__ == '__main__':
rootdir = env.setdefault('ROOTDIR', os.getcwd())
pbs_jobid = env['PBS_JOBID']
pbs_jobname = env['PBS_JOBNAME']
basedir = joinpath(rootdir, 'Base')
jobname = env.setdefault('JOBNAME', pbs_jobname)
jobfile = env.setdefault('JOBFILE', joinpath(rootdir, 'Test.py'))
outdir = env.setdefault('OUTPUT_DIR', joinpath(rootdir, jobname))
env['POOLJOB'] = 'True'
if os.path.isdir("/work"):
workbase = "/work"
else:
workbase = "/tmp/"
workdir = joinpath(workbase, '%s.%s' % (env['USER'], pbs_jobid))
host = socket.gethostname()
os.umask(0022)
jobdir = JobDir(outdir)
started = date()
jobdir.echofile('.running', started)
jobdir.rmfile('.queued')
jobdir.echofile('.host', host)
jobdir.setstatus('running on %s on %s' % (host, started))
if os.path.isdir(workdir):
cleandir(workdir)
else:
os.mkdir(workdir)
if False and os.path.isdir('/z/dist'):
sync = rsync()
sync.delete = True
sync.sudo = True
sync.do('poolfs::dist/m5/', '/z/dist/m5/')
try:
os.chdir(workdir)
except OSError,e:
sys.exit(e)
os.symlink(jobdir.file('output'), 'status.out')
args = [ joinpath(basedir, 'm5'), joinpath(basedir, 'run.py') ]
if not len(args):
sys.exit("no arguments")
print 'starting job... %s' % started
print ' '.join(args)
print
sys.stdout.flush()
childpid = os.fork()
if not childpid:
# Execute command
sys.stdin.close()
fd = os.open(jobdir.file("output"),
os.O_WRONLY | os.O_CREAT | os.O_TRUNC)
os.dup2(fd, sys.stdout.fileno())
os.dup2(fd, sys.stderr.fileno())
os.execvp(args[0], args)
def handler(signum, frame):
if childpid != 0:
os.kill(childpid, signum)
signal.signal(signal.SIGHUP, handler)
signal.signal(signal.SIGINT, handler)
signal.signal(signal.SIGQUIT, handler)
signal.signal(signal.SIGTERM, handler)
signal.signal(signal.SIGCONT, handler)
signal.signal(signal.SIGUSR1, handler)
signal.signal(signal.SIGUSR2, handler)
done = 0
while not done:
try:
thepid,ec = os.waitpid(childpid, 0)
if ec:
print 'Exit code ', ec
status = 'failure'
else:
status = 'success'
done = 1
except OSError:
pass
complete = date()
print '\njob complete... %s' % complete
jobdir.echofile('.%s' % status, complete)
jobdir.rmfile('.running')
jobdir.setstatus('%s on %s' % (status, complete))

View File

@@ -1,180 +0,0 @@
# Copyright (c) 2005 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import os, popen2, re, sys
class MyPOpen(object):
def __init__(self, cmd, input = None, output = None, bufsize = -1):
self.status = -1
if input is None:
p2c_read, p2c_write = os.pipe()
self.tochild = os.fdopen(p2c_write, 'w', bufsize)
else:
p2c_write = None
if isinstance(input, file):
p2c_read = input.fileno()
elif isinstance(input, str):
input = file(input, 'r')
p2c_read = input.fileno()
elif isinstance(input, int):
p2c_read = input
else:
raise AttributeError
if output is None:
c2p_read, c2p_write = os.pipe()
self.fromchild = os.fdopen(c2p_read, 'r', bufsize)
else:
c2p_read = None
if isinstance(output, file):
c2p_write = output.fileno()
elif isinstance(output, str):
output = file(output, 'w')
c2p_write = output.fileno()
elif isinstance(output, int):
c2p_write = output
else:
raise AttributeError
self.pid = os.fork()
if self.pid == 0:
os.dup2(p2c_read, sys.stdin.fileno())
os.dup2(c2p_write, sys.stdout.fileno())
os.dup2(c2p_write, sys.stderr.fileno())
try:
os.execvp(cmd[0], cmd)
finally:
os._exit(1)
os.close(p2c_read)
os.close(c2p_write)
def poll(self):
if self.status < 0:
pid, status = os.waitpid(self.pid, os.WNOHANG)
if pid == self.pid:
self.status = status
return self.status
def wait(self):
if self.status < 0:
pid, status = os.waitpid(self.pid, 0)
if pid == self.pid:
self.status = status
return self.status
class qsub:
def __init__(self):
self.afterok = None
self.hold = False
self.join = False
self.keep_stdout = False
self.keep_stderr = False
self.node_type = None
self.mail_abort = False
self.mail_begin = False
self.mail_end = False
self.name = None
self.stdout = None
self.priority = None
self.queue = None
self.pbshost = None
self.qsub = 'qsub'
self.env = {}
def build(self, script, args = []):
self.cmd = [ self.qsub ]
if self.env:
arg = '-v'
arg += ','.join([ '%s=%s' % i for i in self.env.iteritems() ])
self.cmd.append(arg)
if self.hold:
self.cmd.append('-h')
if self.stdout:
self.cmd.append('-olocalhost:' + self.stdout)
if self.keep_stdout and self.keep_stderr:
self.cmd.append('-koe')
elif self.keep_stdout:
self.cmd.append('-ko')
elif self.keep_stderr:
self.cmd.append('-ke')
else:
self.cmd.append('-kn')
if self.join:
self.cmd.append('-joe')
if self.node_type:
self.cmd.append('-lnodes=' + self.node_type)
if self.mail_abort or self.mail_begin or self.mail_end:
flags = ''
if self.mail_abort:
flags.append('a')
if self.mail_begin:
flags.append('b')
if self.mail_end:
flags.append('e')
if len(flags):
self.cmd.append('-m ' + flags)
else:
self.cmd.append('-mn')
if self.name:
self.cmd.append("-N%s" % self.name)
if self.priority:
self.cmd.append('-p' + self.priority)
if self.queue:
self.cmd.append('-q' + self.queue)
if self.afterok:
self.cmd.append('-Wdepend=afterok:%s' % self.afterok)
self.cmd.extend(args)
self.script = script
self.command = ' '.join(self.cmd + [ self.script ])
def do(self):
pbs = MyPOpen(self.cmd + [ self.script ])
self.result = pbs.fromchild.read()
ec = pbs.wait()
if ec != 0 and self.pbshost:
cmd = ' '.join(self.cmd + [ '-' ])
cmd = [ 'ssh', '-x', self.pbshost, cmd ]
self.command = ' '.join(cmd)
ssh = MyPOpen(cmd, input = self.script)
self.result = ssh.fromchild.read()
ec = ssh.wait()
return ec

View File

@@ -1,289 +0,0 @@
#!/usr/bin/env python2.7
# Copyright (c) 2005 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import os, os.path, re, socket, sys
from os import environ as env, listdir
from os.path import basename, isdir, isfile, islink, join as joinpath, normpath
from filecmp import cmp as filecmp
from shutil import copy
def nfspath(dir):
if dir.startswith('/.automount/'):
dir = '/n/%s' % dir[12:]
elif not dir.startswith('/n/'):
dir = '/n/%s%s' % (socket.gethostname().split('.')[0], dir)
return dir
def syncdir(srcdir, destdir):
srcdir = normpath(srcdir)
destdir = normpath(destdir)
if not isdir(destdir):
sys.exit('destination directory "%s" does not exist' % destdir)
for root, dirs, files in os.walk(srcdir):
root = normpath(root)
prefix = os.path.commonprefix([root, srcdir])
root = root[len(prefix):]
if root.startswith('/'):
root = root[1:]
for rem in [ d for d in dirs if d.startswith('.') or d == 'SCCS']:
dirs.remove(rem)
for entry in dirs:
newdir = joinpath(destdir, root, entry)
if not isdir(newdir):
os.mkdir(newdir)
print 'mkdir', newdir
for i,d in enumerate(dirs):
if islink(joinpath(srcdir, root, d)):
dirs[i] = joinpath(d, '.')
for entry in files:
dest = normpath(joinpath(destdir, root, entry))
src = normpath(joinpath(srcdir, root, entry))
if not isfile(dest) or not filecmp(src, dest):
print 'copy %s %s' % (dest, src)
copy(src, dest)
progpath = nfspath(sys.path[0])
progname = basename(sys.argv[0])
usage = """\
Usage:
%(progname)s [-c] [-e] [-f] [-j <jobfile>] [-q queue] [-v] <regexp>
-c clean directory if job can be run
-C submit the checkpointing runs
-d Make jobs be dependent on the completion of the checkpoint runs
-e only echo pbs command info, don't actually send the job
-f force the job to run regardless of state
-q <queue> submit job to the named queue
-j <jobfile> specify the jobfile (default is <rootdir>/Test.py)
-v be verbose
%(progname)s [-j <jobfile>] -l [-v] <regexp>
-j <jobfile> specify the jobfile (default is <rootdir>/Test.py)
-l list job names, don't submit
-v be verbose (list job parameters)
%(progname)s -h
-h display this help
""" % locals()
try:
import getopt
opts, args = getopt.getopt(sys.argv[1:], '-Ccdefhj:lnq:Rt:v')
except getopt.GetoptError:
sys.exit(usage)
depend = False
clean = False
onlyecho = False
exprs = []
force = False
listonly = False
queue = ''
verbose = False
jfile = 'Test.py'
docpts = False
doruns = True
runflag = False
node_type = 'FAST'
update = True
for opt,arg in opts:
if opt == '-C':
docpts = True
if opt == '-c':
clean = True
if opt == '-d':
depend = True
if opt == '-e':
onlyecho = True
if opt == '-f':
force = True
if opt == '-h':
print usage
sys.exit(0)
if opt == '-j':
jfile = arg
if opt == '-l':
listonly = True
if opt == '-n':
update = False
if opt == '-q':
queue = arg
if opt == '-R':
runflag = True
if opt == '-t':
node_type = arg
if opt == '-v':
verbose = True
if docpts:
doruns = runflag
for arg in args:
exprs.append(re.compile(arg))
import jobfile, pbs
from job import JobDir, date
conf = jobfile.JobFile(jfile)
if update and not listonly and not onlyecho and isdir(conf.linkdir):
if verbose:
print 'Checking for outdated files in Link directory'
if not isdir(conf.basedir):
os.mkdir(conf.basedir)
syncdir(conf.linkdir, conf.basedir)
jobnames = {}
joblist = []
if docpts and doruns:
gen = conf.alljobs()
elif docpts:
gen = conf.checkpoints()
elif doruns:
gen = conf.jobs()
for job in gen:
if job.name in jobnames:
continue
if exprs:
for expr in exprs:
if expr.match(job.name):
joblist.append(job)
break
else:
joblist.append(job)
if listonly:
if verbose:
for job in joblist:
job.printinfo()
else:
for job in joblist:
print job.name
sys.exit(0)
if not onlyecho:
newlist = []
for job in joblist:
jobdir = JobDir(joinpath(conf.rootdir, job.name))
if jobdir.exists():
if not force:
status = jobdir.getstatus()
if status == 'queued':
continue
if status == 'running':
continue
if status == 'success':
continue
if not clean:
sys.exit('job directory %s not clean!' % jobdir)
jobdir.clean()
newlist.append(job)
joblist = newlist
class NameHack(object):
def __init__(self, host='pbs.pool', port=24465):
self.host = host
self.port = port
self.socket = None
def setname(self, jobid, jobname):
try:
jobid = int(jobid)
except ValueError:
jobid = int(jobid.strip().split('.')[0])
jobname = jobname.strip()
# since pbs can handle jobnames of 15 characters or less,
# don't use the raj hack.
if len(jobname) <= 15:
return
if self.socket is None:
import socket
self.socket = socket.socket()
# Connect to pbs.pool and send the jobid/jobname pair to port
# 24465 (Raj didn't realize that there are only 64k ports and
# setup inetd to point to port 90001)
self.socket.connect((self.host, self.port))
self.socket.send("%s %s\n" % (jobid, jobname))
namehack = NameHack()
for job in joblist:
jobdir = JobDir(joinpath(conf.rootdir, job.name))
if depend:
cptdir = JobDir(joinpath(conf.rootdir, job.checkpoint.name))
cptjob = cptdir.readval('.pbs_jobid')
if not onlyecho:
jobdir.create()
print 'Job name: %s' % job.name
print 'Job directory: %s' % jobdir
qsub = pbs.qsub()
qsub.pbshost = 'simpool.eecs.umich.edu'
qsub.stdout = jobdir.file('jobout')
qsub.name = job.name[:15]
qsub.join = True
qsub.node_type = node_type
qsub.env['ROOTDIR'] = conf.rootdir
qsub.env['JOBNAME'] = job.name
if depend:
qsub.afterok = cptjob
if queue:
qsub.queue = queue
qsub.build(joinpath(progpath, 'job.py'))
if verbose:
print 'PBS Command: %s' % qsub.command
if not onlyecho:
ec = qsub.do()
if ec == 0:
jobid = qsub.result
print 'PBS Jobid: %s' % jobid
namehack.setname(jobid, job.name)
queued = date()
jobdir.echofile('.pbs_jobid', jobid)
jobdir.echofile('.pbs_jobname', job.name)
jobdir.echofile('.queued', queued)
jobdir.setstatus('queued on %s' % queued)
else:
print 'PBS Failed'

235
util/qdo
View File

@@ -1,235 +0,0 @@
#! /usr/bin/env python2.7
# Copyright (c) 2004-2005, 2007 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Important!
# This script expects a simple $ prompt, if you are using a shell other than
# sh which defaults to this you'll need to add something like the following
# to your bashrc/bash_profile script:
#if [ "$OAR_USER" = "xxxx" ]; then
# PS1='$ '
import sys
import os
import re
import time
import optparse
import pexpect
progname = os.path.basename(sys.argv[0])
usage = "%prog [options] command [command arguments]"
optparser = optparse.OptionParser(usage=usage)
optparser.allow_interspersed_args=False
optparser.add_option('-e', dest='stderr_file',
help='command stderr output file')
optparser.add_option('-o', dest='stdout_file',
help='command stdout output file')
optparser.add_option('-l', dest='save_log', action='store_true',
help='save oarsub output log file')
optparser.add_option('-N', dest='job_name',
help='oarsub job name')
optparser.add_option('-q', dest='dest_queue',
help='oarsub destination queue')
optparser.add_option('--qwait', dest='oarsub_timeout', type='int',
help='oarsub queue wait timeout', default=30*60)
optparser.add_option('-t', dest='cmd_timeout', type='int',
help='command execution timeout', default=600*60)
(options, cmd) = optparser.parse_args()
if cmd == []:
print >>sys.stderr, "%s: missing command" % progname
sys.exit(1)
# If we want to do this, need to add check here to make sure cmd[0] is
# a valid PBS job name, else oarsub will die on us.
#
#if not options.job_name:
# options.job_name = cmd[0]
cwd = os.getcwd()
# Deal with systems where /n is a symlink to /.automount
if cwd.startswith('/.automount/'):
cwd = cwd.replace('/.automount/', '/n/', 1)
if not cwd.startswith('/n/poolfs/'):
print >>sys.stderr, "Error: current directory must be under /n/poolfs."
sys.exit(1)
# The Shell class wraps pexpect.spawn with some handy functions that
# assume the thing on the other end is a Bourne/bash shell.
class Shell(pexpect.spawn):
# Regexp to match the shell prompt. We change the prompt to
# something fixed and distinctive to make it easier to match
# reliably.
prompt_re = re.compile('qdo\$ ')
def __init__(self, cmd):
# initialize base pexpect.spawn object
try:
pexpect.spawn.__init__(self, cmd)
except pexpect.ExceptionPexpect, exc:
print "%s:" % progname, exc
sys.exit(1)
# full_output accumulates the full output of the session
self.full_output = ""
self.quick_timeout = 15
# wait for a prompt, then change it
try:
self.expect('\$ ', options.oarsub_timeout)
except pexpect.TIMEOUT:
print >>sys.stderr, "%s: oarsub timed out." % progname
self.kill(9)
self.safe_close()
sys.exit(1)
self.do_command('unset PROMPT_COMMAND; PS1="qdo$ "')
# version of expect that updates full_output too
def expect(self, regexp, timeout = -1):
pexpect.spawn.expect(self, regexp, timeout)
self.full_output += self.before + self.after
# Just issue a command and wait for the next prompt.
# Returns a string containing the output of the command.
def do_bare_command(self, cmd, timeout = -1):
global full_output
self.sendline(cmd)
# read back the echo of the command
self.readline()
# wait for the next prompt
self.expect(self.prompt_re, timeout)
output = self.before.rstrip()
return output
# Issue a command, then query its exit status.
# Returns a (string, int) tuple with the command output and the status.
def do_command(self, cmd, timeout = -1):
# do the command itself
output = self.do_bare_command(cmd, timeout)
# collect status
status = int(self.do_bare_command("echo $?", self.quick_timeout))
return (output, status)
# Check to see if the given directory exists.
def dir_exists(self, dirname):
(output, status) = shell.do_command('[ -d %s ]' % dirname,
self.quick_timeout)
return status == 0
# Don't actually try to close it.. just wait until it closes by itself
# We can't actually kill the pid which is what it's trying to do, and if
# we call wait we could be in an unfortunate situation of it printing input
# right as we call wait, so the input is never read and the process never ends
def safe_close(self):
count = 0
while self.isalive() and count < 10:
time.sleep(1)
self.close(force=False)
# Spawn the interactive pool job.
# Hack to do link on poolfs... disabled for now since
# compiler/linker/library versioning problems between poolfs and
# nodes. May never work since poolfs is x86-64 and nodes are 32-bit.
if False and len(cmd) > 50:
shell_cmd = 'ssh -t poolfs /bin/sh -l'
print "%s: running %s on poolfs" % (progname, cmd[0])
else:
shell_cmd = 'oarsub -I'
if options.job_name:
shell_cmd += ' -n "%s"' % options.job_name
if options.dest_queue:
shell_cmd += ' -q ' + options.dest_queue
shell_cmd += ' -d %s' % cwd
shell = Shell(shell_cmd)
try:
# chdir to cwd
(output, status) = shell.do_command('cd ' + cwd)
if status != 0:
raise OSError, "Can't chdir to %s" % cwd
# wacky hack: sometimes scons will create an output directory then
# fork a job to generate files in that directory, and the job will
# get run before the directory creation propagates through NFS.
# This hack looks for a '-o' option indicating an output file and
# waits for the corresponding directory to appear if necessary.
try:
if 'cc' in cmd[0] or 'g++' in cmd[0]:
output_dir = os.path.dirname(cmd[cmd.index('-o')+1])
elif 'm5' in cmd[0]:
output_dir = cmd[cmd.index('-d')+1]
else:
output_dir = None
except (ValueError, IndexError):
# no big deal if there's no '-o'/'-d' or if it's the final argument
output_dir = None
if output_dir:
secs_waited = 0
while not shell.dir_exists(output_dir) and secs_waited < 90:
time.sleep(5)
secs_waited += 5
if secs_waited > 30:
print "waited", secs_waited, "seconds for", output_dir
# run command
if options.stdout_file:
cmd += ['>', options.stdout_file]
if options.stderr_file:
cmd += ['2>', options.stderr_file]
try:
(output, status) = shell.do_command(' '.join(cmd), options.cmd_timeout)
except pexpect.TIMEOUT:
print >>sys.stderr, "%s: command timed out after %d seconds." \
% (progname, options.cmd_timeout)
shell.sendline('~.') # oarsub/ssh termination escape sequence
shell.safe_close()
status = 3
if output:
print output
finally:
# end job
if shell.isalive():
shell.sendline('exit')
shell.expect('Disconnected from OAR job .*')
shell.safe_close()
# if there was an error, log the output even if not requested
if status != 0 or options.save_log:
log = file('qdo-log.' + str(os.getpid()), 'w')
log.write(shell.full_output)
log.close()
del shell
sys.exit(status)