#!/usr/bin/perl -w
###########################################################################
#
# This is qsub, a very hacked version of condor_run.
#
# Copyright 2010,2020 Philip Johnson.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published
# by the Free Software Foundation, either version 3 of the License,
# or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License at <https://www.gnu.org/licenses/> for
# more details.
#
#
# Version history:
# 1.0 --> created 2005 or 2006
# 2.0 --> Jan 2008 [much improved; no longer requires auxiliary 'run' file]
# 2.1 --> Aug 2008 [enhanced file-transfer ability]
# 2.2 --> Apr 2010 [force usage of bash shell for command execution]
# 2.3 --> Oct 2012 [add requestcpus option for SMP processing]
# 2.4 --> Nov 2012 [UDP notification of job completion]
# 2.5 --> Jan 2013 [improved -dry-run to test -transfers]
# 2.6 --> Mar 2014 [further tweaks to -dry-run to test -transfers]
# 2.7 --> Nov 2014 [added call to caffeinate (no idle sleep) on OSX systems]
# 2.8 --> Jan 2020 [made compatible with qsub.slurm]
#
# Basic idea: create a job definition file ("JDF") for condor_submit
# that uses *this* perl script as the executable, passing in the
# desired command as an environment variable. Stdout & stderr are
# captured in files named '.condor_{out,err}.$CLUSTER.$PROCESS' during
# the run and renamed w/o the leading period at the end of the run if any
# output was produced (otherwise the files are deleted). If running
# the (custom) notify-netsend daemon on submitter's computer, will
# display a notification upon job completion.
#
###########################################################################
use strict;
use Cwd;
use File::Basename;
use File::Temp;
use Getopt::Long;
use IO::Socket;
# Script-wide state: the condor cluster id (known once a job has been
# submitted, or when re-entered on an execute node), the parsed options,
# and whether we are submitting a job or executing one.
my $clusterId;
my (%opts, $runCmd);
my $runMode = 'submit'; #by default
$opts{n} = 1;
$opts{p} = 0;
$opts{'numCPU'} = 1;

# Parse the command line; whatever is left in @ARGV afterwards is the shell
# command. Notes on individual specs:
#  * 't' and 'r' are declared as exact aliases because the usage text
#    advertises them; as mere abbreviations they would be ambiguous
#    (time/transfer and requirements/rt/runtime).
#  * The runtime option is accepted only for compatibility with the slurm
#    variant; its value is stored but never used here.
#  * -env splits at the FIRST '=' so that values may themselves contain '='.
my $optionsOk = GetOptions(
    "n=s" => \$opts{'n'},
    "dry-run|no-submit|interactive" => \$opts{'dry-run'},
    "et|enable-transfer" => \$opts{'enable-transfer'},
    "preemptable" => \$opts{'preemptable'},
    "t|transfer=s" => sub { $opts{'transfer'}->{$_[1]} = 1 },
    "exclude=s" => sub { $opts{'exclude'}->{$_[1]} = 1 },
    "priority=i" => \$opts{'p'},
    "r|requirements=s" => \$opts{'req'},
    "cpus=i" => \$opts{'numCPU'},
    "rt|time|runtime=i" => \$opts{'runtime'}, #used by slurm, not condor
    "env=s" => sub {
        my @e = $_[1] =~ /^([^=]+)=(.+)$/;
        die "\n-->Bad -env parameter\n\n" if @e != 2;
        $ENV{$e[0]} = $e[1];
    },
    "force" => \$opts{'f'},
    "log=s" => \$opts{'log'},
);

# Show usage on a bad option, or when no command was given and we are not
# being re-entered with the command stored in the environment.
if (!$optionsOk ||
    (@ARGV == 0 && !exists($ENV{'CONDOR_CMD_LINE'}))) {
    print "usage: $0 <options> 'shell-cmd'\n",
        "where <options> are any of: \n",
        "\t-dry-run --> execute directly; do not submit to condor\n",
        "\t-n <#> --> number of times to repeat command\n",
        "\t-t <file> --> file to transfer not already named in 'shell-cmd'\n",
        "\t-et --> enable transfers (-t automatically selects this)\n",
        "\t-exc <file> --> exclude file from automatic transfer\n",
        "\t-pre --> enable preemptability & partial output w/ transfers\n",
        "\t-pri <#> --> priority from -20 (low) to +20 (high), with default = 0\n",
        "\t-r <expr> --> ClassAd job requirements (see condor documentation)\n",
        "\t-cpus <#> --> Number of CPUs needed (on a single SMP machine)\n",
        "\t-env X=Y --> set environment variable X to Y when running job\n",
        "\t-f --> force ignoring of warnings\n",
        "\t-log <file> --> condor log file (defaults to no log)\n",
        "\n";
    exit 1;
}
# Route the usual termination signals through handler() so that an
# interrupted run gets a chance to clean up before dying.
for my $signame (qw(HUP INT QUIT TERM)) {
    $SIG{$signame} = \&handler;
}
# If re-entered by condor (the command is stored in an environment
# variable), run that command and exit instead of submitting anything.
if (exists($ENV{'CONDOR_CMD_LINE'})) {
    $runMode = 'exec';
    $clusterId = $ENV{'CONDOR_CLUSTER'};

    # Undo the submit-side encoding of characters that cannot travel in
    # the environment.
    foreach my $var ('CONDOR_CMD_LINE', 'CONDOR_LCLCMD_LINE') {
        next if !defined $ENV{$var};
        $ENV{$var} =~ s/SeMiCoLoN/;/g; #hack (condor hates ;)
        $ENV{$var} =~ s/NeWlInE/\n/g;
    }

    my $pwd = `pwd`;
    chomp $pwd;
    if (defined $ENV{'CONDOR_OLD_PWD'} && $ENV{'CONDOR_OLD_PWD'} eq $pwd) {
        # still in the submit directory --> did not transfer
        ExecCmd($ENV{'CONDOR_CMD_LINE'});
    } else { #did transfer
        # fiddle with execute permissions: TRANSFER_X_PERMISSION holds one
        # 0/1 flag per transferred file, in sorted-basename order
        my $permFlags = defined $ENV{'TRANSFER_X_PERMISSION'}
            ? $ENV{'TRANSFER_X_PERMISSION'} : '';
        my @needExecPerm = split(//, $permFlags);
        my $thisExe = File::Basename::basename($0);
        # Exclude exactly this executable. (Matching its name as a regex
        # would also discard any file that merely contains the name, and
        # would treat '.' etc. in the name as metacharacters.)
        my @transferredFiles = grep { $_ ne $thisExe } sort <*>;
        if (@needExecPerm == @transferredFiles) { #otherwise we have a logic flaw
            for (my $i = 0; $i < @transferredFiles; ++$i) {
                chmod(0700, $transferredFiles[$i]) if ($needExecPerm[$i]);
            }
        }
        $ENV{'PATH'} .= ':.'; #want to find any files that we transferred
        ExecCmd($ENV{'CONDOR_LCLCMD_LINE'});
    }
    # NOTE(review): the command's exit status is not propagated to condor;
    # confirm whether that is deliberate before changing it.
    exit 0;
}
# Whatever remains on the command line after option parsing is the shell
# command to run. A lone '-' means: read the command (one line) from stdin.
my $cmdline = join(' ', @ARGV); #assume the remainder is the cmd to be executed
if ($cmdline eq '-') { #special case to read from stdin
    $cmdline = <STDIN>;
    # <STDIN> yields undef at end-of-file; there is nothing to run then
    abort("No command could be read from stdin.\n") if !defined $cmdline;
    chomp $cmdline;
}
# Naming explicit files to transfer implies that transfers are enabled.
$opts{'enable-transfer'} = 1 if defined $opts{'transfer'};

# Every explicitly requested transfer file has to exist; give up otherwise.
for my $wanted (keys %{ $opts{'transfer'} }) {
    next if -e $wanted;
    abort("Could not find file to transfer: '$wanted'\n");
}
# Walk the command line token by token (whitespace, quotes, redirections
# and pipes are kept as separate tokens) looking for files. Each token that
# resolves to a regular file is registered for transfer (when transfers are
# enabled) and replaced by './<basename>' in the local command line, which
# is the form used when the job runs out of a transfer directory.
my $lclCmdline = ''; #local command line strips paths
for my $token (split /(\s+|\'|\"|>|<|\|)/, $cmdline) {
    # excluded tokens pass through untouched
    if (exists $opts{'exclude'}->{$token}) {
        $lclCmdline .= $token;
        next;
    }
    my $resolved = x_FindFullPath($token);
    # not a (stat-able) regular file: keep whatever the lookup returned
    if ($resolved =~ /\n/ || !-f $resolved) {
        $lclCmdline .= $resolved;
        next;
    }
    $opts{'transfer'}->{$resolved} = 1 if $opts{'enable-transfer'};
    # take the last path component by matching from the end of the string
    my ($reversedLeaf) = reverse($resolved) =~ /(.+?)(?:\/|$)/;
    my $leaf = reverse($reversedLeaf);
    $lclCmdline .= './'.$leaf;
}
# Record, as a string of 0/1 flags ordered by file basename, which of the
# files to transfer are executable. The execute side needs this to restore
# the execute bits (by default, condor transfers the file to have 300
# permissions).
$ENV{'TRANSFER_X_PERMISSION'} = join('',
    map  { (-x $_) ? 1 : 0 }
    sort { File::Basename::basename($a) cmp File::Basename::basename($b) }
    keys %{ $opts{'transfer'} });
# grab current working directory for initial dir in system using automounter;
# it is also exported so the execute side can tell whether it is still
# running in the submit directory
chomp(my $pwd = `pwd`);
$ENV{'CONDOR_OLD_PWD'} = $pwd;

# Sanity check for directories that do not look network-mounted. The leading
# constant 0 keeps the whole check switched off.
if (0 && !$opts{'f'} && !$opts{'enable-transfer'} &&
    !($pwd =~ /\/Network\// || $pwd =~ /\/net\//)) {
    abort("WARNING: Are you sure the current directory ($pwd) is accessible from the execute nodes? (reexcute with '-force' if so)\n");
}
# note port if running (custom) daemon to receive notifications over net.
# The value is exported through the environment, so it is stripped of all
# whitespace (in particular the trailing newline); nothing is exported when
# the file cannot be read or is empty.
if (-e "/tmp/notify-net.port") {
    if (open(my $portFile, '<', "/tmp/notify-net.port")) {
        my $port = <$portFile>;
        close($portFile);
        if (defined $port) {
            $port =~ s/\s+//g;
            $ENV{'NOTIFY_NETPORT'} = $port if $port ne '';
        }
    }
}
# stash both forms of the command line in environment variables to be
# passed into condor
$ENV{'CONDOR_CMD_LINE'} = $cmdline;
$ENV{'CONDOR_LCLCMD_LINE'} = $lclCmdline;

# condor can't handle environment variables containing semicolons, so they
# are swapped for a placeholder that the execute side turns back
if ($cmdline =~ /;/) {
    for my $var ('CONDOR_CMD_LINE', 'CONDOR_LCLCMD_LINE') {
        $ENV{$var} =~ s/;/SeMiCoLoN/g;
    }
}

# environment variables can't handle newlines either; same trick
if ($cmdline =~ /\n/) {
    for my $var ('CONDOR_CMD_LINE', 'CONDOR_LCLCMD_LINE') {
        $ENV{$var} =~ s/\n/NeWlInE/g;
    }
}
# if 'dry-run' then just run interactively -- don't hand off to condor.
# With transfers enabled, the transfer is simulated: the files are copied
# into a scratch directory, the command is run there with a PATH reduced to
# the system directories plus '.', and new or modified files are copied back.
if ($opts{'dry-run'}) {
    $ENV{'CONDOR_CLUSTER'} = 0;

    # hostname's output ends in a newline, which must not end up in the address
    my $thisHost = `hostname`;
    $thisHost = '' if !defined $thisHost;
    chomp $thisHost;
    $ENV{'QSUB_SUBMITTER'} =
        (defined $ENV{'USER'} ? $ENV{'USER'} : '').'@'.$thisHost;

    if ($opts{'enable-transfer'} && keys(%{$opts{'transfer'}})) {
        my $tmpdir = File::Temp->newdir();
        print STDERR "Would have transfered:\n";
        my %basefns;
        foreach my $fn (keys(%{$opts{'transfer'}})) {
            print STDERR "\t$fn\n";
            my $basefn = File::Basename::basename($fn);
            $basefns{$basefn} = -M $fn; #last mod time
            # list form: no shell involved, so odd characters in file
            # names are harmless; -p keeps the modification time that the
            # comparison below relies on
            system('cp', '-p', '--', $fn, "$tmpdir/$basefn") == 0
                or warn "Failed to copy '$fn' to $tmpdir\n";
        }
        my $cwd = Cwd::cwd();
        # without this the command would run (and files would be collected)
        # in the wrong directory
        chdir("$tmpdir")
            or abort("Could not change directory to $tmpdir: $!\n");
        $ENV{'PATH'} =
            join(':', grep({x_IsSystemPath($_);} split(/:/, $ENV{'PATH'})),'.');
        print STDERR "Using path:\n\t", $ENV{'PATH'}, "\n";
        print STDERR "Executing $lclCmdline\n";
        my $exitStatus = 0;
        # repeat as condor would, but stop at the first failure
        for (my $i = 0; $i < $opts{n} && $exitStatus == 0; ++$i) {
            $ENV{'CONDOR_PROCESS'} = $i;
            $exitStatus = ExecCmd($lclCmdline, 1); #1 means interactive
        }
        # anything not copied in, or modified since, counts as output
        my @newfiles = grep {!exists($basefns{$_}) || ($basefns{$_} > -M $_)} <*>;
        if (@newfiles) {
            print STDERR "Would have transferred back:\n";
            print STDERR "\t", join("\n\t",@newfiles), "\n";
            system('cp', '-p', '--', @newfiles, $cwd) == 0
                or warn "Failed to copy output files back to $cwd\n";
        }
        chdir $cwd; #so can delete tmpdir
        exit($exitStatus);
    }

    # single direct run; give it a process number like a real job would have
    $ENV{'CONDOR_PROCESS'} = 0 unless exists $ENV{'CONDOR_PROCESS'};
    print STDERR "Executing $cmdline\n";
    exit(ExecCmd($cmdline, 1)); #1 means interactive
}
# -----------------------------------------------------------------------------
# Write the job description file (JDF) that hands this very script to
# condor_submit as the executable.
my $JDF = File::Temp->new() ||
    abort("Failed to create temporary (JDF) file.\n");

# fixed preamble
print $JDF "\n",
    "universe = vanilla\n",
    "executable = $0\n",
    "initialdir = $pwd\n",
    "requestcpus = $opts{'numCPU'}\n";
print $JDF "log = $opts{'log'}\n" if defined $opts{'log'};

# file-transfer settings
if ($opts{'enable-transfer'}) {
    if ($opts{'preemptable'}) {
        #note we add +FileSystemDomain to ClassAd because condor will only automatically supply FileSystemDomain if transfer == NO or IF_NEEDED
        my $submitFilesystem =
            `condor_config_val FILESYSTEM_DOMAIN 2> /dev/null` ||
            abort("Failed to run condor_config_val. Please check your path.\n");
        chomp $submitFilesystem;
        print $JDF "should_transfer_files = YES\n",
            "when_to_transfer_output = ON_EXIT_OR_EVICT\n",
            "+Preemptable = True\n",
            "+FileSystemDomain = \"$submitFilesystem\"\n";
    } else {
        print $JDF "should_transfer_files = IF_NEEDED\n",
            "when_to_transfer_output = ON_EXIT\n";
    }
    # comma-separated list of everything registered for transfer
    print $JDF "transfer_input_files = ",
        join(", ", keys(%{$opts{'transfer'}})),
        "\n";
} else {
    print $JDF "should_transfer_files = NO\n";
}

print $JDF "requirements = ".$opts{'req'}."\n" if defined $opts{'req'};

# fixed trailer; the \$ escapes keep condor's own macros out of perl's hands
print $JDF "notification = NEVER\n",
    "rank = ((TARGET.FileSystemDomain == MY.FileSystemDomain) * 1000) + (COLLECTOR_HOST_STRING != Machine) * 100 + (TotalSlots - TotalLoadAvg) * 10 + (TotalSlots - SlotID)\n",
    "getenv = True\n",
    "environment = CONDOR_CLUSTER=\$(Cluster);CONDOR_PROCESS=\$(Process);QSUB_SUBMITTER=\$\$([User])\n",
    "priority = $opts{p}\n",
    "queue $opts{n}\n";

close($JDF) ||
    abort("Failed to write temporary (JDF) file.\n");
# -----------------------------------------------------------------------------
# submit the job; $clusterId contains cluster number if successful.
# condor_submit's output (stdout and stderr merged) is echoed line by line
# while being scanned for the cluster number.
open(my $submit, '-|', "condor_submit $JDF 2>&1") ||
    abort("Failed to run condor_submit. Please check your path.\n");
while (my $line = <$submit>) {
    print $line;
    if ($line =~ /job\(s\) submitted to cluster (\d+)/) {
        $clusterId = $1;
    }
}
# close() on a pipe also reports a non-zero exit status of the command
close($submit) ||
    abort("Failed to run condor_submit. Please check your path.\n");
if (!defined $clusterId) {
    abort("Did not successfully submit job.\n");
}
# -----------------------------------------------------------------------------
# Runs a shell command under bash and reports whether it succeeded.
#
# Arguments:
#   $cmd         -- the shell command line, executed as bash -c "($cmd)"
#   $interactive -- if 1, the command writes to this process's own
#                   stdout/stderr; otherwise output is captured in
#                   .condor_{out,err}.$CLUSTER.$PROCESS, and afterwards
#                   empty capture files are deleted and non-empty ones are
#                   renamed without the leading period
#
# Side effects: sets JOB_ID/JOB_SUBID (and, unless already present, the
# SLURM_ARRAY_* equivalents) from CONDOR_CLUSTER/CONDOR_PROCESS, and sends a
# UDP completion notice when NOTIFY_NETPORT is set.
#
# Returns 0 if the command's wait status was 0, and 1 otherwise.
sub ExecCmd {
    my ($cmd, $interactive) = @_;
    my $isInteractive = (defined $interactive && $interactive == 1);

    if (!exists($ENV{'SLURM_ARRAY_JOB_ID'})) { #compatibility w/ slurm
        $ENV{'SLURM_ARRAY_JOB_ID'} = $ENV{'CONDOR_CLUSTER'};
        $ENV{'SLURM_ARRAY_TASK_ID'} = $ENV{'CONDOR_PROCESS'};
    }
    $ENV{'JOB_ID'} = $ENV{'CONDOR_CLUSTER'}; #generic env vars
    $ENV{'JOB_SUBID'} = $ENV{'CONDOR_PROCESS'};

    my $CC = defined $ENV{'CONDOR_CLUSTER'} ? $ENV{'CONDOR_CLUSTER'} : '';
    my $CP = defined $ENV{'CONDOR_PROCESS'} ? $ENV{'CONDOR_PROCESS'} : '';
    my $out = ".condor_out.$CC.$CP";
    my $err = ".condor_err.$CC.$CP";
    my $permOut = "condor_out.$CC.$CP";
    my $permErr = "condor_err.$CC.$CP";

    # caffeinate is an OSX program that blocks idle sleeping during the run
    my @launcher = (-e '/usr/bin/caffeinate') ? ('caffeinate', '-i') : ();
    my $shellCmd = $isInteractive ? "($cmd)" : "($cmd) 1> $out 2> $err";

    # remember the wait status right away; $? is a global that later
    # operations may overwrite
    my $waitStatus = system(@launcher, 'bash', '-c', $shellCmd);
    my $exitStatus = ($waitStatus == 0) ? 0 : 1;

    if (exists $ENV{'NOTIFY_NETPORT'}) { # if running notify-netsend daemon
        # extract the host part of user@host up front; a failed match must
        # not be allowed to leave a hole in the constructor's argument list
        my $submitHost;
        if (defined $ENV{'QSUB_SUBMITTER'} &&
            $ENV{'QSUB_SUBMITTER'} =~ /@(.+)$/) {
            $submitHost = $1;
        }
        if (defined $submitHost) {
            my $sock = IO::Socket::INET->new(
                Proto => 'udp',
                PeerPort => $ENV{'NOTIFY_NETPORT'},
                PeerAddr => $submitHost,
            );
            if ($sock) { # if doesn't work, no big deal
                my $notice = ($waitStatus == 0)
                    ? "Condor job $CC.$CP complete:\0$cmd"
                    : "Condor job $CC.$CP non-zero exit ($waitStatus):\0$cmd";
                # best effort only: a failing send must not kill the job
                eval { $sock->send($notice); };
            }
        }
    }

    if (!$isInteractive) {
        unlink $out if (-z $out);
        unlink $err if (-z $err);
        rename $out, $permOut if (-e $out);
        rename $err, $permErr if (-e $err);
    }
    return $exitStatus;
}
# -----------------------------------------------------------------------------
# Tells whether a directory is one of the standard system binary
# directories (/usr/bin, /usr/sbin, /bin, /sbin). Files found there are
# to be ignored for transfers. The comparison is an exact string match.
sub x_IsSystemPath {
    my ($dir) = @_;
    my %isSystemDir =
        map { $_ => 1 } ('/usr/bin', '/usr/sbin', '/bin', '/sbin');
    return exists $isSystemDir{$dir};
}
# -----------------------------------------------------------------------------
# Resolves a string that might name a file.
#
# The string is first expanded as a shell glob (~, ?, *, etc.); if that
# yields exactly one existing regular file, that file is returned.
# Otherwise, provided the string contains no slash, each non-system
# directory on PATH is searched for an executable regular file of that
# name, and the first hit is returned as "directory/name". If nothing is
# found the string comes back unmodified.
sub x_FindFullPath {
    my $candidate = shift;

    #filenames can't have newlines (stat will give error!)
    return $candidate if $candidate =~ /\n/;

    my @matches = glob($candidate);
    if (@matches == 1) {
        my $hit = $matches[0];
        return $hit if -e $hit && -f $hit;
    }

    # a name containing a slash (/) is never looked up along PATH
    return $candidate if $candidate =~ /\//;

    for my $dir (split /:/, $ENV{PATH}) {
        # skip system pathes -- don't want to transfer & exe might
        # not work on another system anyway
        next if x_IsSystemPath($dir);
        my $full = "$dir/$candidate";
        return $full if -f $full && -x $full;
    }
    return $candidate;
}
# -----------------------------------------------------------------------------
# signal handler and abort subroutine
# Cleans up whatever this run has produced so far and then dies with the
# given message. When a cluster id is known: in 'submit' mode the submitted
# cluster is removed with condor_rm; in 'exec' mode the temporary
# .condor_out/.condor_err files of this process are deleted.
sub abort {
    my @message = @_;

    # nothing to clean up before a cluster id is known
    die @message if !defined $clusterId;

    if ($runMode ne 'submit') { # $runmode eq 'exec'
        unlink glob(".condor_{out,err}.$clusterId.$ENV{CONDOR_PROCESS}");
    } else {
        my $discarded = qx{condor_rm $clusterId 2>&1 > /dev/null};
    }
    die @message;
}
# Signal handler: receives the signal name and turns the signal into an
# orderly abort, so cleanup still happens.
sub handler {
    my $signame = shift;
    abort("Killed by SIG$signame.\n");
}