#!/usr/bin/perl -w
###########################################################################
#
#   This is qsub, a very hacked version of condor_run.
#
#   Copyright 2010,2020 Philip Johnson.
#
#   This program is free software: you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published
#   by the Free Software Foundation, either version 3 of the License,
#   or (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License at <https://www.gnu.org/licenses/> for
#   more details.
#
#
# Version history:
#   1.0 --> created 2005 or 2006
#   2.0 --> Jan 2008 [much improved; no longer requires auxiliary 'run' file]
#   2.1 --> Aug 2008 [enhanced file-transfer ability]
#   2.2 --> Apr 2010 [force usage of bash shell for command execution]
#   2.3 --> Oct 2012 [add requestcpus option for SMP processing]
#   2.4 --> Nov 2012 [UDP notification of job completion]
#   2.5 --> Jan 2013 [improved -dry-run to test -transfers]
#   2.6 --> Mar 2014 [further tweaks to -dry-run to test -transfers]
#   2.7 --> Nov 2014 [added call to caffeinate (no idle sleep) on OSX systems]
#   2.8 --> Jan 2020 [made compatible with qsub.slurm]
#
# Basic idea: create a job definition file ("JDF") for condor_submit
# that uses *this* perl script as the executable, passing in the
# desired command as an environment variable.  Stdout & stderr are
# captured in files named '.condor_{out,err}.$CLUSTER.$PROCESS' during
# the run and renamed w/o the leading period at the end of the run if any
# output was produced (otherwise the files are deleted).  If running
# the (custom) notify-netsend daemon on submitter's computer, will
# display a notification upon job completion.
#
###########################################################################

use strict;
use warnings;

use Cwd;
use File::Basename ();
use File::Temp;
use Getopt::Long;
use IO::Socket;

# -----------------------------------------------------------------------------
# File-scoped state shared between the main line and the subs at the bottom of
# the file.  NOTE: the main line always exit()s before reaching the sub
# definitions, so nothing down there may rely on a file-scoped initializer.
my $clusterId;              # condor cluster id (from condor_submit or the env)
my %opts;                   # parsed command-line options
my $runMode = 'submit';     # 'submit' by default; 'exec' when re-entered by condor
my $slurmVarsAreOurs;       # true once ExecCmd has emulated the SLURM_ARRAY_* vars

$opts{'n'}      = 1;
$opts{'p'}      = 0;
$opts{'numCPU'} = 1;

# The explicit 't' and 'r' aliases keep the documented -t / -r switches
# working: without them they are ambiguous abbreviations (transfer/time and
# requirements/rt/runtime respectively).
my $optionsOk = GetOptions(
    "n=s"                           => \$opts{'n'},
    "dry-run|no-submit|interactive" => \$opts{'dry-run'},
    "et|enable-transfer"            => \$opts{'enable-transfer'},
    "preemptable"                   => \$opts{'preemptable'},
    "t|transfer=s"                  => sub { $opts{'transfer'}->{$_[1]} = 1 },
    "exclude=s"                     => sub { $opts{'exclude'}->{$_[1]} = 1 },
    "priority=i"                    => \$opts{'p'},
    "r|requirements=s"              => \$opts{'req'},
    "cpus=i"                        => \$opts{'numCPU'},
    "rt|time|runtime=i"             => sub { },  # used by slurm, not condor: accept & ignore
    "env=s"                         => sub {
        # split on the FIRST '=' so that values may themselves contain '='
        my ($name, $value) = $_[1] =~ /^([^=]+)=(.+)$/;
        die "\n-->Bad -env parameter\n\n" unless defined $name;
        $ENV{$name} = $value;
    },
    "force"                         => \$opts{'f'},
    "log=s"                         => \$opts{'log'},
);

if (!$optionsOk || (@ARGV == 0 && !exists($ENV{'CONDOR_CMD_LINE'}))) {
    print "usage: $0 <options> 'shell-cmd'\n",
        "where <options> are any of: \n",
        "\t-dry-run    --> execute directly; do not submit to condor\n",
        "\t-n <#>      --> number of times to repeat command\n",
        "\t-t <file>   --> file to transfer not already named in 'shell-cmd'\n",
        "\t-et         --> enable transfers (-t automatically selects this)\n",
        "\t-exc <file> --> exclude file from automatic transfer\n",
        "\t-pre        --> enable preemptability & partial output w/ transfers\n",
        "\t-pri <#>    --> priority from -20 (low) to +20 (high), with default = 0\n",
        "\t-r <string> --> ClassAd job requirements (see condor documentation)\n",
        "\t-cpus <#>   --> Number of CPUs needed (on a single SMP machine)\n",
        "\t-env X=Y    --> set environment variable X to Y when running job\n",
        "\t-f          --> force ignoring of warnings\n",
        "\t-log <file> --> condor log file (defaults to no log)\n",
        "\n";
    exit 1;
}

# install signal handler
$SIG{$_} = \&handler foreach qw(HUP INT QUIT TERM);

# -----------------------------------------------------------------------------
# if reentered prog --> execute command stored in environment variable
if (exists($ENV{'CONDOR_CMD_LINE'})) {
    $runMode   = 'exec';
    $clusterId = $ENV{'CONDOR_CLUSTER'};

    # undo the encoding applied on the submit side (condor hates ';' and
    # environment variables can't carry newlines)
    foreach my $var ('CONDOR_CMD_LINE', 'CONDOR_LCLCMD_LINE') {
        next unless defined $ENV{$var};
        $ENV{$var} =~ s/SeMiCoLoN/;/g;
        $ENV{$var} =~ s/NeWlInE/\n/g;
    }

    my $pwd = `pwd`;
    chomp $pwd;
    if ($ENV{'CONDOR_OLD_PWD'} eq $pwd) {    # did not transfer
        ExecCmd($ENV{'CONDOR_CMD_LINE'});
    }
    else {                                   # did transfer
        # fiddle with execute permissions: the submit side recorded one
        # 0/1 flag per transferred file, ordered by basename
        my @needExecPerm = split(//, $ENV{'TRANSFER_X_PERMISSION'});
        my $thisExe      = File::Basename::basename($0);
        # exact comparison: a regex on the bare name would also drop any
        # transferred file that merely contains the executable's name
        my @transferredFiles = grep { $_ ne $thisExe } sort <*>;
        if (@needExecPerm == @transferredFiles) {    # otherwise we have a logic flaw
            foreach my $i (0 .. $#transferredFiles) {
                chmod(0700, $transferredFiles[$i]) if $needExecPerm[$i];
            }
        }
        $ENV{'PATH'} .= ':.';    # want to find any files that we transferred
        ExecCmd($ENV{'CONDOR_LCLCMD_LINE'});
    }
    exit 0;
}

# -----------------------------------------------------------------------------
# submit side: work out the command line and the files to transfer

my $cmdline = join(' ', @ARGV);    # assume the remainder is the cmd to be executed
if ($cmdline eq '-') {             # special case to read from stdin
    $cmdline = <STDIN>;
    abort("No command supplied on stdin.\n") unless defined $cmdline;
    chomp $cmdline;
}

# specifying files to transfer implies enabling transfers
if (defined $opts{'transfer'}) {
    $opts{'enable-transfer'} = 1;
}

# double-check that all transfer files exist
foreach my $file (keys(%{$opts{'transfer'}})) {
    if (!-e $file) {
        abort("Could not find file to transfer: '$file'\n");
    }
}

# parse command line for additional files to transfer; the "local" command
# line is the same command with the paths of those files replaced by ./name
my $lclCmdline = '';
foreach my $item (split /(\s+|\'|\"|>|<|\|)/, $cmdline) {
    if (!exists($opts{'exclude'}->{$item})) {
        $item = x_FindFullPath($item);
        if ($item !~ /\n/ && -f $item) {
            $opts{'transfer'}->{$item} = 1 if $opts{'enable-transfer'};
            $item = './' . File::Basename::basename($item);
        }
    }
    $lclCmdline .= $item;
}

# for all files to transfer, check which (if any) have execute
# permissions -- we'll need to reconstruct this on the remote side (by
# default, condor transfers the file to have 300 permissions)
$ENV{'TRANSFER_X_PERMISSION'} = join('',
    map  { -x $_ ? 1 : 0 }
    sort { File::Basename::basename($a) cmp File::Basename::basename($b) }
    keys(%{$opts{'transfer'}}));

# grab current working directory for initial dir in system using automounter
my $pwd = `pwd`;
chomp $pwd;
$ENV{'CONDOR_OLD_PWD'} = $pwd;
# NOTE: this check is deliberately disabled by the leading '0 &&'.
if (0 && !$opts{'f'} && !$opts{'enable-transfer'} &&
    $pwd !~ /\/Network\// && $pwd !~ /\/net\//) {
    abort("WARNING: Are you sure the current directory ($pwd) is accessible from the execute nodes? (reexcute with '-force' if so)\n");
}

# note port if running (custom) daemon to receive notifications over net
if (open(my $portFh, '<', '/tmp/notify-net.port')) {
    my $port = <$portFh>;
    close($portFh);
    if (defined $port) {
        $port =~ s/\s+//g;    # a trailing newline would corrupt the port number
        $ENV{'NOTIFY_NETPORT'} = $port if length $port;
    }
}

# stash command line in environment variable to be passed into condor.
# condor can't handle environment variables containing semicolons and
# environment variables can't handle newlines, so both are encoded here and
# decoded again in 'exec' mode.
$ENV{'CONDOR_CMD_LINE'}    = $cmdline;
$ENV{'CONDOR_LCLCMD_LINE'} = $lclCmdline;
foreach my $var ('CONDOR_CMD_LINE', 'CONDOR_LCLCMD_LINE') {
    $ENV{$var} =~ s/;/SeMiCoLoN/g;
    $ENV{$var} =~ s/\n/NeWlInE/g;
}

# -----------------------------------------------------------------------------
# if 'dry-run' then just run interactively -- don't hand off to condor
if ($opts{'dry-run'}) {
    my $host = `hostname`;
    chomp $host;
    $ENV{'CONDOR_CLUSTER'} = 0;
    $ENV{'CONDOR_PROCESS'} = 0;
    $ENV{'QSUB_SUBMITTER'} = (defined $ENV{'USER'} ? $ENV{'USER'} : '') . '@' . $host;

    if ($opts{'enable-transfer'} && keys(%{$opts{'transfer'}})) {
        # emulate the transfer: copy the inputs into a scratch directory and
        # run there with a PATH restricted to system directories plus '.'
        my $tmpdir = File::Temp->newdir();
        print STDERR "Would have transfered:\n";
        my %basefns;    # basename --> age (-M) of the file before the run
        foreach my $fn (keys(%{$opts{'transfer'}})) {
            print STDERR "\t$fn\n";
            my $basefn = File::Basename::basename($fn);
            $basefns{$basefn} = -M $fn;
            # list form: no shell, so odd characters in names are safe
            system('cp', '-p', $fn, "$tmpdir/$basefn");
        }
        my $cwd = Cwd::cwd();
        chdir("$tmpdir") or abort("Could not chdir to '$tmpdir': $!\n");
        my @systemDirs = grep { x_IsSystemPath($_) } split(/:/, $ENV{'PATH'});
        $ENV{'PATH'} = join(':', @systemDirs, '.');
        print STDERR "Using path:\n\t", $ENV{'PATH'}, "\n";
        print STDERR "Executing $lclCmdline\n";

        my $exitStatus = 0;
        for (my $i = 0; $i < $opts{'n'} && $exitStatus == 0; ++$i) {
            $ENV{'CONDOR_PROCESS'} = $i;
            $exitStatus = ExecCmd($lclCmdline, 1);    # 1 means interactive
        }

        # anything new, or modified since it was copied in, would come back
        my @newfiles = grep { !exists($basefns{$_}) || ($basefns{$_} > -M $_) } <*>;
        if (@newfiles) {
            print STDERR "Would have transferred back:\n";
            print STDERR "\t", join("\n\t", @newfiles), "\n";
            system('cp', '-p', @newfiles, $cwd);
        }
        chdir $cwd;    # so can delete tmpdir
        exit($exitStatus);
    }

    print STDERR "Executing $cmdline\n";
    exit(ExecCmd($cmdline, 1));    # 1 means interactive
}

# -----------------------------------------------------------------------------
# create a job description file to submit the shell script to Condor
my $JDF = File::Temp->new()
    or abort("Failed to create temporary (JDF) file.\n");

print $JDF "
universe = vanilla
executable = $0
initialdir = $pwd
requestcpus = $opts{'numCPU'}
";

if (defined $opts{'log'}) {
    print $JDF "log = $opts{'log'}\n";
}

if (!$opts{'enable-transfer'}) {
    print $JDF "should_transfer_files = NO\n";
}
else {
    if ($opts{'preemptable'}) {
        # note we add +FileSystemDomain to ClassAd because condor will only
        # automatically supply FileSystemDomain if transfer == NO or IF_NEEDED
        my $submitFilesystem =
            `condor_config_val FILESYSTEM_DOMAIN 2> /dev/null`
            || abort("Failed to run condor_config_val. " .
                     "Please check your path.\n");
        chomp $submitFilesystem;
        print $JDF "should_transfer_files = YES
when_to_transfer_output = ON_EXIT_OR_EVICT
+Preemptable = True
+FileSystemDomain = \"$submitFilesystem\"
";
    }
    else {
        print $JDF "should_transfer_files = IF_NEEDED
when_to_transfer_output = ON_EXIT
";
    }
    print $JDF "transfer_input_files = ",
        join(', ', keys(%{$opts{'transfer'}})), "\n";
}

if (defined $opts{'req'}) {
    print $JDF "requirements = " . $opts{'req'} . "\n";
}

print $JDF "notification = NEVER
rank = ((TARGET.FileSystemDomain == MY.FileSystemDomain) * 1000) + (COLLECTOR_HOST_STRING != Machine) * 100 + (TotalSlots - TotalLoadAvg) * 10 + (TotalSlots - SlotID)
getenv = True
environment = CONDOR_CLUSTER=\$(Cluster);CONDOR_PROCESS=\$(Process);QSUB_SUBMITTER=\$\$([User])
priority = $opts{p}
queue $opts{n}
";
close($JDF) or abort("Failed to write temporary (JDF) file.\n");

# -----------------------------------------------------------------------------
# submit the job; $clusterId contains cluster number if successful
# ($JDF stringifies to the temporary file's name)
open(my $submit, '-|', "condor_submit $JDF 2>&1")
    or abort("Failed to run condor_submit. Please check your path.\n");
while (my $line = <$submit>) {
    print $line;
    if ($line =~ /job\(s\) submitted to cluster (\d+)\./) {
        $clusterId = $1;
    }
}
close($submit)
    or abort("Failed to run condor_submit. Please check your path.\n");
if (!defined $clusterId) {
    abort("Did not successfully submit job.\n");
}
exit 0;

# -----------------------------------------------------------------------------
# Runs a shell command under bash (wrapped in 'caffeinate -i' when
# /usr/bin/caffeinate exists, to block idle sleep on OSX).
#
#   $cmd         --> shell command line to execute
#   $interactive --> if 1, the command's stdout/stderr are left alone;
#                    otherwise they are captured in
#                    .condor_{out,err}.$CLUSTER.$PROCESS, which are deleted
#                    afterwards if empty or renamed without the leading
#                    period if not
#
# Exports JOB_ID/JOB_SUBID (and SLURM_ARRAY_* unless the real ones are
# already present) to the command, and sends a UDP notification to the
# submitter if NOTIFY_NETPORT is set.  Returns 0 if the command succeeded
# and 1 otherwise.
sub ExecCmd {
    my ($cmd, $interactive) = @_;
    my $isInteractive = (defined $interactive && $interactive == 1);

    # compatibility w/ slurm: never clobber real slurm variables, but do
    # keep our own emulated ones in step when called repeatedly (-n loop)
    if ($slurmVarsAreOurs || !exists($ENV{'SLURM_ARRAY_JOB_ID'})) {
        $slurmVarsAreOurs = 1;
        $ENV{'SLURM_ARRAY_JOB_ID'}  = $ENV{'CONDOR_CLUSTER'};
        $ENV{'SLURM_ARRAY_TASK_ID'} = $ENV{'CONDOR_PROCESS'};
    }
    $ENV{'JOB_ID'}    = $ENV{'CONDOR_CLUSTER'};    # generic env vars
    $ENV{'JOB_SUBID'} = $ENV{'CONDOR_PROCESS'};

    my $CC      = $ENV{'CONDOR_CLUSTER'};
    my $CP      = $ENV{'CONDOR_PROCESS'};
    my $out     = ".condor_out.$CC.$CP";
    my $err     = ".condor_err.$CC.$CP";
    my $permOut = "condor_out.$CC.$CP";
    my $permErr = "condor_err.$CC.$CP";

    my @runner = ('bash', '-c');
    if (-e '/usr/bin/caffeinate') {    # OSX program to block idle sleeping
        unshift(@runner, 'caffeinate', '-i');
    }
    my $shellCmd = $isInteractive ? "($cmd)" : "($cmd) 1> $out 2> $err";

    # capture the wait status right away; $? is a global that later calls
    # are free to overwrite
    my $waitStatus = system(@runner, $shellCmd);
    my $exitStatus = ($waitStatus == 0) ? 0 : 1;

    if (exists $ENV{'NOTIFY_NETPORT'}) {    # if running notify-netsend daemon
        # pull the host out first: a failed match inside the constructor's
        # argument list would silently misalign the key/value pairs
        my $submitter = defined $ENV{'QSUB_SUBMITTER'} ? $ENV{'QSUB_SUBMITTER'} : '';
        my ($submitHost) = $submitter =~ /@(.+)$/;
        my $sock;
        if (defined $submitHost) {
            $sock = IO::Socket::INET->new(
                Proto    => 'udp',
                PeerPort => $ENV{'NOTIFY_NETPORT'},
                PeerAddr => $submitHost,
            );
        }
        if ($sock) {    # if doesn't work, no big deal
            if ($waitStatus == 0) {
                $sock->send("Condor job $CC.$CP complete:\0$cmd");
            }
            else {
                $sock->send("Condor job $CC.$CP non-zero exit ($waitStatus):\0$cmd");
            }
        }
    }

    if (!$isInteractive) {
        # drop empty capture files; publish non-empty ones w/o leading period
        foreach my $pair ([$out, $permOut], [$err, $permErr]) {
            my ($hidden, $visible) = @{$pair};
            if (-z $hidden) {
                unlink $hidden;
            }
            elsif (-e $hidden) {
                rename $hidden, $visible;
            }
        }
    }

    return $exitStatus;
}

# -----------------------------------------------------------------------------
# Takes path and returns whether it is a system path (and thus should
# be ignored for transfers)
sub x_IsSystemPath {
    my ($path) = @_;
    # built inside the sub: file-scoped initializers placed down here would
    # never run, because the main line exits before reaching them
    my %isSystem = map { $_ => 1 } ('/usr/bin', '/usr/sbin', '/bin', '/sbin');
    return exists $isSystem{$path};
}

# -----------------------------------------------------------------------------
# Takes string representing potential filename as input.  Interpolates
# any shell glob characters (~, ?, *, etc.) and then looks for file in
# the current directory.  If not in the current directory, searches
# non-system PATHes.  If file found, returns full path; otherwise
# returns string unmodified.
sub x_FindFullPath {
    my ($fn) = @_;

    # filenames can't have newlines (stat will give error!)
    return $fn if ($fn =~ /\n/);

    my @globbed = glob($fn);
    if (@globbed == 1 && -f $globbed[0]) {
        return $globbed[0];
    }

    if ($fn !~ m{/}) {    # if does NOT contain slash (/) then search path
        foreach my $path (split /:/, $ENV{'PATH'}) {
            # an empty component would turn "$path/$fn" into "/$fn"
            next if $path eq '';
            # skip system pathes -- don't want to transfer & exe might
            # not work on another system anyway
            next if x_IsSystemPath($path);
            if (-f "$path/$fn" && -x "$path/$fn") {
                return "$path/$fn";
            }
        }
    }
    return $fn;
}

# -----------------------------------------------------------------------------
# signal handler and abort subroutine

# Cleans up and dies with the given message: in 'submit' mode any cluster
# that was already submitted is removed with condor_rm; in 'exec' mode the
# hidden stdout/stderr capture files of this process are deleted.
sub abort {
    if (defined $clusterId) {
        if ($runMode eq 'submit') {
            system("condor_rm $clusterId > /dev/null 2>&1");
        }
        else {    # $runMode eq 'exec'
            my $process = defined $ENV{'CONDOR_PROCESS'} ? $ENV{'CONDOR_PROCESS'} : '';
            unlink glob(".condor_{out,err}.$clusterId.$process");
        }
    }
    die @_;
}

# Signal handler installed for HUP, INT, QUIT and TERM.
sub handler {
    my ($sig) = @_;
    abort("Killed by SIG$sig.\n");
}