#!/usr/local/bin/perl
# forces sh to find perl in above path and run.  This is to remain
# compatible with kernels that don't recognize the #! exec style
#
# The idea behind this tool originally was we wanted to do something on
# each machine.  The old tool would serially go to each machine run the
# command, wait for it to finish, and continue to the next machine.
# I saw no reason why this couldn't be done in parallel.  The problems,
# however, were many.  First of all, the output from finishing parallel
# jobs needs to be buffered in such a way that different machines wouldn't
# output their results on top of eachother.  A final bit was added because
# Tony wanted output to be alphabetical rather than first-done, first-seen.
# The result is a parallel job spawner that displays output from the machines
# alphabetically, as soon as it is available.  If consort take longer than
# everyone else, there will be no output past consort until consort is
# finished.  As soon as consort is finished, though, everyone's output
# is printed.
#
# $Header: /kees/projects/gsh/RCS/gsh,v 1.14 2001/11/28 12:42:53 nemesis Exp $
#
# Copyright (C) 1998,1999,2000 Cornelius Cook
# Supposedly based on original code distributed with Perl Distribution.
# cook@cpoint.net, http://outflux.net/
#
# Thanks to:
#	- whoever originally gave this idea to: Mike Murphy
#	- Mike Murphy for actually implementing this at a site
#	- Paul Holcomb for various fixes
#	- Nick Asvos for finding an out-of-memory bug and reporting it
# 
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
# http://www.gnu.org/copyleft/gpl.html
#

$VERSION="0.9.1";

use POSIX "sys_wait_h";

require "sysadmin.pl";

#$debug=1;		# turns on debuging & slows down reap loop
#$showpid=1;		# shows PIDs
#$signals=1;		# uses the child signal handler
$| = 1;
$remote=1;
$locally=1;
$showhost=1;
$silent=1;

# set up signal handlers: we must die gracefully and attempt to kill children
$SIG{'QUIT'} = 'quit';			# install signal handler for SIGQUIT
$SIG{'INT'} = 'quit';			# install signal handler for Ctrl-C
$SIG{'USR1'} = 'ReportWaiting';		# install USR1 handler
# getting this signal usually means that ssh is asking a question
#$SIG{'TTIN'} = 'IGNORE';		# stop waiting for input
# in case we miss a child finishing during the forking time,
#  we want to catch it, rather than have it get blocked and forgotten.
$SIG{'CHLD'} = 'catch' if ($signals);

$n=" -n";		# for ssh to not want STDIN


sub getswitches {
    while ($ARGV[0] =~ /^[-\+]/) {		# parse switches
	$ARGV[0] =~ /^-h/ && ($showhost=0,shift(@ARGV),next);
	$ARGV[0] =~ /^-s/ && ($silent=0,shift(@ARGV),next);
	$ARGV[0] =~ /^-d/ && ($debug=1,shift(@ARGV),next);
	$ARGV[0] =~ /^-n/ && ($n='',shift(@ARGV),next);
	$ARGV[0] =~ /^-r/ && ($remote=0,shift(@ARGV),next);
	$ARGV[0] =~ /^-o/ && ($locally=0,shift(@ARGV),next);
	$ARGV[0] =~ /^-l/ && ($l=' -l ' . $ARGV[1],shift(@ARGV),shift(@ARGV),
				next);
	last;
    }
}

do getswitches();			# get any switches before class
$systype = shift(@ARGV);		# get name representing set of hosts
do getswitches();			# same switches allowed after class

$cmd = join(' ',@ARGV);			# remaining args constitute the command
$cmd =~ s/'/'"'"'/g;			# quote any embedded single quotes

die
"Usage: gsh [+r] [-h] [-s] [+n] [-l username] ghost-line cmd
version $VERSION

-r	runs the command locally with '\$host' replaced
-h	do NOT display machine name at beginning of each output line
-s	displays commandline for each report
-n	turns OFF the -n argument passed to the ssh's
-l	uses username for the ssh's
-d	turn on debugging
-o	do NOT run command on 'self' locally, instead use ssh
" if ($cmd eq "");

&LoadGhosts;
&SetBACKBONES($systype);

# for each machine that matched the ghosts systype do the following:
$oldcmd = $cmd;
foreach $host (@BACKBONES) {

	# clear this machine's output buffer
	$output{$host}="";

	# make a column header for this machine if needed
	if ($showhost) {
		$showlist{$host} = "$host:\t";
	}

#	push(@tried,$host);
	# do the fork
	$pid = fork();				# fork
	if ($pid==0) {

		close(STDIN) if ($n ne "");	# get rid of STDIN
		open(STDOUT,">/tmp/gsh.$$");	# open stdout to tmp file
		open(STDERR,">&STDOUT");	# dup stderr to stdout
						# this results in rather
						# broken output sometimes
						# maybe have two files?

		select(STDERR); $|=1;		# set outputs to unbuffered
		select(STDOUT); $|=1;

		$cmd = $oldcmd;
		$cmd =~ s/\$host/$host/gi;

		if ( (defined($locally) && ($host eq $SYSADM_HOST)) ||
		      $remote != 1 ) {
			exec "$cmd 2>&1";	# exec the rsh
		} else {
			exec "ssh $n$l -o BatchMode=yes $host '$cmd'$dist 2>&1";  # exec the rsh
		}

		# should never get to next line
		die "Exec of rsh to $host failed!\n";
	}
	elsif (!$pid) {				# report failures
		# !$pid is true for 0 also...
		warn "Couldn't fork to '$host': $!\n";
	}
	else {
		print "#spawned $pid for $host\n" if ($debug);
		print STDERR "$host " if ($showpid);
		$pidlist{$pid}=$host;		# record the child's pid
	}
#    }
}
close(STDIN);

$waitfail=0;
#$forked = join(' ',@tried);

# sometimes wait will return a -1.  I'm not sure what this is.  I've read
# too many different man pages on wait, and r3,r4, and aix all handle things
# differently.  My solution is to ignore -1's, and continue waiting.

# but since I'm not using r3 anymore, $signal is undef, and I use waitpid

$cycles = 0;
@left = keys %pidlist;
$togo = $#left;
$before = $togo;
while (defined($togo)) {
	# every 5 cycles (cycle == .5 seconds) we should EXPLICITLY
	# wait on a child.  Sometimes children don't get reaped
	# correctly by the SIGCHLD handler, so we need to wait
	# on them and call the handler directly.  This seemed to
	# fix all my problems with catching children.

	# the debugging output will show lists of what machine are still
	# being waited on, etc
	print STDERR "[$togo]\n" if ($debug);

	if ($before != $togo) {
		$cycles = 0;
		$before = $togo;
	}
	else {
		$cycles ++;
	}

	if ($cycles >= ($signals ? 5 : 0)) {
		if ($viewwaiting || $debug) {
			$viewwaiting=0;
			print STDERR "Waiting on: ";
			foreach (keys %pidlist) {
				print STDERR "$pidlist{$_} ";
			}
			print STDERR "\n";
		}
		# if we catch something greater than 0, call SIGCHLD directly
		if (($pid = waitpid(-1,&WNOHANG))>0) {
			&catch('',$pid);
		}
	}

	# this loop checks to see if there is any output waiting to be
	# printed.  Since we're going it alphabetically by machine name,
	# it will quit immediately if it comes across an "empty" output
	# in the alpha-sorted list of keys.
	# a lone "." means that a machine finished without any output
	OUTPUTLOOP:
	foreach $key (sort keys %output) {
		if ($output{$key} ne "") {
			print $output{$key} unless ($output{$key} eq ".");
			delete $output{$key};
		}
		else {
			last OUTPUTLOOP;
		}		
	}

	# wait for a half second
	select(undef,undef,undef,0.5) if ($debug);
	# see which processes are left
	@left = keys %pidlist;
	# update the "how many are left?" counters
	if (@left) {
		$togo = $#left;
	}
	else {
		undef $togo;
	}
}

# handle any other output that hadn't been printed yet
foreach $key (sort keys %output) {
	if ($output{$key} ne "") {
		print $output{$key} unless ($output{$key} eq ".");
		delete $output{$key};
	}
}

#print "skipped machines: $forked\n";
#@tried=split(/\s+/,$forked);
#
#foreach (@tried) {
#	print "No report: $_\n";
#}

exit(0);



# subroutines


sub quit {
    $| = 1;
    print "\r\n#caught SigInt...\n" if ($debug);
    # clear handlers
    $SIG{'INT'} = '';
    $SIG{'QUIT'} = '';
# for each child, kill the child, then unlink it's output file
    foreach $pid (keys %pidlist) {
	print "#cleaning up pid: $pid\n" if ($debug);
	kill 2, $pid;
	unlink("/tmp/gsh.$pid");
    }
    # kill self
    kill 2, $$;
}

# sig handler for when a child dies
sub catch {
	# first arg is signal caught, second only comes if we force a call
	local($undef,$forwarded) = @_;
	local($pid,$READ,$host,$type);

	if ($forwarded) {
		$pid = $forwarded;
		$type = "forwarded";
	}
	else {
		# get the pid of the dead child
		$pid = wait;
		$type = "caught";
	}
	# yell if wait is lying to us
	if ($pid < 0) {
		print "Missed a child??!  May have to Ctrl-C out.\n";
	}
	else {
		# which machine finished?
		$host = $pidlist{$pid};
		print "\n#$type $pid $host\n" if ($debug);
		$output{$host} .= "$cmd\n" unless $silent;
		# make a unique filehandle name: handler needs to be reentrant
		$READ = time . "$pid";
		if (!open($READ,"</tmp/gsh.$pid")) {
			$output{$host} .= "$showlist{$host}error with output recording\n";
		}
		while (<$READ>) {
			$output{$host} .= $showlist{$host} . $_;
		}
		# if there was no output, signal to the output printing loops
		if ($output{$host} eq "") {
			$output{$host} = ".";
		}
#		if ($output{$host} eq "" && $showlist{$host} ne "") {
#			$output{$host} = "$showlist{$host}\n";
#		}
		close($READ);
		unlink("/tmp/gsh.$pid");	# clean up
#		$forked =~ s/$pidlist{$pid}//;
		delete $pidlist{$pid};		# remove from pending pid list
	}
}

sub ReportWaiting {
	$viewwaiting=1;
	# on bad systems, you may need to do this
	#$SIG{'USR1'} = 'ReportWaiting';		# install USR1 handler
}

