/*
 ********************************************************************
 *
 * launchagent.c - helper application for the launcher
 *
 *     CSIRO Automation
 *     Queensland Centre for Advanced Technologies
 *     PO Box 883, Kenmore, QLD 4069, Australia
 *     www.cat.csiro.au/cmst
 *
 *      $Id: launchagent.c 1770 2007-01-17 07:44:35Z pra077 $
 *
 * Copyright (c) CSIRO Manufacturing Science & Technology
 *
 ********************************************************************
 */

/* Revision-control id string; main() hands it to the RTX_GETOPT_CMD and
 * RTX_GETOPT_PRINT macros. */
static char *rcsid = "$Id: launchagent.c 1770 2007-01-17 07:44:35Z pra077 $";

/**
 ********************************************************************
 * 
 * \file launchagent.c
 * \brief helper application for the launcher
 * \author Pavan Sikka
 */

/**
 * \page launchagent helper application for launcher
 *
 * This program is used by the launcher to start up and shut down
 * programs on different machines. When the launcher starts up, it
 * uses the "rsh" program to start launchagent on each
 * target machine. The launchagent then establishes a TCP/IP
 * connection with the launcher for further communication.
 *
 * The program understands three commands: LAUNCH, KILL and DONE.
 *
 * The SIGRTMIN signal is used as an indication that the
 * application it started has come up in a satisfactory state.
 * The application should send its pid as the data associated
 * with the signal. The rtx_main_wait_shutdown() function
 * provides this facility to applications.
 *
 * The CHLD signal is used as an indication that a launched 
 * application has died. 
 *
 * The signals are blocked and the program uses sigwait() to 
 * respond to signals.
 *
 * The program shuts down if any internal errors are detected.
 *
 */

#include <unistd.h>
#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sched.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>

#include <rtx/main.h>
#include <rtx/message.h>
#include <rtx/thread.h>
#include <rtx/mutex.h>
#include <rtx/sem.h>
#include <rtx/auth.h>
#include <rtx/lock.h>
#include <rtx/error.h>
#include <rtx/timer.h>
#include <rtx/signal.h>
#include <rtx/getopt.h>

#include "launch.h"

/* forward definitions */

/* putenv() is used by the forked child in the reader thread; declared here
 * explicitly (presumably because <stdlib.h> does not expose it under the
 * build flags in use -- TODO confirm). */
int putenv (char * string);

/* thread entry points (each receives the LaunchAgent handle as arg) */
void * launchagent_reader_thread (void * arg);
void * launchagent_writer_thread (void * arg);
void * launchagent_child_up_thread (void * arg);
void * launchagent_child_done_thread (void * arg);
/* helper called from the threads on an internal error; ends the caller */
void launchagent_shutdown_thread (LaunchAgent * l, int errors);
/* program-level startup and shutdown, called from main() */
int launchagent_startup (LaunchAgent * l);
int launchagent_shutdown (LaunchAgent * l);
/* state-file helpers: run-and-remove, remove, and rewrite the state file */
void launchagent_cleanup_statefile();
void launchagent_remove_statefile();
void launchagent_update_statefile(RtxList * list);

/* data */

/**
 * The single LaunchAgent instance. It has static storage, so all members
 * start out zero/NULL; main() fills in the defaults and every thread
 * receives a pointer to it.
 */
static LaunchAgent launchagent;

/**
 * The command-line options
 *
 * Each entry binds an option name to a member of the launchagent
 * structure above:
 *  - launchhost : string stored in launchHostName
 *  - directory  : string stored in dir
 *  - cfgdir     : string stored in confdir
 *  - preclean   : integer stored in cleanup
 */
static RtxGetopt launchagentOpts[] = {
	{"launchhost", "Launch hostname", 
		{
			{RTX_GETOPT_STR, &launchagent.launchHostName, "hostname"},
			RTX_GETOPT_END_ARG
		}
	},
	{"directory", "Launch directory", 
		{
			{RTX_GETOPT_STR, &launchagent.dir, "directory"},
			RTX_GETOPT_END_ARG
		}
	},
	{"cfgdir", "Exported config file directory", 
		{
			{RTX_GETOPT_STR, &launchagent.confdir, "directory"},
			RTX_GETOPT_END_ARG
		}
	},
	{"preclean", "Check and tidy for leftovers from previous runs", 
		{
			{RTX_GETOPT_INT, &launchagent.cleanup, "bool"},
			RTX_GETOPT_END_ARG
		}
	},
	RTX_GETOPT_END
};
/* Help string passed to the getopt macros in main(); none is provided. */
char * launchagentHelpStr = NULL;

/**
 * launch helper
 *
 * The main function for the launchagent. It detaches from the process
 * that started it (fork, parent exits, child calls setsid), blocks the
 * termination signals, sets up message/error reporting, reads the
 * command-line options and then either
 *  - runs the state-file clean up and exits (when the "preclean" option
 *    set the cleanup member), or
 *  - starts the agent with launchagent_startup(), queues a
 *    LAUNCH_MSG_START event for the launcher and waits for shutdown.
 * The shutdown can be initiated by the launcher with the DONE command,
 * or by an internal error. In either case, the program cleans up and
 * exits.
 *
 * Exit status: 0 on a clean run, 1 on any startup/shutdown failure.
 */
int
main (
		int argc,          /**< number of tokens on the command line */
		char * argv[]      /**< command-line tokens */
	 )
{
	LaunchAgent * l = &launchagent;
	LaunchMsgNode * nd = NULL;
	int errs = 0;
	pid_t myPid;
	char * p = NULL;

	/* let the parent process exit - it is not needed anymore; only the
	 * forked child carries on */

	if ((myPid = fork ()) < 0) {
		fprintf (stderr, "fork() failed: %s\n", strerror (errno));
		exit (1);
	} else if (myPid != 0) {
		exit (0);
	}

	/* close stdin and make stdout refer to the same file as stderr */

	close (STDIN_FILENO);
	dup2 (STDERR_FILENO,STDOUT_FILENO);

	/* become session leader */

	setsid ();

	/* now, resume normal programming */

	/* block the signals listed below plus the real-time signals
	 * (return values are not checked here) */
	rtx_signal_block (SIGTSTP);
	rtx_signal_block (SIGINT);
	rtx_signal_block (SIGQUIT);
	rtx_signal_block (SIGTERM);
	rtx_signal_block_realtime ();

	/* This program is started by the launcher and may not
	 * have a console/terminal to write to. Send all messages
	 * to the message daemon.
	 */
	rtx_message_init ("launchagent", RTX_MESSAGE_STDERR | RTX_MESSAGE_MESSAGE);
	if (rtx_error_init ("launchagent", RTX_MESSAGE_STDERR | RTX_ERROR_MESSAGE,
				NULL) == -1) {
		rtx_message_warning ("rtx_error_init()");
		exit (1);
	}

	/* get own hostname/pid */
	/* NOTE(review): assumes myHostName holds at least BUFSIZ bytes --
	 * confirm against the LaunchAgent definition in launch.h */
	if (gethostname (l->myHostName, BUFSIZ) == -1)
		return (rtx_error_errno ("gethostname()"));
	/* strip the domain name from the hostname */
	if ((p = strchr (l->myHostName, '.')) != NULL)
		p[0] = '\0';
	/* defaults; the option parser below may overwrite launchHostName,
	 * dir and confdir */
	l->myPid = getpid ();
	l->baseprio = LAUNCH_PRIO;
	l->verbose = 0;
	l->launchHostName = l->myHostName;
	l->dir = ".";
	l->confdir = ".";

	/* get user-specified options; a result <= 0 ends the program, and
	 * the usage text is printed only when the result is -1 */
	if ((errs = RTX_GETOPT_CMD (launchagentOpts, argc, argv, rcsid, 
					launchagentHelpStr)) <= 0) {
		if (errs == -1)
			RTX_GETOPT_PRINT (launchagentOpts, argv[0], rcsid, launchagentHelpStr);
		exit (1);
	}    
	errs = 0;
	/* "priority" is not in launchagentOpts; presumably it is a built-in
	 * option of the rtx getopt module -- TODO confirm */
	l->verbose = atoi (rtx_getopt_get_debug ("0"));
	l->baseprio = rtx_getopt_get_int ("priority");

	/* clean-up mode: run and remove a left-over state file, then exit
	 * without contacting the launcher */
	if (l->cleanup) {
		rtx_message("LaunchAgent started in clean up mode");
		launchagent_cleanup_statefile();
		rtx_message("Exiting from clean up mode");
		exit(0);
	} else {
		rtx_message("LaunchAgent started in normal mode");
	}
	l->cleanup = 0;

	/* startup the launchagent */
	if (launchagent_startup (l) == -1) {
		rtx_error_flush ("launchagent_startup[%s]", l->launchHostName);
		exit (1);
	}
	/* send a message to the launcher to indicate readiness: the event is
	 * queued here and the semaphore post wakes the writer thread */
	if ((nd = launch_create_msg (LAUNCH_MSG_START, 
					l->myHostName)) != NULL) {
		if (rtx_list_enqueue (l->events, nd) == -1) {
			rtx_error ("rtx_list_enqueue() failed");
			l->errors++;
		}
		if (rtx_sem_post (l->eventSem) == -1) {
			rtx_error ("rtx_sem_post() failed");
			l->errors++;
		}
	} else {
		rtx_error ("launch_create_msg() failed");
		l->errors++;
	}

	/* wait for shutdown (skipped when an error was already recorded) */
	if (! l->errors)
		if (rtx_main_wait_shutdown (0) == -1) {
			rtx_message_warning ("rtx_init_wait_shutdown () failed");
			rtx_error ("rtx_init_wait_shutdown () failed");
		}

	if (l->verbose)
		rtx_message ("shutting down [%d]", l->errors);
	/* launchagent_shutdown() returns its error count, so any non-zero
	 * value is a failure */
	if (launchagent_shutdown (l)) {
		rtx_error_flush ("launchagent_shutdown");
		exit (1);
	}
	launchagent_remove_statefile();
	exit (0);
}

/**
 * the reader thread
 *
 * Reads one line at a time from the launcher socket, parses it with
 * launch_parse_msg() and acts on the message type:
 *  - LAUNCH_MSG_LAUNCH: registers the application in l->apps (if it is
 *    not there yet), forks and exec's the command, and queues
 *    LAUNCH_DONE (apps that are not LAUNCH_APP_AWARE) or LAUNCH_FAILED
 *    (fork error) for the writer thread;
 *  - LAUNCH_MSG_KILL: signals the application (SIGTERM while its status
 *    is > 0, SIGKILL otherwise) and queues KILL_FAILED if that fails;
 *  - LAUNCH_MSG_DONE: marks the agent done and requests shutdown.
 * Any other message type is logged and ignored.
 *
 * Internal errors end the thread through launchagent_shutdown_thread(),
 * which does not return.
 *
 * NOTE(review): the parsed message node is not released here; which API
 * owns/frees it is not visible in this file.
 *
 * \return NULL
 */
void *
launchagent_reader_thread (
		void * arg    /**< pointer to LaunchAgent
					   **  structure */
		)
{
	LaunchAgent * l = (LaunchAgent *) arg;
	char * buf = NULL;
	int i = 0, n;
	int savedErrno = 0;
	int killFailed = 0;
	char * myargv[64];
	LaunchMsgNode * nd = NULL;
	LaunchAppNode * appNd = NULL;
	pid_t childPid;

	if ((buf = calloc (1, 1024)) == NULL) {
		rtx_error ("unable to allocate buffer");
		launchagent_shutdown_thread (l, 1);
	}
	if (l->verbose)
		rtx_message ("launchagent_reader_thread: up");
	while (! l->done) {
		if (l->verbose)
			rtx_message ("launchagent_reader_thread: waiting");
		if ((n = rtx_inet_readline (l->launchSock->sock, buf, 
						1024, NULL)) == -1) {
			if (! l->done) {
				rtx_error_flush ("rtx_inet_readline");
				launchagent_shutdown_thread (l, 1);
			}
		}
		if (l->done)
			continue;
		if (n == 0) {
			/* end-of-file */
			rtx_error_flush ("rtx_inet_readline returned 0");
			launchagent_shutdown_thread (l, 1);
		}
		if (l->verbose)
			rtx_message ("launchagent_reader_thread: rx: %s", buf);
		/* Have a complete message for processing */
		if ((nd = launch_parse_msg (buf)) == NULL) {
			rtx_error_flush ("launch_parse_msg() failed [%s]",
					buf);
			continue;
		}
		if (l->verbose)
			rtx_message ("launchagent_reader_thread: msg parsed");
		/* Have a valid message for processing */
		switch (nd->msgType) {
			case LAUNCH_MSG_LAUNCH :
				if (l->verbose)
					rtx_message ("launchagent_reader_thread: "
							"LAUNCH_MSG_LAUNCH");
				if ((appNd = rtx_list_lookup 
							(l->apps, nd->appName)) == NULL) {
					if ((appNd = (LaunchAppNode *) calloc
								(1, sizeof (LaunchAppNode))) == NULL) {
						rtx_error_flush ("no memory");
						launchagent_shutdown_thread (l, 1);
					}
					/* fill in the details */
					if ((appNd->appName = strdup (nd->appName))
							== NULL) {
						rtx_error_flush ("no memory");
						launchagent_shutdown_thread (l, 1);
					}
					if ((appNd->hostName = strdup (nd->hostName))
							== NULL) {
						rtx_error_flush ("no memory");
						launchagent_shutdown_thread (l, 1);
					}
					if ((appNd->command = strdup (nd->command))
							== NULL) {
						rtx_error_flush ("no memory");
						launchagent_shutdown_thread (l, 1);
					}
					appNd->status = 0;
					appNd->appType = nd->appType;
					appNd->timeout = nd->timeout;
					if (rtx_list_add (l->apps, appNd->appName,
								appNd) == -1) {
						rtx_error_flush ("rtx_list_add");
						launchagent_shutdown_thread (l, 1);
					}
				}
				if (l->verbose)
					rtx_message ("launchagent_reader_thread: "
							"found app node");
				/* have app node */
				/* Tokenize the command for use with execvp() */
				i = 0;
				myargv[i] = strtok (nd->command, "\n\t ");
				while ((i < 63) && (myargv[i] != NULL)) {
					myargv[++i] = strtok (NULL, "\n\t ");
				}
				/* execvp() needs a NULL-terminated vector. When the
				 * command has 64 or more tokens the loop stops with a
				 * token in the last slot, so terminate explicitly
				 * (dropping the surplus tokens). */
				myargv[i] = NULL;
				/* lock the event Q so no events can be added */
				if (rtx_mutex_lock (l->eventQ) == -1) {
					rtx_error_flush ("rtx_mutex_lock() failed");
					launchagent_shutdown_thread (l, 1);
				}
				if ((childPid = fork ()) == -1) {
					/* keep fork()'s errno: the calls below may
					 * overwrite it before it is reported */
					savedErrno = errno;
					if ((nd = launch_create_msg 
								(LAUNCH_MSG_LAUNCH_FAILED, appNd->appName))
							== NULL) {
						rtx_error_flush ("launch_create_msg() "
								"failed");
						rtx_mutex_unlock (l->eventQ);
						launchagent_shutdown_thread (l, 1);
					}
					/* NOTE(review): assumes errMsg holds at least 128
					 * zero-initialised bytes -- confirm in launch.h */
					strncpy (nd->errMsg, strerror (savedErrno), 127);
					if (rtx_list_enqueue (l->events, nd) == -1) {
						rtx_error_flush ("rtx_list_enqueue() "
								"failed");
						rtx_mutex_unlock (l->eventQ);
						launchagent_shutdown_thread (l, 1);

					}
					if (rtx_mutex_unlock (l->eventQ) == -1) {
						rtx_error_flush ("rtx_mutex_unlock() "
								"failed");
						launchagent_shutdown_thread (l, 1);
					}
					if (rtx_sem_post (l->eventSem) == -1) {
						rtx_error_flush ("rtx_sem_post() "
								"failed");
						launchagent_shutdown_thread (l, 1);
					}
					break;
				}
				if (childPid == 0) { /* child */
					int newstdout;
					rtx_mutex_unlock (l->eventQ);
					/* own session, so the application is detached from
					 * the agent's session and process group */
					setsid ();
					close (STDIN_FILENO);
					/* stdout is not simply closed (that made printf
					 * unpredictable); it is pointed at /dev/null, or
					 * at stderr when /dev/null cannot be opened */
					newstdout = open("/dev/null",O_APPEND|O_WRONLY);
					if (newstdout < 0) {
						dup2(STDERR_FILENO,STDOUT_FILENO);
					} else {
						dup2(newstdout,STDOUT_FILENO);
					}
					close (l->launchSock->sock->sockfd);
					putenv ("LAUNCHER=TRUE");
					if (l->confdir) {
						setenv ("LAUNCHER_CONFIGPATH",l->confdir,1);
					}
					if (myargv[0] == NULL) {
						/* empty command: nothing to exec */
						rtx_error_flush ("empty command for app [%s]",
								nd->appName);
						_exit (1);
					}
					if (execvp (myargv[0], &(myargv[0])) == -1) {
						rtx_error_errno_flush ("execvp(%s) failed",
								nd->appName);
						/* _exit(), not exit(): the forked child must
						 * not run the parent's atexit handlers or
						 * flush its copy of the parent's stdio
						 * buffers */
						_exit (1);
					}
				}
				/* Parent */
				if (l->verbose)
					rtx_message ("launchagent_reader_thread: "
							"app launched");
				appNd->appPid = childPid;
				appNd->status = 1;
				if (appNd->appType != LAUNCH_APP_AWARE) {
					/* the app will not report in by itself, so
					 * LAUNCH_DONE is queued right away */
					if ((nd = launch_create_msg 
								(LAUNCH_MSG_LAUNCH_DONE, appNd->appName))
							== NULL) {
						rtx_error_flush ("launch_create_msg() "
								"failed");
						rtx_mutex_unlock (l->eventQ);
						launchagent_shutdown_thread (l, 1);
					}
					if (l->verbose)
						rtx_message ("launchagent_reader_thread: "
								"msg created");
					nd->appPid = childPid;
					if (rtx_list_enqueue (l->events, nd) == -1) {
						rtx_error_flush ("rtx_list_enqueue() "
								"failed");
						rtx_mutex_unlock (l->eventQ);
						launchagent_shutdown_thread (l, 1);
					}
					if (l->verbose)
						rtx_message ("launchagent_reader_thread: "
								"msg enqueued");
					if (rtx_mutex_unlock (l->eventQ) == -1) {
						rtx_error_flush ("rtx_mutex_unlock() "
								"failed");
						launchagent_shutdown_thread (l, 1);
					}
					if (rtx_sem_post (l->eventSem) == -1) {
						rtx_error_flush ("rtx_sem_post() "
								"failed");
						launchagent_shutdown_thread (l, 1);
					}
					if (l->verbose)
						rtx_message ("launchagent_reader_thread: "
								"sem posted");
				} else {
					if (rtx_mutex_unlock (l->eventQ) == -1) {
						rtx_error_flush ("rtx_mutex_unlock() "
								"failed");
						launchagent_shutdown_thread (l, 1);
					}
				}
				launchagent_update_statefile(l->apps);
				break;
			case LAUNCH_MSG_KILL :
				if ((appNd = rtx_list_lookup 
							(l->apps, nd->appName)) == NULL) {
					rtx_error_flush ("app name [%s] not found",
							nd->appName);
					launchagent_shutdown_thread (l, 1);
				}
				/* have app node */
				killFailed = 0;
				if (appNd->appPid <= 0) {
					/* No child was ever recorded for this app (its
					 * fork failed): kill() with pid 0 would signal the
					 * agent's own process group, so report a failure
					 * instead of signalling. */
					killFailed = 1;
					savedErrno = ESRCH;
				} else if (kill (appNd->appPid,
							(appNd->status>0)?SIGTERM:SIGKILL) == -1) {
					killFailed = 1;
					savedErrno = errno;
				}
				if (killFailed) {
					if ((nd = launch_create_msg 
								(LAUNCH_MSG_KILL_FAILED, appNd->appName))
							== NULL) {
						rtx_error_flush ("launch_create_msg() "
								"failed");
						launchagent_shutdown_thread (l, 1);
					}
					strncpy (nd->errMsg, strerror (savedErrno), 127);
					if (rtx_list_enqueue (l->events, nd) == -1) {
						rtx_error_flush ("rtx_list_enqueue() "
								"failed");
						launchagent_shutdown_thread (l, 1);
					}
					/* wake the writer so KILL_FAILED actually goes
					 * out; only then leave the case */
					if (rtx_sem_post (l->eventSem) == -1) {
						rtx_error_flush ("rtx_sem_post() "
								"failed");
						launchagent_shutdown_thread (l, 1);
					}
					break;
				}
				/* -1 means that this object has been killed once already */
				if (appNd->status == 1)
					appNd->status = -1;
				break;
			case LAUNCH_MSG_DONE :
				l->done = 1;
				rtx_main_signal_shutdown ();
				break;
			case LAUNCH_MSG_START :
			case LAUNCH_MSG_LAUNCH_DONE :
			case LAUNCH_MSG_LAUNCH_FAILED :
			case LAUNCH_MSG_KILL_FAILED :
			case LAUNCH_MSG_KILL_DONE :
			case LAUNCH_MSG_NONE :
			default :
				/* these are agent-to-launcher messages (or unknown);
				 * they are not valid as commands */
				rtx_error_flush ("message not valid [%s]", buf);
				break;
		}
	}
	free (buf);
	pthread_exit (NULL);
	return (NULL);
}

/**
 * writer thread
 *
 * this function fetches a message from the event Q and then
 * sends the corresponding message to the main launcher.
 */
void *
launchagent_writer_thread (
		void * arg    /**< pointer to LaunchAgent
					   **  structure */
		)
{
	LaunchAgent * l = (LaunchAgent *) arg;
	LaunchMsgNode * msgNd = NULL;
	char * buf = NULL;

	if ((buf = calloc (1, 1024)) == NULL) {
		rtx_error ("unable to allocate buffer");
		launchagent_shutdown_thread (l, 1);
	}
	if (l->verbose)
		rtx_message ("launchagent_writer_thread: up");
	while (! l->done) {
		if (l->verbose)
			rtx_message ("launchagent_writer_thread: waiting");
		if (rtx_sem_wait (l->eventSem) == -1) {
			rtx_error_flush ("rtx_sem_wait() failed");
			launchagent_shutdown_thread (l, 1);
		}
		if (l->done)
			continue;
		if (l->verbose)
			rtx_message ("launchagent_writer_thread: "
					"request signalled");
		/* a request is now available in the Q */
		if ((msgNd = rtx_list_dequeue (l->events)) == NULL) {
			rtx_error_flush ("rtx_list_dequeue() --> NULL");
			launchagent_shutdown_thread (l, 1);
		}
		if (l->verbose)
			rtx_message ("launchagent_writer_thread: "
					"got request");
		/* act on the request */
		switch (msgNd->msgType) {
			case LAUNCH_MSG_START :
				if (l->verbose)
					rtx_message ("launchagent_writer_thread: "
							"LAUNCH_MSG_START [%d]",
							l->launchSock->sock->sockfd);
				sprintf (buf, "START %s %d\n", msgNd->hostName,
						(int) msgNd->appPid);
				if (rtx_inet_write (l->launchSock->sock, buf,
							strlen (buf), NULL) == -1) {
					rtx_error_flush ("rtx_inet_write() failed");
					launchagent_shutdown_thread (l, 1);
				}
				break;
			case LAUNCH_MSG_LAUNCH_DONE :
				if (l->verbose)
					rtx_message ("launchagent_writer_thread: "
							"LAUNCH_MSG_LAUNCH_DONE [%d]",
							l->launchSock->sock->sockfd);
				sprintf (buf, "LAUNCH_DONE %s %d\n", msgNd->appName,
						(int) msgNd->appPid);
				if (rtx_inet_write (l->launchSock->sock, buf,
							strlen (buf), NULL) == -1) {
					rtx_error_flush ("rtx_inet_write() failed");
					launchagent_shutdown_thread (l, 1);
				}
				break;
			case LAUNCH_MSG_LAUNCH_FAILED :
				if (l->verbose)
					rtx_message ("launchagent_writer_thread: "
							"LAUNCH_MSG_LAUNCH_FAILED");
				sprintf (buf, "LAUNCH_FAILED %s %s\n", msgNd->appName,
						msgNd->errMsg);
				if (rtx_inet_write (l->launchSock->sock, buf,
							strlen (buf), NULL) == -1) {
					rtx_error_flush ("rtx_inet_write() failed");
					launchagent_shutdown_thread (l, 1);
				}
				break;
			case LAUNCH_MSG_KILL_DONE :
				if (l->verbose)
					rtx_message ("launchagent_writer_thread: "
							"LAUNCH_MSG_KILL_DONE");
				sprintf (buf, "KILL_DONE %s\n", msgNd->appName);
				if (rtx_inet_write (l->launchSock->sock, buf,
							strlen (buf), NULL) == -1) {
					rtx_error_flush ("rtx_inet_write() failed");
					launchagent_shutdown_thread (l, 1);
				}
				break;
			case LAUNCH_MSG_KILL_FAILED :
				if (l->verbose)
					rtx_message ("launchagent_writer_thread: "
							"LAUNCH_MSG_KILL_FAILED");
				sprintf (buf, "KILL_FAILED %s %s\n", msgNd->appName,
						msgNd->errMsg);
				if (rtx_inet_write (l->launchSock->sock, buf,
							strlen (buf), NULL) == -1) {
					rtx_error_flush ("rtx_inet_write() failed");
					launchagent_shutdown_thread (l, 1);
				}
				break;
			case LAUNCH_MSG_LAUNCH :
			case LAUNCH_MSG_KILL :
			case LAUNCH_MSG_DONE :
			case LAUNCH_MSG_NONE :
			default :
				rtx_error_flush ("message not valid [%s]", buf);
				break;
		}
	}
	return (NULL);
}

/**
 * Path of the state file.
 *
 * launchagent_update_statefile() writes a shell script to this path,
 * launchagent_cleanup_statefile() runs it through system() and
 * launchagent_remove_statefile() unlinks it.
 */

static char stateFilePath[] = "/tmp/launch.state";

/**
 * Run the state file, if there is one, and then remove it.
 *
 * Nothing happens when stat() cannot find/inspect the file. Otherwise
 * the file is executed through system(); a -1 result is reported, and
 * the file is removed in either case.
 *
 * NOTE(review): the file executed here lives in /tmp and is made mode
 * 0777 by launchagent_update_statefile() -- worth a security review.
 */
void launchagent_cleanup_statefile()
{
	struct stat info;
	int present = (stat (stateFilePath, &info) != -1);

	if (present) {
		int rc = system (stateFilePath);

		if (rc == -1)
			rtx_error_errno_flush ("cleanup failed");
		launchagent_remove_statefile ();
	}
}

/**
 * Rewrite the state file from the current application list.
 *
 * The file is a /bin/sh script that first sends the default signal of
 * kill(1) to every application whose status is non-zero, sleeps two
 * seconds and then sends signal 9 to the same pids. Failures to open,
 * close or chmod the file are reported and otherwise ignored, so a
 * problem with the state file never takes the calling thread down.
 *
 * \param list the application list (LaunchAppNode entries)
 *
 * NOTE(review): the file is made world-writable and executable (0777)
 * and is later run through system() -- worth a security review.
 */
void launchagent_update_statefile(RtxList * list)
{
	LaunchAppNode * appNd = NULL;
	FILE * fp = fopen(stateFilePath,"w");

	if (fp == NULL) {
		/* without this check the fprintf() calls below would be handed
		 * a NULL stream */
		rtx_error_errno_flush ("fopen(%s) failed", stateFilePath);
		return;
	}
	fprintf(fp,"#!/bin/sh\n\n");
	fprintf(fp,"echo \"Sending SIGTERM\"\n");
	/* the list is walked twice; this relies on rtx_list_iterate()
	 * starting over after it has returned NULL */
	while ((appNd = (LaunchAppNode *) rtx_list_iterate (list)) != NULL) {
		if (appNd->status != 0) {
			fprintf(fp,"kill %d 2>/dev/null\n",(int) appNd->appPid);
		}
	}
	fprintf(fp,"sleep 2; echo \"Sending SIGKILL\"\n");
	while ((appNd = (LaunchAppNode *) rtx_list_iterate (list)) != NULL) {
		if (appNd->status != 0) {
			fprintf(fp,"kill -9 %d 2>/dev/null\n",(int) appNd->appPid);
		}
	}
	/* fclose() flushes the buffered script; a failure means the file on
	 * disk may be incomplete */
	if (fclose(fp) == EOF) {
		rtx_error_errno_flush ("fclose(%s) failed", stateFilePath);
		return;
	}
	if (chmod(stateFilePath,0777) == -1) {
		rtx_error_errno_flush ("chmod(%s) failed", stateFilePath);
	}
}

/**
 * Remove the state file.
 *
 * The result of unlink() is not examined, so calling this when the file
 * does not exist is harmless.
 */
void launchagent_remove_statefile()
{
	(void) unlink (stateFilePath);
}

/**
 * watch for child processes that have exited
 *
 * Blocks in wait() until a child terminates. The pid is looked up in
 * the application list; on a match the application's status is reset
 * to 0, a KILL_DONE event is queued for the writer thread and the state
 * file is rewritten. When there are no children (ECHILD) the thread
 * sleeps for a second and tries again. Any other wait() error, or a pid
 * that is not in the list, shuts the agent down.
 *
 * NOTE(review): the list is searched without taking l->eventQ, although
 * the reader thread holds that mutex while it forks and records the
 * pid -- confirm whether a child that exits very early can be missed.
 *
 * \return NULL
 */
void *
launchagent_child_done_thread (
		void * arg    /**< pointer to 
					   ** LaunchAgent
					   **  structure */
		)
{
	LaunchAgent * l = (LaunchAgent *) arg;
	int exitStatus;
	pid_t deadPid;
	LaunchAppNode * cur = NULL, * match = NULL;
	LaunchMsgNode * msg = NULL;

	if (l->verbose)
		rtx_message ("launchagent_child_done_thread: up");
	while (! l->done) {
		if (l->verbose)
			rtx_message ("launchagent_child_done_thread: waiting");
		deadPid = wait (&exitStatus);
		if (deadPid == -1) {
			if (errno != ECHILD) {
				rtx_error_errno_flush ("wait() failed");
				launchagent_shutdown_thread (l, 1);
			}
			/* no children at the moment: poll again shortly */
			sleep (1);
			continue;
		}
		if (l->verbose)
			rtx_message ("launchagent_child_done_thread: [%d]",
					deadPid);
		/* a child has terminated and we have its pid; the walk always
		 * runs to the end of the list, so the last match wins */
		match = NULL;
		for (cur = (LaunchAppNode *) rtx_list_iterate (l->apps);
				cur != NULL;
				cur = (LaunchAppNode *) rtx_list_iterate (l->apps)) {
			if (cur->appPid == procPidMatches (cur, deadPid))
				match = cur;
		}
		if (match == NULL) {
			rtx_error_flush ("app not found [pid = %d]\n",
					deadPid);
			launchagent_shutdown_thread (l, 1);
		} else {
			match->status = 0;
			if (l->verbose)
				rtx_message ("launchagent_child_done_thread: [%s]",
						match->appName);
			msg = launch_create_msg (LAUNCH_MSG_KILL_DONE,
					match->appName);
			if (msg == NULL) {
				rtx_error_flush ("launch_create_msg() "
						"failed");
				launchagent_shutdown_thread (l, 1);
			}
			if (rtx_list_enqueue (l->events, msg) == -1) {
				rtx_error_flush ("rtx_list_enqueue() "
						"failed");
				launchagent_shutdown_thread (l, 1);
			}
			if (rtx_sem_post (l->eventSem) == -1) {
				rtx_error_flush ("rtx_sem_post() failed");
				launchagent_shutdown_thread (l, 1);
			}
		}
		launchagent_update_statefile(l->apps);
	}
	return (NULL);
}

/**
 * watch for child processes that have come up
 *
 * Waits with sigwaitinfo() for SIGRTMIN. The integer value carried by
 * the signal is taken as the pid of the application that has come up;
 * the pid is looked up in the application list and a LAUNCH_DONE event
 * is queued for the writer thread. A pid that is not in the list, or a
 * sigwaitinfo() failure other than EINTR, shuts the agent down.
 *
 * \return NULL
 */
void *
launchagent_child_up_thread (
		void * arg    /**< pointer to 
					   ** LaunchAgent
					   **  structure */
		)
{
	LaunchAgent * l = (LaunchAgent *) arg;
	sigset_t childUpSet;
	siginfo_t info;
	pid_t appPid = 0;
	LaunchAppNode * appNd = NULL, * appNdFound = NULL;
	LaunchMsgNode * nd = NULL;

	/* Errors are reported with the flushing variant, as the child-done
	 * thread does for wait(): the thread ends right after reporting, so
	 * (presumably) an unflushed entry would never be emitted. */
	if (sigemptyset (&childUpSet) == -1) {
		rtx_error_errno_flush ("sigemptyset() failed");
		launchagent_shutdown_thread (l, 1);
	}
	if (sigaddset (&childUpSet, SIGRTMIN) == -1) {
		rtx_error_errno_flush ("sigaddset() failed");
		launchagent_shutdown_thread (l, 1);
	}
	if (l->verbose)
		rtx_message ("launchagent_child_up_thread: up");
	while (! l->done) {
		if (l->verbose)
			rtx_message ("launchagent_child_up_thread: waiting");
		if (sigwaitinfo (&childUpSet, &info) == -1) {
			/* an interrupted wait is not an error: go round again
			 * (the loop condition re-checks l->done) */
			if (errno == EINTR)
				continue;
			rtx_error_errno_flush ("sigwaitinfo () failed");
			launchagent_shutdown_thread (l, 1);
		}
		/* have a valid signal; its value is the sender's pid */
		appPid = (pid_t) info.si_value.sival_int;
		if (l->verbose)
			rtx_message ("launchagent_child_up_thread: [%d]",
					appPid);
		/* the walk always runs to the end of the list, so the last
		 * match wins */
		appNdFound = NULL;
		while ((appNd = (LaunchAppNode *) rtx_list_iterate
					(l->apps)) != NULL)
			if (appNd->appPid == appPid)
				appNdFound = appNd;
		appNd = appNdFound;
		if (appNd) {
			if (l->verbose)
				rtx_message ("launchagent_child_up_thread: [%s]",
						appNd->appName);
			if ((nd = launch_create_msg 
						(LAUNCH_MSG_LAUNCH_DONE, appNd->appName))
					== NULL) {
				rtx_error_flush ("launch_create_msg() "
						"failed");
				launchagent_shutdown_thread (l, 1);
			}
			if (l->verbose)
				rtx_message ("launchagent_child_up_thread: "
						"msg created");
			nd->appPid = appNd->appPid;
			if (rtx_list_enqueue (l->events, nd) == -1) {
				rtx_error_flush ("rtx_list_enqueue() "
						"failed");
				launchagent_shutdown_thread (l, 1);
			}
			if (l->verbose)
				rtx_message ("launchagent_child_up_thread: "
						"msg enqueued");
			if (rtx_sem_post (l->eventSem) == -1) {
				rtx_error_flush ("rtx_sem_post() "
						"failed");
				launchagent_shutdown_thread (l, 1);
			}
			if (l->verbose)
				rtx_message ("launchagent_child_up_thread: "
						"sem posted");
		} else {
			rtx_error_flush ("app not found [pid = %d]\n",
					appPid);
			launchagent_shutdown_thread (l, 1);
		}
	}
	return (NULL);
}

/**
 * shutdown the launchagent process
 *
 * Helper for the worker threads, normally used after an internal
 * error: adds to the agent's error count, requests a program shutdown
 * unless the agent is already marked done, and then ends the calling
 * thread. This function does not return.
 */
void 
launchagent_shutdown_thread (
		LaunchAgent * l,   /**< LaunchAgent 
							**  handle */
		int errors         /**< error count */
		)
{
	l->errors = l->errors + errors;
	if (l->done == 0) {
		rtx_main_signal_shutdown ();
	}
	pthread_exit (NULL);
}

/**
 * startup the launchagent program
 *
 * Changes to the launch directory, blocks SIGRTMIN, SIGCHLD and
 * SIGPIPE, creates the application and event lists, the event
 * semaphore and the event-queue mutex, connects to the launcher and
 * finally creates the four worker threads (reader, writer, child-up,
 * child-done).
 *
 * \return 0 on success; on the first failure, the value returned by the
 *         rtx error call that reports it (the caller tests for -1)
 */
int 
launchagent_startup (
		LaunchAgent * l   /**< LaunchAgent handle */
		)
{
	/* RTMIN: apps signal that they are up
	 * CHLD : sent when a child terminates
	 * PIPE : sent on a write to a socket closed at the other end */
	const int blocked[] = { SIGRTMIN, SIGCHLD, SIGPIPE };
	const size_t numBlocked = sizeof (blocked) / sizeof (blocked[0]);
	/* second argument of every rtx_thread_create() call below */
	const int threadDebug = (l->verbose > 1) ? 1 : 0;
	size_t k;

	rtx_message ("launchagent_startup: host [%s]", l->myHostName);

	/* change to the desired cwd */
	if (chdir (l->dir) == -1)
		return (rtx_error_errno ("chdir (%s) failed", l->dir));

	/* block the relevant signals */
	for (k = 0; k < numBlocked; k++) {
		if (rtx_signal_block (blocked[k]) == -1)
			return (rtx_error ("rtx_signal_block() failed"));
	}

	/* initialize the lists */
	l->apps = rtx_list_init ();
	if (l->apps == NULL)
		return (rtx_error ("rtx_list_init() failed"));
	l->events = rtx_list_init ();
	if (l->events == NULL)
		return (rtx_error ("rtx_list_init() failed"));

	/* initialize sems/mutexes */
	l->eventSem = rtx_sem_init (NULL, 0, 0);
	if (l->eventSem == NULL)
		return (rtx_error ("rtx_sem_init() failed"));
	l->eventQ = rtx_mutex_init (NULL, RTX_MUTEX_DEFAULT, 0);
	if (l->eventQ == NULL)
		return (rtx_error ("rtx_mutex_init() failed"));

	/* connect to the launcher */
	l->launchSock = rtx_inet_init (RTX_INET_TCP_CLIENT, NULL, 0,
			l->launchHostName, LAUNCH_PORT, NULL, NULL, NULL);
	if (l->launchSock == NULL)
		return (rtx_error ("rtx_inet_init failed [%s:%d]",
					l->launchHostName, LAUNCH_PORT));

	/* startup the threads */
	l->readerTh = rtx_thread_create ("launchagent[reader]", threadDebug,
			RTX_THREAD_SCHED_OTHER, 0, 0, RTX_THREAD_CANCEL_DEFERRED,
			launchagent_reader_thread, l, NULL, NULL);
	if (l->readerTh == NULL)
		return (rtx_error ("rtx_thread_create(reader_thread)"));

	l->writerTh = rtx_thread_create ("launchagent[writer]", threadDebug,
			RTX_THREAD_SCHED_OTHER, 0, 0, RTX_THREAD_CANCEL_DEFERRED,
			launchagent_writer_thread, l, NULL, NULL);
	if (l->writerTh == NULL)
		return (rtx_error ("rtx_thread_create(writer_thread)"));

	l->childUpTh = rtx_thread_create ("launchagent[childUp]", threadDebug,
			RTX_THREAD_SCHED_OTHER, 0, 0, RTX_THREAD_CANCEL_DEFERRED,
			launchagent_child_up_thread, l, NULL, NULL);
	if (l->childUpTh == NULL)
		return (rtx_error ("rtx_thread_create(child_up_thread)"));

	l->childDoneTh = rtx_thread_create ("launchagent[childDone]", threadDebug,
			RTX_THREAD_SCHED_OTHER, 0, 0, RTX_THREAD_CANCEL_DEFERRED,
			launchagent_child_done_thread, l, NULL, NULL);
	if (l->childDoneTh == NULL)
		return (rtx_error ("rtx_thread_create(child_done_thread)"));

	return (0);
}

/**
 * shutdown the launchagent program
 *
 * Marks the agent done, wakes the writer thread, joins the reader
 * thread and destroys the other three threads, closes the launcher
 * connection, destroys the mutex, semaphore and lists, and finally runs
 * and removes the state file (the generated script is what signals any
 * applications still recorded as running).
 *
 * Every member is checked against NULL before it is used.
 *
 * \return the number of errors encountered (0 on a clean shutdown)
 */
int 
launchagent_shutdown (
		LaunchAgent * l    /**< LaunchAgent handle */
		)
{
	int errs = 0;

	l->done = 1;
	/* wake the writer thread so that it notices the done flag */
	if (l->eventSem != NULL)
		if (rtx_sem_post (l->eventSem) == -1) {
			rtx_error ("rtx_sem_post() failed");
			errs++;
		}
	/* stop the worker threads */
	if (l->readerTh != NULL)
		if (rtx_thread_join (l->readerTh) == -1) {
			rtx_error ("rtx_thread_join(reader_thread)");
			errs++;
		}
	if (l->writerTh != NULL)
		if (rtx_thread_destroy_sync (l->writerTh) == -1) {
			rtx_error ("rtx_thread_destroy_sync(writer_thread)");
			errs++;
		}
	if (l->childUpTh != NULL)
		if (rtx_thread_destroy_sync (l->childUpTh) == -1) {
			rtx_error ("rtx_thread_destroy_sync(child_up_thread)");
			errs++;
		}
	if (l->childDoneTh != NULL)
		if (rtx_thread_destroy_sync (l->childDoneTh) == -1) {
			rtx_error ("rtx_thread_destroy_sync(child_done_thread)");
			errs++;
		}
	/* close the connection to the launcher */
	if (l->launchSock != NULL)
		if (rtx_inet_done (l->launchSock) == -1) {
			rtx_error ("rtx_inet_done");
			errs++;
		}
	/* release the synchronisation objects and the lists */
	if (l->eventQ != NULL)
		if (rtx_mutex_destroy (l->eventQ)) {
			rtx_error ("rtx_mutex_destroy");
			errs++;
		}
	if (l->eventSem != NULL)
		if (rtx_sem_destroy (l->eventSem)) {
			rtx_error ("rtx_sem_destroy");
			errs++;
		}
	if (l->apps != NULL)
		rtx_list_destroy (l->apps, 0);
	if (l->events != NULL)
		rtx_list_destroy (l->events, 0);

	/* run the kill script for anything still recorded, then remove it */
	launchagent_cleanup_statefile();

	return (errs);
}

