#! /bin/sh
# A script to verify/validate the proper operation of NWS and NWS-SLAPD
# Use FQDNs for the input hostnames... there is very little error checking
# at this point.

# Usage text shown when the script is called with the wrong argument count.
USAGE="Usage: $0 [nameserver host] [memoryserver host] [sensor host]"

# Directory holding the NWS client binaries on the local machine.
BIN_PREFIX="${GRID}/nwsinst/bin"

# Directories holding the NWS binaries on the name-server (N), memory-server
# (M) and sensor (S) hosts.  The single quotes are deliberate: the literal
# text ${HOME} is passed through ssh and expanded by the remote shell.
N_BIN_PREFIX='${HOME}/grid/nwsinst/bin'
M_BIN_PREFIX='${HOME}/grid/nwsinst/bin'
S_BIN_PREFIX='${HOME}/grid/nwsinst/bin'

#BIN_PREFIX="${GLOBUS_LOCATION}/bin"

# This modifies LD_LIBRARY_PATH for every command started by this script.
# The Globus lib directory is appended.  The ":" separator is only emitted
# when LD_LIBRARY_PATH already has a value: a leading ":" would be an empty
# entry, which the dynamic linker treats as the current directory.
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}${GLOBUS_LOCATION}/lib"

# Exactly three host names are required.  Otherwise print the usage text on
# stderr (it is a diagnostic, not program output) and stop with status 1.
if [ "$#" -ne 3 ]; then
	printf '%s\n' "${USAGE}" >&2
	exit 1
fi

# maybe will help nws-hostadmin
# Move an existing ~/.nwsdb out of the way so this run can write its own.
# The paths are quoted so a HOME containing whitespace is handled correctly.
# NOTE(review): nothing in the visible script moves .nwsdb.orig back.
if [ -e "${HOME}/.nwsdb" ]; then
	mv -- "${HOME}/.nwsdb" "${HOME}/.nwsdb.orig"
fi

# Hosts taken from the command line, in order: name server (N), memory
# server (M), sensor (S).
N_HOST=$1
M_HOST=$2
S_HOST=$3

# Port each NWS component is started on.
N_PORT=44000
M_PORT=44010
S_PORT=44020

# Ask each host for its own home directory.  The single quotes keep $HOME
# unexpanded locally so the remote shell expands it.
N_HOME=$(ssh "${N_HOST}" 'echo $HOME')
M_HOME=$(ssh "${M_HOST}" 'echo $HOME')
S_HOME=$(ssh "${S_HOST}" 'echo $HOME')

# Stop here if any lookup returned nothing (host unreachable, login failed).
# The scratch directory is later built as "<home>/NWSdir" and removed with
# "rm -fr"; an empty home would turn that into "/NWSdir" on the remote host.
if [ -z "${N_HOME}" ] || [ -z "${M_HOME}" ] || [ -z "${S_HOME}" ]; then
	echo "=( Could not determine the remote home directory on every host." >&2
	exit 1
fi

# more for nws-hostadmin
# Replace ~/.nwsdb with one "host:port" line per component, in the order
# name server, memory server, sensor.
{
	echo "${N_HOST}:${N_PORT}"
	echo "${M_HOST}:${M_PORT}"
	echo "${S_HOST}:${S_PORT}"
} > "${HOME}/.nwsdb"

# set up scratch file locations on all hosts
# Every path below is handed to "rm -fr", so refuse to continue if a home
# directory is empty: the path would collapse to "/NWSdir".
# NOTE: TMPDIR is also the conventional environment variable for temporary
# files; if it is already exported, this assignment changes it for every
# child process of the script.
TMPDIR="${HOME:?HOME is not set}/NWSdir"
N_TMPDIR="${N_HOME:?home directory on name server host is unknown}/NWSdir"
M_TMPDIR="${M_HOME:?home directory on memory server host is unknown}/NWSdir"
S_TMPDIR="${S_HOME:?home directory on sensor host is unknown}/NWSdir"

# Recreate the local scratch directory from scratch.  Everything later in
# the script writes into it, so failing to create it is fatal.
rm -fr -- "${TMPDIR}"
mkdir -- "${TMPDIR}" || exit 1

# Same on each remote host.  The command is sent as one string and parsed by
# the remote shell, so the path is single-quoted for that shell.
N_DIR_CMD="rm -fr '${N_TMPDIR}'; mkdir '${N_TMPDIR}'"
M_DIR_CMD="rm -fr '${M_TMPDIR}'; mkdir '${M_TMPDIR}'"
S_DIR_CMD="rm -fr '${S_TMPDIR}'; mkdir '${S_TMPDIR}'"
ssh "${N_HOST}" "${N_DIR_CMD}"
ssh "${M_HOST}" "${M_DIR_CMD}"
ssh "${S_HOST}" "${S_DIR_CMD}"

# build the clique configuration file
# One setting per line, each followed by an empty line.  The format string
# is reused for every argument, so the whole file is written in one go.
printf '%s\n\n' \
	"name:clique_verify" \
	"skillName:tcpMessageMonitor" \
	"member:${N_HOST}" \
	"member:${M_HOST}" \
	"member:${S_HOST}" \
	"size:32" \
	"period:10" \
	> "${TMPDIR}/clique.conf"

# name server
# Start nws_nameserver on the name-server host in the background.  Its error
# log, log and registration files go to the remote scratch directory.
# -n stops the backgrounded ssh from reading this script's stdin.
# "2>&1" must follow the file redirection: written the other way round,
# stderr keeps pointing at the terminal and only stdout reaches n_noise.
ssh -n "${N_HOST}" "${N_BIN_PREFIX}/nws_nameserver" -e "${N_TMPDIR}/name.err" \
	-l "${N_TMPDIR}/name.log" -f "${N_TMPDIR}/name.regs" -p "${N_PORT}" \
	> "${TMPDIR}/n_noise" 2>&1 &
echo "* Name Server on ${N_HOST}:${N_PORT}"

# memory server
# Start nws_memory on the memory-server host in the background, using the
# remote scratch directory for its data (-d), error log and log, and
# pointing it at the name server.
# -n stops the backgrounded ssh from reading this script's stdin.
# "2>&1" must follow the file redirection: written the other way round,
# stderr keeps pointing at the terminal and only stdout reaches m_noise.
ssh -n "${M_HOST}" "${M_BIN_PREFIX}/nws_memory" -d "${M_TMPDIR}" \
	-e "${M_TMPDIR}/memory.err" -l "${M_TMPDIR}/memory.log" \
	-N "${N_HOST}:${N_PORT}" -p "${M_PORT}" \
	> "${TMPDIR}/m_noise" 2>&1 &
echo "* Memory Server on ${M_HOST}:${M_PORT}"

# sensor
# start_sensor HOST REMOTE_SCRATCH_DIR NOISE_FILE
#   Start nws_sensor on HOST in the background, listening on S_PORT and
#   pointed at the name server and memory server.  Its error log and log go
#   to REMOTE_SCRATCH_DIR on HOST; whatever ssh prints locally is captured in
#   ${TMPDIR}/NOISE_FILE.
#   -n stops the backgrounded ssh from reading this script's stdin.
#   "2>&1" follows the file redirection so stderr is captured as well; the
#   reverse order would leave stderr on the terminal.
start_sensor() {
	ssh -n "$1" "${S_BIN_PREFIX}/nws_sensor" -e "$2/sensor.err" \
		-l "$2/sensor.log" -N "${N_HOST}:${N_PORT}" \
		-M "${M_HOST}:${M_PORT}" -p "${S_PORT}" \
		> "${TMPDIR}/$3" 2>&1 &
	echo "* Sensor on $1:${S_PORT}"
}

# One sensor per host: the sensor host first, then the name-server and
# memory-server hosts.  All three use the S_BIN_PREFIX binary path.
start_sensor "${S_HOST}" "${S_TMPDIR}" s_noise
start_sensor "${N_HOST}" "${N_TMPDIR}" sn_noise
start_sensor "${M_HOST}" "${M_TMPDIR}" sm_noise

# find remote pids for nws components
# find_remote_pid HOST PROGRAM PORT
#   Print the PID column (field 2 of "ps aux") of every process line on HOST
#   that contains both PROGRAM and PORT and does not contain "ssh".
#   Several matches give several PIDs, one per line.
find_remote_pid() {
	ssh "$1" ps aux |
		awk -v prog="$2" -v port="$3" \
			'index($0, prog) && index($0, port) && !index($0, "ssh") { print $2 }'
}

# NOTE(review): the lookup runs right after the background launches with no
# delay in between, so a slow host may not show its process yet.
N_REMOTE_PID=$(find_remote_pid "${N_HOST}" nws_nameserver "${N_PORT}")
M_REMOTE_PID=$(find_remote_pid "${M_HOST}" nws_memory "${M_PORT}")
S_REMOTE_PID=$(find_remote_pid "${S_HOST}" nws_sensor "${S_PORT}")
SN_REMOTE_PID=$(find_remote_pid "${N_HOST}" nws_sensor "${S_PORT}")
SM_REMOTE_PID=$(find_remote_pid "${M_HOST}" nws_sensor "${S_PORT}")

# All five components must have a PID.  Separate tests joined with && replace
# the obsolescent "-a" operator of "[".
if [ -n "${N_REMOTE_PID}" ] && [ -n "${M_REMOTE_PID}" ] &&
	[ -n "${S_REMOTE_PID}" ] && [ -n "${SN_REMOTE_PID}" ] &&
	[ -n "${SM_REMOTE_PID}" ]; then
	echo "* Remote jobs have started. ${N_REMOTE_PID} ${M_REMOTE_PID} ${S_REMOTE_PID} ${SN_REMOTE_PID} ${SM_REMOTE_PID}"
else
	echo "=( Remote jobs NOT started."
fi

# look for registrations and verify activity
echo "* Starting activity..."

# Run start_activity with the clique configuration against the sensor on
# each host, in the order sensor host, name-server host, memory-server host.
# The output of all three runs is collected in one log.  "2>&1" follows the
# file redirection so stderr is logged too; the reverse order would leave
# stderr on the terminal.
for ACTIVITY_HOST in "${S_HOST}" "${N_HOST}" "${M_HOST}"; do
	"${BIN_PREFIX}/start_activity" "${ACTIVITY_HOST}:${S_PORT}" \
		-f "${TMPDIR}/clique.conf"
done > "${TMPDIR}/start.log" 2>&1

# let the sensors gather some measurements...
sleep 20
echo "* Testing sensors..."

# Query the name server for the measurement series between each host pair.
# The name server was started on N_PORT, so the port is passed explicitly,
# as it is for the memory server and the sensors.
# NOTE(review): assumes nws_extract accepts host:port after -N like the
# other NWS commands in this script - confirm against the NWS documentation.
NAME_SERVER="${N_HOST}:${N_PORT}"

# "band" readings for N-M, N-S and M-S.
BAND_N_M=$("${BIN_PREFIX}/nws_extract" -N "${NAME_SERVER}" band "${N_HOST}" "${M_HOST}")
BAND_N_S=$("${BIN_PREFIX}/nws_extract" -N "${NAME_SERVER}" band "${N_HOST}" "${S_HOST}")
BAND_M_S=$("${BIN_PREFIX}/nws_extract" -N "${NAME_SERVER}" band "${M_HOST}" "${S_HOST}")
echo "${BAND_N_M} ${BAND_N_S} ${BAND_M_S}"

# "lat" readings for the same three pairs.
LAT_N_M=$("${BIN_PREFIX}/nws_extract" -N "${NAME_SERVER}" lat "${N_HOST}" "${M_HOST}")
LAT_N_S=$("${BIN_PREFIX}/nws_extract" -N "${NAME_SERVER}" lat "${N_HOST}" "${S_HOST}")
LAT_M_S=$("${BIN_PREFIX}/nws_extract" -N "${NAME_SERVER}" lat "${M_HOST}" "${S_HOST}")
echo "${LAT_N_M} ${LAT_N_S} ${LAT_M_S}"

echo "* Cleaning up..."

# kill_remote HOST PIDS
#   Send SIGKILL to PIDS on HOST.  When no PID was recorded, log that fact
#   instead of running a "kill -9" with no process argument.
kill_remote() {
	if [ -z "$2" ]; then
		echo "no PID recorded on $1; nothing to kill"
		return 0
	fi
	# $2 is left unquoted on purpose: it may hold several PIDs separated by
	# newlines, and each must reach the remote kill as its own argument.
	ssh "$1" kill -9 $2
}

# Stop the name server, memory server and the three sensors.  All output is
# collected in ded.log.  "2>&1" follows the file redirection so stderr is
# logged too; the reverse order would leave stderr on the terminal.
{
	kill_remote "${N_HOST}" "${N_REMOTE_PID}"
	kill_remote "${M_HOST}" "${M_REMOTE_PID}"
	kill_remote "${S_HOST}" "${S_REMOTE_PID}"
	kill_remote "${N_HOST}" "${SN_REMOTE_PID}"
	kill_remote "${M_HOST}" "${SM_REMOTE_PID}"
} > "${TMPDIR}/ded.log" 2>&1

echo "* Done."
