#!/bin/bash
# Command transformation script.
#
# Hadoop's "hadoop jar" command invokes RunJar, which copies all the class files
# into a temp dir and thereby increases startup time. Hence the command is
# invoked directly in the form $HADOOP_HOME/bin/hadoop, and the Splunk jar is
# exported into HADOOP_CLASSPATH instead.
# If the command passed to sudobash is not the typical hadoop command, the
# command formed will be: /bin/bash <args>
# The script also takes care of executing as MAPREDUCE_USER when it is present
# in the environment.

# log/log_error are defined before any function that calls them runs
# (deleteOldSplunkMRJars below logs at invocation time).
function log() {
    timestamp=""
    if [ "$SPLUNK_LOG_INCLUDE_TIMESTAMP" != "" ]; then
        timestamp=$(date "+%Y-%m-%d %H:%M:%S.%3N %z ")
    fi
    # Leave no space between the possible timestamp and the log level.
    # Hunk will treat lines that don't start with a log level as an error.
    log_level="DEBUG"
    if [ "$2" != "" ]; then
        log_level="$2"
    fi
    echo "${timestamp}${log_level} $1" >&2
}

function log_error() {
    log "$1" "ERROR"
}

OLD_SPLUNK_JAR1_NAME=SplunkMR-s6.0-h1.0.jar
OLD_SPLUNK_JAR2_NAME=SplunkMR-s6.0-h2.0.jar
OLD_SPLUNK_JAR3_NAME=SplunkMR-s6.0-hy2.0.jar

# ERP-695: delete old SplunkMR jars the first time Hunk is upgraded.
function deleteOldSplunkMRJars() {
    local OLD_JAR
    for OLD_JAR in "$SPLUNK_HOME/bin/jars/$OLD_SPLUNK_JAR1_NAME" \
                   "$SPLUNK_HOME/bin/jars/$OLD_SPLUNK_JAR2_NAME" \
                   "$SPLUNK_HOME/bin/jars/$OLD_SPLUNK_JAR3_NAME"; do
        if [ -f "$OLD_JAR" ]; then
            log "Deleting an old version of SplunkMR jar: $OLD_JAR"
            rm -f "$OLD_JAR"
        fi
    done
}
deleteOldSplunkMRJars

# ERP-695: replace an old SplunkMR jar name with the new one.
function replaceOldSplunkMRJar() {
    local s=$1
    s=${s//$OLD_SPLUNK_JAR1_NAME/SplunkMR-h1.jar}
    s=${s//$OLD_SPLUNK_JAR2_NAME/SplunkMR-h2.jar}
    s=${s//$OLD_SPLUNK_JAR3_NAME/SplunkMR-hy2.jar}
    echo "$s"
}

# NOTE: "$@" is flattened into a single string here, so arguments containing
# spaces are not preserved; the rest of the script relies on word splitting.
ARGS="$@"
ARGS="$(replaceOldSplunkMRJar "$ARGS")"

# Default command.
COMMAND="/bin/bash $ARGS"
# Hadoop command (the executable, e.g. $HADOOP_HOME/bin/hadoop).
HADOOP_COMMAND=$1
# Splunk jar name.
SPLUNK_JAR="$(replaceOldSplunkMRJar "$3")"

log "MAPREDUCE_USER=$MAPREDUCE_USER, SPLUNK_HOME=$SPLUNK_HOME, HADOOP_CLASSPATH=$HADOOP_CLASSPATH"

#####
# Check whether this is the usual SplunkMR command; echo "true" only when the
# full "hadoop jar SplunkMR*.jar com.splunk.mr.SplunkMR" shape matches.
# The function is called with the word-split $ARGS, so $2..$4 are the leading
# command words.
function checkSplunkMR() {
    if [[ $HADOOP_COMMAND =~ .*hadoop$ ]]; then
        if [[ "$2" == "jar" ]]; then
            if [[ "$3" =~ SplunkMR.*jar$ ]]; then
                if [[ "$4" == "com.splunk.mr.SplunkMR" ]]; then
                    echo "true"
                fi
            fi
        fi
    fi
}
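# Illustrative example (paths and options below are hypothetical): an incoming
# command line such as
#   /opt/hadoop/bin/hadoop jar SplunkMR-h2.jar com.splunk.mr.SplunkMR -Dmapreduce.job.queuename=default
# passes all four checks above, so checkSplunkMR echoes "true" and the script
# re-runs it directly as "$HADOOP_HOME/bin/hadoop <class> <args>" with the jar
# on HADOOP_CLASSPATH, skipping RunJar's temp-dir unpacking.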
# Decrypt AWS access and secret keys.
function decrypt_aws_creds {
    S3_CREDS_FILE="$1"
    SPLUNK_SECRET_FILE="$2"
    GPG_BIN=${GPG_BIN:-/usr/bin/gpg}
    CREDS=$(mktemp)
    "$GPG_BIN" --no-use-agent --yes --batch --decrypt \
        --passphrase-file "$SPLUNK_SECRET_FILE" \
        --output "$CREDS" "$S3_CREDS_FILE" > /dev/null 2>&1
    # The decrypted file is sourced, so it is expected to define the shell
    # variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY consumed below.
    source "$CREDS"
    rm -f "$CREDS"
}

# Mask the S3 secret key so it never shows up in logged command lines.
function mask_aws_keys {
    local SECRET_MASK="s/fs\.s3a\.secret\.key=[^ ]*/fs\.s3a\.secret\.key=\*\*\*\*/g"
    MASKED_COMMAND=$(echo "$1" | sed -e "$SECRET_MASK")
}

# Pull AWS credentials out of the argument list. A plaintext key pair wins;
# otherwise an encrypted creds file plus the splunk_secret passphrase file is
# decrypted. The surviving arguments are collected in PARAMS with the key pair
# re-appended at the end.
function get_aws_creds {
    PARAMS=()
    for ARG in "$@"; do
        # cut -f2- keeps everything after the first '=' so values containing
        # '=' are not truncated.
        if [[ -n $(echo "$ARG" | grep "\-Dfs\.s3a\.access\.key\=") ]]; then
            ACCESS_KEY=$(echo "$ARG" | cut -d= -f2-)
        elif [[ -n $(echo "$ARG" | grep "\-Dfs\.s3a\.secret\.key\=") ]]; then
            SECRET_KEY=$(echo "$ARG" | cut -d= -f2-)
        elif [[ -n $(echo "$ARG" | grep "encrypted_aws_creds") ]]; then
            ENCRYPTED_AWS_CREDS=$(echo "$ARG" | cut -d= -f2-)
        elif [[ -n $(echo "$ARG" | grep "splunk_secret") ]]; then
            SPLUNK_SECRET=$(echo "$ARG" | cut -d= -f2-)
        else
            PARAMS+=("$ARG")
        fi
    done
    if [[ -n "$ACCESS_KEY" && -n "$SECRET_KEY" ]]; then
        PARAMS+=("-Dfs.s3a.access.key=$ACCESS_KEY" "-Dfs.s3a.secret.key=$SECRET_KEY")
    elif [[ -n "$ENCRYPTED_AWS_CREDS" && -n "$SPLUNK_SECRET" ]]; then
        decrypt_aws_creds "$ENCRYPTED_AWS_CREDS" "$SPLUNK_SECRET"
        PARAMS+=("-Dfs.s3a.access.key=$AWS_ACCESS_KEY_ID" "-Dfs.s3a.secret.key=$AWS_SECRET_ACCESS_KEY")
    fi
}

######
# If the Hadoop RunJar invocation serves the intended purpose, echo the
# rewritten command; otherwise echo "false" (the caller exits 1).
function checkIfHadoopRunJar() {
    if [[ $(checkSplunkMR $ARGS) != "true" ]]; then
        echo "false"
    else
        # Construct the command: $4 is the main class we need to call, and the
        # args following $4 are args to the main class.
        get_aws_creds "${@:4}"
        echo "$HADOOP_COMMAND ${PARAMS[@]}"
    fi
}

#####
# Add third-party jars to the classpath.
if [[ "$HUNK_THIRDPARTY_JARS" != "" ]]; then
    # eval lets any variables embedded in the comma-separated list expand.
    ADD_TO_CLASSPATH=$(eval echo ${HUNK_THIRDPARTY_JARS//,/:})
#else
#    TP_COM_DIR_PREFIX="$SPLUNK_HOME/bin/jars/thirdparty/common"
#    TP_HIV_DIR_PREFIX="$SPLUNK_HOME/bin/jars/thirdparty/hive"
#    ADD_TO_CLASSPATH="$TP_COM_DIR_PREFIX/avro-1.7.4.jar:$TP_COM_DIR_PREFIX/avro-mapred-1.7.4.jar:$TP_COM_DIR_PREFIX/commons-compress-1.5.jar:$TP_COM_DIR_PREFIX/commons-io-2.1.jar:"\
#    "$TP_COM_DIR_PREFIX/libfb303-0.9.0.jar:$TP_COM_DIR_PREFIX/parquet-hive-bundle-1.5.0.jar:$TP_COM_DIR_PREFIX/snappy-java-1.0.5.jar:"\
#    "$TP_HIV_DIR_PREFIX/hive-exec-0.12.0.jar:$TP_HIV_DIR_PREFIX/hive-metastore-0.12.0.jar:$TP_HIV_DIR_PREFIX/hive-serde-0.12.0.jar"
fi

# Add AWS-related jars to the classpath if the Hunk OEM app is installed.
# This app should only be installed on the AMI.
if [ -d "$SPLUNK_HOME/etc/apps/splunk_emr/" ]; then
    ADD_TO_CLASSPATH="$ADD_TO_CLASSPATH:$SPLUNK_HOME"'/bin/jars/thirdparty/aws/*'
fi

RUNJAR_CMD=$(checkIfHadoopRunJar $ARGS)
if [[ "$RUNJAR_CMD" != "false" ]]; then
    ADD_TO_CLASSPATH="$ADD_TO_CLASSPATH:$SPLUNK_JAR"
    log "Adding SplunkMR jar to classpath ..."
    COMMAND=$RUNJAR_CMD
else
    log "Providers other than SplunkMR not supported."
    exit 1
fi

mask_aws_keys "$COMMAND"

export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$ADD_TO_CLASSPATH
# Re-export so a value inherited from the environment is passed on to the child.
if [[ "$HADOOP_USER_CLASSPATH_FIRST" != "" ]]; then
    export HADOOP_USER_CLASSPATH_FIRST="$HADOOP_USER_CLASSPATH_FIRST"
fi
log "HADOOP_USER_CLASSPATH_FIRST=$HADOOP_USER_CLASSPATH_FIRST"
log "HADOOP_CLASSPATH=$HADOOP_CLASSPATH"
log "Invoking command: $MASKED_COMMAND"

# Start the process as MAPREDUCE_USER if it is set. COMMAND is intentionally
# left unquoted so it word-splits back into arguments.
if [[ $MAPREDUCE_USER != "" ]]; then
    log "sudo -E -u $MAPREDUCE_USER $MASKED_COMMAND"
    sudo -E -u $MAPREDUCE_USER $COMMAND
else
    $COMMAND
fi
rc=$?
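# For reference, MASKED_COMMAND mirrors COMMAND with only the S3 secret key
# value redacted by mask_aws_keys, e.g. (illustrative key material):
#   before: ... -Dfs.s3a.secret.key=wJalrXUtnFEMIK7MDENG ...
#   after:  ... -Dfs.s3a.secret.key=**** ...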
if [ $rc -ne 0 ]; then
    log_error "Error while invoking command: $MASKED_COMMAND - Return code: $rc"
    exit $rc
fi
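# Example end-to-end invocation, assuming this script is installed as the
# "sudobash" wrapper mentioned in the header (all paths, user names, and key
# values below are hypothetical; the key pair is AWS's documented example):
#   MAPREDUCE_USER=mapred SPLUNK_HOME=/opt/splunk \
#     sudobash /opt/hadoop/bin/hadoop jar SplunkMR-h2.jar com.splunk.mr.SplunkMR \
#       -Dfs.s3a.access.key=AKIAIOSFODNN7EXAMPLE -Dfs.s3a.secret.key=wJalrXUtnFEMIK7MDENGbPxRfiCYEXAMPLEKEY
# would run com.splunk.mr.SplunkMR as user "mapred" via sudo -E, with the
# credential pair re-appended by get_aws_creds and the secret key masked in
# every log line.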