#!/bin/bash

# Command transformation script
#
# Hadoop's "hadoop jar" command invokes RunJar, which copies all the class files into a temp dir.
# This increases startup time, so the command is instead invoked directly in the form
# $HADOOP_HOME/bin/hadoop <splunk_main_class>, with the Splunk jar exported into HADOOP_CLASSPATH.
# If the sudobash invocation is not the typical hadoop command, the command formed will be: /bin/bash <rest of the args>
# The script also takes care of executing under MAPREDUCE_USER when it is present in the env.
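#
# Illustrative sketch of the transformation (paths and arguments are
# hypothetical, not taken from a real deployment):
#   incoming:  .../bin/hadoop jar $SPLUNK_HOME/bin/jars/SplunkMR-h2.jar com.splunk.mr.SplunkMR <args>
#   executed:  HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$SPLUNK_HOME/bin/jars/SplunkMR-h2.jar \
#              .../bin/hadoop com.splunk.mr.SplunkMR <args>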

OLD_SPLUNK_JAR1_NAME=SplunkMR-s6.0-h1.0.jar
OLD_SPLUNK_JAR2_NAME=SplunkMR-s6.0-h2.0.jar
OLD_SPLUNK_JAR3_NAME=SplunkMR-s6.0-hy2.0.jar

# ERP-695: delete the old SplunkMR jars the first time Hunk is upgraded.
function deleteOldSplunkMRJars() {
    local old_jar
    for old_jar in "$SPLUNK_HOME/bin/jars/$OLD_SPLUNK_JAR1_NAME" \
                   "$SPLUNK_HOME/bin/jars/$OLD_SPLUNK_JAR2_NAME" \
                   "$SPLUNK_HOME/bin/jars/$OLD_SPLUNK_JAR3_NAME"; do
        if [ -f "$old_jar" ]; then
            log "Deleting an old version of SplunkMR jar: $old_jar"
            rm -f "$old_jar"
        fi
    done
}

# ERP-695: replace old SplunkMR jar names with the new ones.
function replaceOldSplunkMRJar() {
    s=$1
    s=${s//$OLD_SPLUNK_JAR1_NAME/SplunkMR-h1\.jar}
    s=${s//$OLD_SPLUNK_JAR2_NAME/SplunkMR-h2\.jar}
    s=${s//$OLD_SPLUNK_JAR3_NAME/SplunkMR-hy2\.jar}
    echo "$s"
}
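
# Example (illustrative): replaceOldSplunkMRJar "hadoop jar SplunkMR-s6.0-h2.0.jar ..."
# prints "hadoop jar SplunkMR-h2.jar ...".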

ARGS="$@"
ARGS="$(replaceOldSplunkMRJar "$ARGS")"

# Default command
COMMAND="/bin/bash $ARGS"

# Hadoop command
HADOOP_COMMAND=$1

# Splunk jar name
SPLUNK_JAR="$(replaceOldSplunkMRJar "$3")"

function log() {
    timestamp=""
    if [ "$SPLUNK_LOG_INCLUDE_TIMESTAMP" != "" ]; then
        timestamp=$(date "+%Y-%m-%d %H:%M:%S.%3N %z ")
    fi
    # Leave no space between the possible timestamp and the log level:
    # Hunk treats lines that don't start with a log level as errors.
    log_level="DEBUG"
    if [ "$2" != "" ]; then
        log_level="$2"
    fi
    echo "${timestamp}${log_level} $1" >&2
}

function log_error() {
    log "$1" "ERROR"
}

# Clean up jars left by older Hunk versions, now that log() is defined
# (calling this any earlier would invoke log() before its definition).
deleteOldSplunkMRJars

log "MAPREDUCE_USER=$MAPREDUCE_USER, SPLUNK_HOME=$SPLUNK_HOME, HADOOP_CLASSPATH=$HADOOP_CLASSPATH"

#####
# Check whether this is the usual SplunkMR "hadoop jar" command; otherwise do
# not construct the transformed command.
function checkSplunkMR() {
    if [[ $HADOOP_COMMAND =~ .*hadoop$ && "$2" == "jar" && "$3" =~ SplunkMR.*jar$ && "$4" == "com.splunk.mr.SplunkMR" ]]; then
        echo "true"
    fi
}
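
# Example (illustrative): with positional args of the shape
#   /opt/hadoop/bin/hadoop jar SplunkMR-h2.jar com.splunk.mr.SplunkMR <args>
# checkSplunkMR prints "true"; for any other shape it prints nothing.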

# Decrypt the AWS access and secret keys.
function decrypt_aws_creds {
    S3_CREDS_FILE="$1"
    SPLUNK_SECRET_FILE="$2"
    GPG_BIN=${GPG_BIN:-/usr/bin/gpg}
    CREDS=$(mktemp)
    $GPG_BIN --no-use-agent --yes --batch --decrypt --passphrase-file "$SPLUNK_SECRET_FILE" --output "$CREDS" "$S3_CREDS_FILE" > /dev/null 2>&1
    source "$CREDS"
    rm -f "$CREDS"
}
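
# The decrypted file is expected to be sourceable shell defining the
# AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY variables consumed by
# get_aws_creds below; hypothetical contents:
#   AWS_ACCESS_KEY_ID=AKIA...
#   AWS_SECRET_ACCESS_KEY=...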

# Mask the AWS secret key so that it never appears in logs.
function mask_aws_keys {
    local SECRET_MASK="s/fs\.s3a\.secret\.key=[^ ]*/fs\.s3a\.secret\.key=\*\*\*\*/g"
    MASKED_COMMAND=$(echo "$1" | sed -e "$SECRET_MASK")
}
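
# Example (illustrative):
#   mask_aws_keys "hadoop -Dfs.s3a.secret.key=abc123 com.splunk.mr.SplunkMR"
# sets MASKED_COMMAND to "hadoop -Dfs.s3a.secret.key=**** com.splunk.mr.SplunkMR".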

# Pull AWS credentials out of the argument list: plain -Dfs.s3a.* keys are
# re-appended at the end of PARAMS, while encrypted credentials are decrypted
# first and then appended.
function get_aws_creds {
    PARAMS=()
    for ARG in "$@"; do
        if [[ -n $(echo "$ARG" | grep "\-Dfs\.s3a\.access\.key\=") ]]; then
            ACCESS_KEY=$(echo "$ARG" | cut -d= -f2)
        elif [[ -n $(echo "$ARG" | grep "\-Dfs\.s3a\.secret\.key\=") ]]; then
            SECRET_KEY=$(echo "$ARG" | cut -d= -f2)
        elif [[ -n $(echo "$ARG" | grep "encrypted_aws_creds") ]]; then
            ENCRYPTED_AWS_CREDS=$(echo "$ARG" | cut -d= -f2)
        elif [[ -n $(echo "$ARG" | grep "splunk_secret") ]]; then
            SPLUNK_SECRET=$(echo "$ARG" | cut -d= -f2)
        else
            PARAMS+=("$ARG")
        fi
    done
    if [[ -n "$ACCESS_KEY" && -n "$SECRET_KEY" ]]; then
        PARAMS+=("-Dfs.s3a.access.key=$ACCESS_KEY" "-Dfs.s3a.secret.key=$SECRET_KEY")
    elif [[ -n "$ENCRYPTED_AWS_CREDS" && -n "$SPLUNK_SECRET" ]]; then
        decrypt_aws_creds "$ENCRYPTED_AWS_CREDS" "$SPLUNK_SECRET"
        PARAMS+=("-Dfs.s3a.access.key=$AWS_ACCESS_KEY_ID" "-Dfs.s3a.secret.key=$AWS_SECRET_ACCESS_KEY")
    fi
}
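
# Example (illustrative): after
#   get_aws_creds com.splunk.mr.SplunkMR -Dfs.s3a.access.key=AKIA... -Dfs.s3a.secret.key=s3cr3t -Dmapreduce.job.queuename=q
# PARAMS is (com.splunk.mr.SplunkMR -Dmapreduce.job.queuename=q
#            -Dfs.s3a.access.key=AKIA... -Dfs.s3a.secret.key=s3cr3t).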

######
# Check whether the hadoop RunJar invocation serves its intended purpose;
# if not, echo "false" so the caller can exit 1.
function checkIfHadoopRunJar(){
    if [[ $(checkSplunkMR $ARGS) != "true" ]]; then
        echo "false"
    else
        # Construct the command: $4 is the main class to call, and the args
        # following $4 are the args to that main class.
        get_aws_creds "${@:4}"
        echo "$HADOOP_COMMAND ${PARAMS[@]}"
    fi
}
#####

# Add third-party jars to the classpath.
if [[ "$HUNK_THIRDPARTY_JARS" != "" ]]; then
    ADD_TO_CLASSPATH=$(eval echo ${HUNK_THIRDPARTY_JARS//,/:})
#else
#    TP_COM_DIR_PREFIX="$SPLUNK_HOME/bin/jars/thirdparty/common"
#    TP_HIV_DIR_PREFIX="$SPLUNK_HOME/bin/jars/thirdparty/hive"
#    ADD_TO_CLASSPATH="$TP_COM_DIR_PREFIX/avro-1.7.4.jar:$TP_COM_DIR_PREFIX/avro-mapred-1.7.4.jar:$TP_COM_DIR_PREFIX/commons-compress-1.5.jar:$TP_COM_DIR_PREFIX/commons-io-2.1.jar:" \
#        "$TP_COM_DIR_PREFIX/libfb303-0.9.0.jar:$TP_COM_DIR_PREFIX/parquet-hive-bundle-1.5.0.jar:$TP_COM_DIR_PREFIX/snappy-java-1.0.5.jar:" \
#        "$TP_HIV_DIR_PREFIX/hive-exec-0.12.0.jar:$TP_HIV_DIR_PREFIX/hive-metastore-0.12.0.jar:$TP_HIV_DIR_PREFIX/hive-serde-0.12.0.jar"
fi

# Add AWS-related jars to the classpath if the Hunk OEM app is installed;
# this app should only be installed on the AMI.
if [ -d "$SPLUNK_HOME/etc/apps/splunk_emr/" ] ; then
    ADD_TO_CLASSPATH="$ADD_TO_CLASSPATH:$SPLUNK_HOME"'/bin/jars/thirdparty/aws/*'
fi

RUNJAR_CMD=$(checkIfHadoopRunJar $ARGS)
if [[ "$RUNJAR_CMD" != "false" ]]; then
    ADD_TO_CLASSPATH=$ADD_TO_CLASSPATH:$SPLUNK_JAR
    log "Adding SplunkMR jar to classpath ..."

    COMMAND=$RUNJAR_CMD
else
    log "Providers other than SplunkMR not supported."
    exit 1
fi

# Compute MASKED_COMMAND so that credentials never reach the logs.
mask_aws_keys "$COMMAND"

export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$ADD_TO_CLASSPATH

# Re-export HADOOP_USER_CLASSPATH_FIRST (presumably so an inherited but
# unexported value reaches the hadoop child process).
if [[ "$HADOOP_USER_CLASSPATH_FIRST" != "" ]]; then
    export HADOOP_USER_CLASSPATH_FIRST="$HADOOP_USER_CLASSPATH_FIRST"
fi
log "HADOOP_USER_CLASSPATH_FIRST=$HADOOP_USER_CLASSPATH_FIRST"

log "HADOOP_CLASSPATH=$HADOOP_CLASSPATH"

log "Invoking command: $MASKED_COMMAND"

# Start the process as MAPREDUCE_USER when it is set.
if [[ $MAPREDUCE_USER != "" ]]; then
    log "sudo -E -u $MAPREDUCE_USER $MASKED_COMMAND"
    sudo -E -u $MAPREDUCE_USER $COMMAND
else
    $COMMAND
fi

# Capture the exit status of whichever branch ran the command above.
rc=$?

if [ $rc -ne 0 ]; then
    log_error "Error while invoking command: $MASKED_COMMAND - Return code: $rc"
    exit $rc
fi