You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
222 lines
9.0 KiB
222 lines
9.0 KiB
# scripted inputs entry point
|
|
|
|
import os
|
|
import sys
|
|
import argparse
|
|
import datetime
|
|
import splunk_instrumentation.datetime_util as datetime_util
|
|
from time import sleep
|
|
|
|
# Command line parsing. This must happen before splunk_instrumentation.constants
# is imported: the options parsed here are mirrored into environment variables
# further down, ahead of that import.
parser = argparse.ArgumentParser()

# --scheme and --validate-arguments are accepted as plain flags.
# NOTE(review): nothing visible in this script reads them afterwards -- presumably
# they only need to be accepted; confirm against the scripted input contract.
parser.add_argument('--scheme', action='store_true')
parser.add_argument('-v', '--validate-arguments', action='store_true')

# Switches that disable one half of the work.
parser.add_argument('--no-collect', action='store_true', help='will not collect and index data')
parser.add_argument('--no-send', action='store_true', help='will not query _telemetry and send data')

# Execution mode; defaults to "INPUT".
parser.add_argument('-m', '--mode', default="INPUT", help='is required if not running from splunkd modular inputs')

parser.add_argument('--test-schema')
parser.add_argument('--log-level')
parser.add_argument('--username')
parser.add_argument('--password')
parser.add_argument('--execution-id')
parser.add_argument('--quickdraw-url', help='used to override the quickdraw-url')

# Not a store_true flag: it takes a value, and any non-empty value enables it.
parser.add_argument('--run-unscheduled', help='Run even if not scheduled', default=False)
parser.add_argument('--default-quickdraw', help='used to override the quickdraw-url response')

# Date range to query; both ends default to yesterday when omitted.
parser.add_argument('--start-date', help='first date to query, in YYYY-MM-DD format (defaults to yesterday)')
parser.add_argument('--stop-date', help='last date to query, in YYYY-MM-DD format (inclusive) (defaults to yesterday)')

parser.add_argument('--batch-num', help='batch number')

args = parser.parse_args()
|
|
|
|
|
|
# Configuration is done through environment variables, so mirror every command
# line option that was supplied into os.environ. Options that were not supplied
# (falsy) leave the corresponding variable untouched.
if args.mode:
    os.environ['INST_MODE'] = args.mode

# --no-collect / --no-send are store_true flags, i.e. booleans. os.environ only
# accepts strings (assigning a bool raises TypeError), so store their string form.
# NOTE(review): presumably consumers only test these variables for presence or
# truthiness -- confirm against splunk_instrumentation.constants.
if args.no_collect:
    os.environ['INST_NO_COLLECT'] = str(args.no_collect)
if args.no_send:
    os.environ['INST_NO_SEND'] = str(args.no_send)

if args.test_schema:
    os.environ['INST_TEST_SCHEMA'] = args.test_schema
if args.log_level:
    os.environ['INST_DEBUG_LEVEL'] = args.log_level
if args.execution_id:
    os.environ['INST_EXECUTION_ID'] = args.execution_id
if args.quickdraw_url:
    os.environ['QUICKDRAW_URL'] = args.quickdraw_url
if args.default_quickdraw:
    os.environ['DEFAULT_QUICKDRAW'] = args.default_quickdraw
if args.username:
    os.environ['SPLUNK_USERNAME'] = args.username
if args.password:
    os.environ['SPLUNK_PASSWORD'] = args.password

# The variable name (without a trailing "D") is what the rest of this script
# reads back, so it is kept as is.
if args.run_unscheduled:
    os.environ['RUN_UNSCHEDULE'] = args.run_unscheduled
|
|
|
|
|
|
# Routine to get the value of an input token
|
|
def get_key():
    '''
    Reads the input token from stdin and publishes it as INST_TOKEN.

    All of stdin is consumed; the payload is expected to be just the token,
    so only trailing whitespace is stripped before it is stored in the
    environment.
    :return:
    '''
    token = sys.stdin.read().rstrip()
    os.environ['INST_TOKEN'] = token
|
|
|
|
# Default SPLUNK_DB to <SPLUNK_HOME>/var/lib/splunk when it is not already set.
# SPLUNK_HOME is passed to os.path.join as its own component so that the path
# separator is inserted even when SPLUNK_HOME has no trailing slash. When
# SPLUNK_HOME is missing there is nothing to derive a default from, so
# SPLUNK_DB is left unset rather than failing on None + str.
if not os.environ.get("SPLUNK_DB") and os.environ.get('SPLUNK_HOME'):
    os.environ['SPLUNK_DB'] = os.path.join(os.environ['SPLUNK_HOME'], 'var', 'lib', 'splunk')
|
|
|
|
# INPUT is the default mode and the one scripted inputs run in. It implies
# that a token is passed in on stdin, so read it before anything else
# needs it.
if os.environ['INST_MODE'] == "INPUT":
    get_key()
|
|
|
|
# these imports include splunk_instrumentation.constants, which needs to be imported after the environment variables are set
|
|
from splunk_instrumentation.constants import SPLUNKRC, INST_PRE_EXECUTE_SLEEP, SPLUNKD_URI, BATCHES_PER_HOUR, BATCHES_MAX_SIZE # noqa: E402
|
|
from splunk_instrumentation.service_bundle import ServiceBundle # noqa: E402
|
|
from splunk_instrumentation.splunkd import Splunkd # noqa: E402
|
|
from splunk_instrumentation.input import run_input # noqa: E402
|
|
from splunk_instrumentation.report import report # noqa: E402
|
|
|
|
|
|
def normalize_date_range_params(args, report_start_date):
    '''
    Resolves the date range used for data collection, in place on args.

    The start date is taken from args.start_date when given, otherwise from
    report_start_date when given, otherwise it is yesterday.
    The stop date is taken from args.stop_date when given, otherwise it is yesterday.
    String values are converted with datetime_util.str_to_date.
    :param args: Parsed CLI arguments; start_date and stop_date are overwritten
    :param report_start_date: reportStartDate specified in telemetry.conf
    :return:
    '''
    fallback = datetime.date.today() - datetime.timedelta(days=1)

    if args.start_date:
        start = datetime_util.str_to_date(args.start_date)
    elif report_start_date:
        start = datetime_util.str_to_date(report_start_date)
    else:
        start = fallback
    args.start_date = start

    if args.stop_date:
        stop = datetime_util.str_to_date(args.stop_date)
    else:
        stop = fallback
    args.stop_date = stop
|
|
|
|
|
|
def validate_date_range(args):
    '''
    Stops the script gracefully when the date range is empty.

    If args.stop_date is earlier than args.start_date, a 'collection-canceled'
    event is reported and the process exits with status 0. Otherwise this is a no-op.
    :param args: Arguments whose start_date / stop_date have already been normalized
    :return:
    '''
    # SPL-153360 This can happen when the user has gone from no opt-in to some opt-in
    # on the same day of the scheduled collection, before the script has run. This is
    # due to the TelemetryHandler.cpp file, which detects the switch from no opt-in to
    # some opt-in and sets the reportStartDate to today.
    #
    # When the script finally runs, it has a default stop date of yesterday, but
    # reportStartDate sets the lower bound, which is today in that case. We do not
    # want to generate alarming error messages, so just log the occurrence and exit
    # gracefully.
    if args.stop_date < args.start_date:
        report.report('collection-canceled', {
            'reason': 'Start date is after stop date. No data to collect.',
            'start_date': args.start_date,
            'stop_date': args.stop_date
        })
        # sys.exit rather than the exit() helper: the latter is injected by the
        # site module for interactive use and is not guaranteed to exist.
        sys.exit(0)
|
|
|
|
|
|
def should_input_run(telemetry_conf_service, batch_num):
    '''
    Compares current time with the scheduledDay and scheduledHour
    to determine whether Input should execute or not.

    All saved searches are executed in batches [0, BATCHES_MAX_SIZE) spread over
    two hours: batches [0, BATCHES_PER_HOUR) belong to scheduledHour and batches
    [BATCHES_PER_HOUR, BATCHES_MAX_SIZE) to the hour after it. When no batch
    number is given, everything runs during scheduledHour.

    The decision is also reported as a 'schedule-data' event.
    :param telemetry_conf_service: Service for telemetry.conf
    :param batch_num: Batch number of this invocation, or None to run all batches
    :return: True if current time matched scheduling in telemetry.conf
    '''
    scheduled_day = telemetry_conf_service.content.get('scheduledDay')
    scheduled_hour = telemetry_conf_service.content.get('scheduledHour')
    now = datetime.datetime.now()

    # 0 for batches that run during scheduledHour, 1 for batches that run during
    # the following hour. Any other value never matches the schedule.
    hour_offset = 0 if batch_num is None else batch_num // BATCHES_PER_HOUR

    # Compare hours numerically in both cases; a missing or non-numeric
    # scheduledHour simply never matches instead of raising.
    try:
        target_hour = int(scheduled_hour)
    except (TypeError, ValueError):
        target_hour = None

    should_run = False
    if target_hour is not None and hour_offset in (0, 1):
        # Step back by the offset and test that moment against the schedule.
        # For second-hour batches this handles the wrap over midnight: with
        # scheduledHour 23 they are due at 00:xx of the *next* day, which
        # "hour + 1 == now.hour" on the current weekday can never express.
        slot_start = now - datetime.timedelta(hours=hour_offset)
        day_matches = scheduled_day == '*' or scheduled_day == str(slot_start.weekday())
        should_run = day_matches and slot_start.hour == target_hour

    report.report('schedule-data', {
        'schedule': {
            'day': scheduled_day,
            'hour': scheduled_hour
        },
        'now': {
            'day': str(now.weekday()),
            'hour': str(now.hour)
        },
        'batchNum': str(batch_num),
        'should_run': should_run
    })
    return should_run
|
|
|
|
|
|
def process_input_params(telemetry_conf_service, args):
    '''
    Processes Input date range params and sets reportStartDate in telemetry.conf.

    The current reportStartDate is reported, the date range on args is
    normalized and validated (validation may exit the process), and then
    reportStartDate is advanced to the stop date when appropriate.
    :param telemetry_conf_service: Service for telemetry.conf
    :param args: List of arguments passed to Scripted input
    :return:
    '''
    report_start_date = telemetry_conf_service.content.get('reportStartDate')
    report.report('reportStartDate', report_start_date)

    normalize_date_range_params(args, report_start_date)
    validate_date_range(args)

    # update the 'reportStartDate' before triggering input.py ONLY if
    #   1. the batch num is not provided; we execute all batches by default OR
    #   2. it is currently executing the last batch (BATCHES_MAX_SIZE - 1, since we start from 0) OR
    #   3. it is an unscheduled invocation
    batch_num = get_batch_num(args)
    advance_start_date = (
        batch_num is None
        or batch_num == BATCHES_MAX_SIZE - 1
        or os.environ.get('RUN_UNSCHEDULE')
    )
    if advance_start_date:
        new_start_date = args.stop_date
        # isinstance (rather than an exact type comparison) so that date
        # subclasses such as datetime.datetime are also written as YYYY-MM-DD.
        if isinstance(new_start_date, datetime.date):
            new_start_date = new_start_date.strftime('%Y-%m-%d')
        telemetry_conf_service.update({
            'reportStartDate': new_start_date
        })
|
|
|
|
|
|
def get_batch_num(args):
    '''
    Returns the --batch-num input argument as an int.

    :param args: Parsed arguments carrying a batch_num attribute
    :return: The batch number, or None when it is missing, empty or the literal string "None"
    '''
    raw = args.batch_num
    if not raw or raw == "None":
        return None
    return int(raw)
|
|
|
|
|
|
# Routine to index data
|
|
def main():
    '''
    Builds the Splunkd client and runs the input when this invocation is due.

    In DEV mode the client is created from SPLUNKRC. In any other mode the
    script first sleeps for INST_PRE_EXECUTE_SLEEP and then creates the client
    from the INST_TOKEN environment variable and SPLUNKD_URI.

    The input runs when RUN_UNSCHEDULE is set in the environment or when
    should_input_run() approves; otherwise the process exits with status 114.
    :return:
    '''
    if os.environ['INST_MODE'] != "DEV":
        sleep(INST_PRE_EXECUTE_SLEEP)
        splunkd = Splunkd(token=os.environ['INST_TOKEN'], server_uri=SPLUNKD_URI)
    else:
        splunkd = Splunkd(**SPLUNKRC)

    conf_service = ServiceBundle(splunkd).telemetry_conf_service
    batch_num = get_batch_num(args)

    # The schedule is only consulted when the run was not forced.
    forced = os.environ.get('RUN_UNSCHEDULE')
    if not forced and not should_input_run(conf_service, batch_num):
        # indicate to caller that input wasn't executed
        sys.exit(114)

    process_input_params(conf_service, args)
    run_input({
        'start': args.start_date,
        'stop': args.stop_date,
        'batchNum': batch_num
    })
|
|
|
|
|
|
# Script must implement these args: scheme, validate-arguments
#
# Entry point: run the input. main() exits by itself with a non-zero status
# when the input was not executed, so getting past it means the run happened.
main()

# Report success to the caller explicitly.
sys.exit(0)
|