You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

222 lines
9.0 KiB

# scripted inputs entry point
import os
import sys
import argparse
import datetime
import splunk_instrumentation.datetime_util as datetime_util
from time import sleep
# Command-line handling.
# This must happen before splunk_instrumentation.constants is imported.
parser = argparse.ArgumentParser()
parser.add_argument('--scheme', action='store_true')
parser.add_argument('-v', '--validate-arguments', action='store_true')
parser.add_argument('--no-collect', action='store_true', help='will not collect and index data')
parser.add_argument('--no-send', action='store_true', help='will not query _telemetry and send data')
parser.add_argument('-m', '--mode', default="INPUT", help='is required if not running from splund modular inputs')
parser.add_argument('--test-schema')
parser.add_argument('--log-level')
parser.add_argument('--username')
parser.add_argument('--password')
parser.add_argument('--execution-id')
parser.add_argument('--quickdraw-url', help='used to override the quickdraw-url')
parser.add_argument('--run-unscheduled', help='Run even if not scheduled', default=False)
parser.add_argument('--default-quickdraw', help='used to override the quickdraw-url response')
parser.add_argument('--start-date', help='first date to query, in YYYY-MM-DD format (defaults to yesterday)')
parser.add_argument('--stop-date', help='last date to query, in YYYY-MM-DD format (inclusive) (defaults to yesterday)')
parser.add_argument('--batch-num', help='batch number')
args = parser.parse_args()

# configuration is done through environmental variables. Convert command line to environmental.
#
# os.environ only accepts string values. The store_true flags (--no-collect,
# --no-send) parse to the bool True, so assigning them directly raises
# TypeError; every value is therefore passed through str() before export.
# NOTE(review): the flags are exported as the string 'True' -- presumably the
# consumers only test for presence/truthiness; confirm against constants.
for _env_name, _arg_value in (
        ('INST_MODE', args.mode),
        ('INST_NO_COLLECT', args.no_collect),
        ('INST_NO_SEND', args.no_send),
        ('INST_TEST_SCHEMA', args.test_schema),
        ('INST_DEBUG_LEVEL', args.log_level),
        ('INST_EXECUTION_ID', args.execution_id),
        ('QUICKDRAW_URL', args.quickdraw_url),
        ('DEFAULT_QUICKDRAW', args.default_quickdraw),
        ('SPLUNK_USERNAME', args.username),
        ('SPLUNK_PASSWORD', args.password),
        ('RUN_UNSCHEDULE', args.run_unscheduled),
):
    # Unset / empty options are not exported at all.
    if _arg_value:
        os.environ[_env_name] = str(_arg_value)
# Routine to get the value of an input token
def get_key():
    '''
    Reads the whole of stdin and stores it, with trailing whitespace
    removed, in the INST_TOKEN environment variable.
    '''
    # stdin carries nothing but the token
    token = sys.stdin.read().rstrip()
    os.environ['INST_TOKEN'] = token
# Default SPLUNK_DB to <SPLUNK_HOME>/var/lib/splunk when it is not already set.
if not os.environ.get("SPLUNK_DB"):
    # SPLUNK_HOME is passed as its own path component so a separator is
    # inserted before 'var' (plain string concatenation produced e.g.
    # '/opt/splunkvar/lib/splunk'). A missing SPLUNK_HOME raises KeyError
    # naming the variable.
    os.environ['SPLUNK_DB'] = os.path.join(os.environ['SPLUNK_HOME'], 'var', 'lib', 'splunk')
# the default mode is INPUT and is what scripted inputs uses and implies
# there is a token passed in to stdin.
if os.environ['INST_MODE'] == "INPUT":
    get_key()
# these imports include splunk_instrumentation.constants, which needs to be imported after the environment variables are set
from splunk_instrumentation.constants import SPLUNKRC, INST_PRE_EXECUTE_SLEEP, SPLUNKD_URI, BATCHES_PER_HOUR, BATCHES_MAX_SIZE # noqa: E402
from splunk_instrumentation.service_bundle import ServiceBundle # noqa: E402
from splunk_instrumentation.splunkd import Splunkd # noqa: E402
from splunk_instrumentation.input import run_input # noqa: E402
from splunk_instrumentation.report import report # noqa: E402
def normalize_date_range_params(args, report_start_date):
    '''
    Resolves the date range used for Data collection, in place on args.
    args.start_date becomes, in order of preference: the parsed --start-date,
    the parsed reportStartDate, or yesterday.
    args.stop_date becomes the parsed --stop-date, or yesterday.
    :param args: List of arguments provided through CLI
    :param report_start_date: reportStartDate specified in telemetry.conf
    :return:
    '''
    fallback_day = datetime.date.today() - datetime.timedelta(days=1)

    if args.start_date:
        first_day = datetime_util.str_to_date(args.start_date)
    elif report_start_date:
        first_day = datetime_util.str_to_date(report_start_date)
    else:
        first_day = fallback_day
    args.start_date = first_day

    if args.stop_date:
        args.stop_date = datetime_util.str_to_date(args.stop_date)
    else:
        args.stop_date = fallback_day
def validate_date_range(args):
    '''
    Ends the process with exit status 0 when the date range is empty,
    i.e. when args.stop_date is earlier than args.start_date. Otherwise
    returns without doing anything.
    :param args: Arguments whose start_date / stop_date are compared
    :return:
    '''
    # SPL-153360 This can happen when the user has gone from no opt-in to some opt-in
    # on the same day of the scheduled collection, before the script has run. This is
    # due to the TelemetryHandler.cpp file, which detects the switch from no opt-in to
    # some opt-in and sets the reportStartDate to today.
    #
    # When the script finally runs, it has a default stop date of yesterday, but
    # reportStartDate sets the lower bound, which is today in that case. We do not
    # want to generate alarming error messages, so just log the occurrence and exit
    # gracefully.
    if args.stop_date < args.start_date:
        report.report('collection-canceled', {
            'reason': 'Start date is after stop date. No data to collect.',
            'start_date': args.start_date,
            'stop_date': args.stop_date
        })
        # sys.exit rather than the bare exit(): the latter is injected by the
        # site module for interactive use and is not guaranteed to exist.
        sys.exit(0)
def should_input_run(telemetry_conf_service, batch_num):
    '''
    Compares current time with the scheduledDay and scheduledHour
    to determine whether Input should execute or not.

    Saved searches are executed in batches [0, BATCHES_MAX_SIZE) over two
    hours: batches [0, BATCHES_PER_HOUR) belong to scheduledHour and batches
    [BATCHES_PER_HOUR, BATCHES_MAX_SIZE) belong to the hour after it.
    :param telemetry_conf_service: Service for telemetry.conf
    :param batch_num: Batch number of this invocation, or None when no batch
                      was given (all batches then run at scheduledHour)
    :return: True if current time matched scheduling in telemetry.conf
    '''
    scheduled_day = telemetry_conf_service.content.get('scheduledDay')
    scheduled_hour = telemetry_conf_service.content.get('scheduledHour')

    now = datetime.datetime.now()

    # 0 for batches that run at scheduledHour, 1 for batches that run in the
    # hour after it. No batch number means "run everything at scheduledHour".
    hour_offset = 0 if batch_num is None else batch_num // BATCHES_PER_HOUR

    should_run = False
    if hour_offset in (0, 1):
        # Shift "now" back by the batch's offset and compare that moment with
        # the schedule. Comparing the shifted time (instead of adding 1 to the
        # scheduled hour) keeps the second-hour batches working when
        # scheduledHour is 23: they run at hour 0 of the following day, where
        # both the hour and the weekday differ from the configured values.
        slot_start = now - datetime.timedelta(hours=hour_offset)
        try:
            hour_matches = int(scheduled_hour) == slot_start.hour
        except (TypeError, ValueError):
            # missing or non-numeric scheduledHour can never match
            hour_matches = False
        day_matches = scheduled_day == '*' or scheduled_day == str(slot_start.weekday())
        should_run = day_matches and hour_matches

    report.report('schedule-data', {
        'schedule': {
            'day': scheduled_day,
            'hour': scheduled_hour
        },
        'now': {
            'day': str(now.weekday()),
            'hour': str(now.hour)
        },
        'batchNum': str(batch_num),
        'should_run': should_run
    })
    return should_run
def process_input_params(telemetry_conf_service, args):
    '''
    Normalizes and validates the Input date range, then (when applicable)
    writes the new reportStartDate back to telemetry.conf.
    :param telemetry_conf_service: Service for telemetry.conf
    :param args: List of arguments passed to Scripted input
    :return:
    '''
    configured_start = telemetry_conf_service.content.get('reportStartDate')
    report.report('reportStartDate', configured_start)

    normalize_date_range_params(args, configured_start)
    validate_date_range(args)

    # 'reportStartDate' is only advanced before triggering input.py when
    # 1. no batch num was provided (all batches are executed by default) OR
    # 2. the last batch is executing (BATCHES_MAX_SIZE - 1, batches start at 0) OR
    # 3. this is an unscheduled invocation
    batch = get_batch_num(args)
    covers_final_batch = batch is None or batch == BATCHES_MAX_SIZE - 1
    if not (covers_final_batch or os.environ.get('RUN_UNSCHEDULE')):
        return

    next_start = args.stop_date
    # exact-type check: only plain datetime.date values are formatted
    if type(next_start) is datetime.date:
        next_start = next_start.strftime('%Y-%m-%d')
    telemetry_conf_service.update({'reportStartDate': next_start})
def get_batch_num(args):
    '''
    Returns the --batch-num input argument as an int, or None when it is
    empty/missing or the literal string "None".
    '''
    raw = args.batch_num
    if not raw or raw == "None":
        return None
    return int(raw)
# Routine to index data
def main():
    '''
    Connects to splunkd and runs the input when it is either forced through
    RUN_UNSCHEDULE or due according to telemetry.conf; otherwise exits the
    process with status 114.
    '''
    dev_mode = os.environ['INST_MODE'] == "DEV"
    if dev_mode:
        splunkd = Splunkd(**SPLUNKRC)
    else:
        # outside DEV mode, wait before connecting with the input token
        sleep(INST_PRE_EXECUTE_SLEEP)
        splunkd = Splunkd(token=os.environ['INST_TOKEN'], server_uri=SPLUNKD_URI)

    telemetry_conf_service = ServiceBundle(splunkd).telemetry_conf_service
    batch_num = get_batch_num(args)

    forced = os.environ.get('RUN_UNSCHEDULE')
    if not (forced or should_input_run(telemetry_conf_service, batch_num)):
        # indicate to caller that input wasn't executed
        sys.exit(114)

    process_input_params(telemetry_conf_service, args)
    run_input({'start': args.start_date, 'stop': args.stop_date, 'batchNum': batch_num})
# Script must implement these args: scheme, validate-arguments
# Run the input. main() exits with status 114 itself when the input was not
# due to run, so the line after it is only reached when main() returned.
main()
# main() returned normally: report success to the caller.
sys.exit(0)

Powered by BW's shoe-string budget.