You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
499 lines
18 KiB
499 lines
18 KiB
from __future__ import absolute_import
|
|
from __future__ import print_function
|
|
# Version 4.0
|
|
#
|
|
# Intersplunk provides simple access to the comm protocol between Splunk search
|
|
# operators.
|
|
#
|
|
# The intersplunk format is plain CSV, with a first-line field header.
|
|
#
|
|
|
|
from builtins import zip
|
|
from builtins import range
|
|
import csv
|
|
import sys
|
|
import copy
|
|
import re
|
|
if sys.version_info >= (3, 0):
|
|
from io import (BytesIO, TextIOWrapper, StringIO)
|
|
else:
|
|
from StringIO import StringIO
|
|
BytesIO = StringIO
|
|
from future.moves.urllib import parse as urllib_parse
|
|
import os
|
|
|
|
# Raise the csv module's per-field size cap.
#
# The csv default is 128KB; Intersplunk allows up to 10MB per field. See
# SPL-12117 for the background on issues surrounding field sizes.
# (csv.field_size_limit is available from python 2.5 onwards)
csv.field_size_limit(10 * 1024 * 1024)

# When true, readResults() expands '__mv_<field>' companion columns into lists.
MV_ENABLED = True
|
|
|
|
def set_binary_mode(fileobj):
    '''
    Put an already-open file object into binary mode on Windows.

    Intersplunk assumes the bytes it writes to stdout are exactly the bytes
    that get emitted, which is false on Windows where "\\n" is mapped to
    "\\r\\n". The usual cure is to open the file in binary mode, but stdout is
    already open, hence this workaround. On every other platform this is a
    no-op.
    '''
    # Pylint can't handle platform-dependent code.
    # pylint: disable-all
    if sys.platform != 'win32':
        return

    # msvcrt only exists on Windows, so it is imported at the point of use.
    import msvcrt
    msvcrt.setmode(fileobj.fileno(), os.O_BINARY)
|
|
|
|
def default_stdout_stream():
    '''
    Return the stream Intersplunk output is written to by default.

    On Python 3 this is the byte-oriented buffer underneath sys.stdout. On
    Python 2 it is sys.stdout itself, after set_binary_mode() has been
    applied to it.
    '''
    if sys.version_info < (3, 0):
        set_binary_mode(sys.stdout)
        return sys.stdout
    return sys.stdout.buffer
|
|
|
|
def splunkHome():
    '''
    Return the normalized value of the SPLUNK_HOME environment variable.

    Raises KeyError when SPLUNK_HOME is not set.
    '''
    home = os.environ["SPLUNK_HOME"]
    return os.path.normpath(home)
|
|
|
|
def isGetInfo(args):
    '''
    Classify a script invocation from its argument list.

    args[1] must be either "__GETINFO__" or "__EXECUTE__". Returns a tuple
    (is_getinfo, newargs) where newargs is args with that marker removed.

    For any other invocation (including fewer than two arguments) an error
    result is written via generateErrorResults() and the script exits
    through sys.exit(), so the function does not return.
    '''
    mode = args[1] if len(args) >= 2 else None
    if mode not in ("__GETINFO__", "__EXECUTE__"):
        # invalid invocation, exit and return error message immediately
        generateErrorResults("Unexpected first argument to script, expected '__GETINFO__' or '__EXECUTE__'.")
        sys.exit()

    remaining = [args[0]] + list(args[2:])
    return (mode == "__GETINFO__", remaining)
|
|
|
|
def parseError(msg):
    '''
    Report msg as an Intersplunk error result and terminate the script.

    The message is written by generateErrorResults(); sys.exit() is then
    called without a status argument, so this function never returns.
    '''
    generateErrorResults(msg)
    sys.exit()
|
|
|
|
def outputInfo(streaming, generating, retevs, reqsop, preop, timeorder=False, clear_req_fields=False, req_fields = None):
    '''
    Write the single-row "getinfo" description of this command and exit.

    Each truthy argument becomes the string '1' in the emitted row, each
    falsy one '0':

      streaming        -> 'streaming'
      generating       -> 'generating'
      retevs           -> 'retainsevents'
      reqsop           -> 'requires_preop'
      clear_req_fields -> 'clear_required_fields'

    preop is emitted verbatim as 'streaming_preop'. timeorder sets
    'generates_timeorder' for a generating command; for a non-generating
    command it instead clears 'overrides_timeorder' (which is '1'
    otherwise). req_fields, when not None and non-empty, is emitted as
    'required_fields'.

    The row is written with outputResults() using ',' as the multivalue
    delimiter, after which sys.exit() is called; the function never returns.
    '''
    def flag(value):
        return '1' if value else '0'

    infodict = {
        'streaming_preop': preop,
        'streaming': flag(streaming),
        'generating': flag(generating),
        'retainsevents': flag(retevs),
        'requires_preop': flag(reqsop),
        # only a generating command can declare that it generates time order
        'generates_timeorder': flag(generating and timeorder),
        # only a non-generating command with timeorder set turns this off
        'overrides_timeorder': flag(generating or not timeorder),
        'clear_required_fields': flag(clear_req_fields),
    }

    if req_fields is not None and len(req_fields) > 0:
        infodict['required_fields'] = req_fields

    outputResults([infodict], mvdelim=',')
    sys.exit()
|
|
|
|
'''
|
|
For multivalues, values are wrapped in '$' and separated using ';'
|
|
Literal '$' values are represented with '$$'
|
|
'''
|
|
def getEncodedMV(vals):
    '''
    Encode an iterable of strings as a single multivalue string.

    Every value has its literal '$' characters doubled to '$$', is wrapped
    in a pair of '$', and the wrapped values are joined with ';'. An empty
    iterable encodes to the empty string.
    '''
    # One join instead of repeated string concatenation in a loop.
    return ';'.join('$' + val.replace('$', '$$') + '$' for val in vals)
|
|
|
|
|
|
def decodeMV(s, vals):
    '''
    Decode a multivalue string (see getEncodedMV) and append its values
    to the list vals.

    Returns False when s is empty, or as soon as a character other than
    '$' or ';' is found outside a '$'-wrapped value; values decoded before
    that point have already been appended to vals. Returns True otherwise.

    Note that a final value with no closing '$' is dropped while True is
    still returned.
    '''
    if len(s) == 0:
        return False

    current = []      # characters of the value being read
    inside = False    # True while between an opening and a closing '$'
    pos = 0
    end = len(s)
    while pos < end:
        ch = s[pos]
        if inside:
            if ch != '$':
                current.append(ch)
            elif pos + 1 < end and s[pos + 1] == '$':
                # '$$' inside a value is an escaped literal '$'
                current.append('$')
                pos += 1
            else:
                # a lone '$' closes the value
                vals.append("".join(current))
                current = []
                inside = False
        elif ch == '$':
            inside = True
        elif ch != ';':
            return False
        pos += 1
    return True
|
|
|
|
|
|
def addMessage(messages, msg, key):
    '''
    Append msg to the list stored under key in the messages dict,
    creating that list first if the key is not present yet.
    '''
    messages.setdefault(key, []).append(msg)
|
|
|
|
def addInfoMessage(messages, msg):
    '''Record msg in messages under the "info_message" key.'''
    addMessage(messages, msg, key="info_message")
|
|
def addWarnMessage(messages, msg):
    '''Record msg in messages under the "warn_message" key.'''
    addMessage(messages, msg, key="warn_message")
|
|
def addErrorMessage(messages, msg):
    '''Record msg in messages under the "error_message" key.'''
    addMessage(messages, msg, key="error_message")
|
|
|
|
def outputResults(results, messages = None, fields = None, mvdelim = '\n', outputfile = None):
    '''
    Outputs the contents of a result set in Intersplunk format, for
    consumption by the next search processor.

    results    -- list of dicts, or None to write only the message header.
                  The dicts are modified in place: every list value is
                  replaced by its items joined with mvdelim, and a
                  '__mv_<key>' entry holding the getEncodedMV() form is
                  added next to it.
    messages   -- optional dict mapping a level key to a list of message
                  strings; written as "level=message" lines followed by
                  one blank line.
    fields     -- optional explicit list of CSV columns. When None, the
                  columns are all keys seen in results, in first-seen order.
                  Keys not listed are ignored.
    mvdelim    -- separator used to flatten list values.
    outputfile -- stream to write to; defaults to default_stdout_stream().
                  On Python 3 it must accept bytes.
    '''
    if outputfile is None:
        outputfile = default_stdout_stream()

    if messages is not None:
        # message header is everything before the first empty line, similar to the input
        # header format. also key = value, with stripping of whitespace
        for level, level_msgs in messages.items():
            for msg in level_msgs:
                line = "%s=%s\n" % (level, msg)
                if sys.version_info >= (3, 0):
                    line = line.encode()
                outputfile.write(line)
        outputfile.write(b"\n")

    if results is None:
        return

    seen = set()
    ordered_keys = []

    # Check each entry to see if it is a list (multivalued). If so, store the
    # encoded form under '__mv_<key>' and replace the list with a
    # mvdelim-separated string of the values.
    for row in results:
        # Snapshot the keys: '__mv_' entries are added while looping.
        for key in list(row.keys()):
            if isinstance(row[key], list):
                row['__mv_' + key] = getEncodedMV(row[key])
                row[key] = mvdelim.join(row[key])
        for key in row.keys():
            if key not in seen:
                seen.add(key)
                ordered_keys.append(key)

    header = ordered_keys if fields is None else fields

    wrapper = None
    if sys.version_info >= (3, 0):
        # newline='' stops the text layer from translating the '\r\n' row
        # terminators (and newlines embedded in values) that csv emits; the
        # bytes written must be the bytes emitted on every platform.
        wrapper = TextIOWrapper(outputfile, encoding = 'utf-8', newline = '', write_through = True)
        outputfile = wrapper
    try:
        dw = csv.DictWriter(outputfile, header, extrasaction='ignore')
        dw.writeheader()
        dw.writerows(results)
    finally:
        if wrapper is not None:
            # Don't close the underlying file, even when writing failed.
            wrapper.detach()
|
|
|
|
|
|
def outputStreamResults(results, version = "4.3", header = None, mvdelim = '\n', outputfile = None):
    '''
    Write results as one length-prefixed chunk.

    The optional header and the results are each rendered with
    outputResults() into an in-memory buffer. A line of the form
    "splunk <version>,<header length>,<body length>" is written to
    outputfile (default: default_stdout_stream()), followed by the header
    bytes and then the body bytes, each only when non-empty.
    '''
    if outputfile is None:
        outputfile = default_stdout_stream()

    def render(rows):
        # Serialize rows through outputResults() and hand back the raw bytes.
        scratch = BytesIO()
        outputResults(rows, None, None, mvdelim, scratch)
        data = scratch.getvalue()
        scratch.close()
        return data

    header_bytes = b"" if header is None else render(header)
    body_bytes = render(results)

    if sys.version_info >= (3, 0):
        version = version.encode()

    outputfile.write(b"splunk %s,%d,%d\n" % (version, len(header_bytes), len(body_bytes)))
    for part in (header_bytes, body_bytes):
        if part:
            outputfile.write(part)
|
|
|
|
def generateErrorResults(errorStr):
    '''
    Writes errorStr to the default stdout stream as a one-column CSV result
    set whose only field is "ERROR".

    Always returns None (legacy callers tried to use the return value).
    '''
    h = ["ERROR"]
    results = [ {"ERROR": errorStr} ]

    outputfile = default_stdout_stream()
    wrapper = None
    if sys.version_info >= (3, 0):
        # newline='' keeps the text layer from re-translating the '\r\n'
        # terminators (and any newline inside errorStr) written by csv.
        wrapper = TextIOWrapper(outputfile, encoding = 'utf-8', newline = '', write_through = True)
        outputfile = wrapper
    try:
        dw = csv.DictWriter(outputfile, h)
        dw.writeheader()
        dw.writerows(results)
    finally:
        if wrapper is not None:
            # Don't close the underlying file, even when writing failed.
            wrapper.detach()
    return None
|
|
|
|
|
|
def _readSettingsHeader(input_buf, settings):
    '''Read "attr:val" lines up to the first blank line (or EOF) into settings.'''
    last_attr = None
    while True:
        line = input_buf.readline()
        # Drop the line terminator only; a final line that lacks one must
        # not lose its last character.
        if line.endswith('\n'):
            line = line[:-1]
        if len(line) == 0:
            break

        colon = line.find(':')
        if colon < 0:
            # No colon: a continuation of the previous attribute's value.
            # Lines seen before any attribute are skipped.
            if last_attr:
                settings[last_attr] = settings[last_attr] + '\n' + urllib_parse.unquote(line)
            # Never fall through: the line is not an "attr:val" pair.
            continue

        # extract it and set value in settings
        last_attr = line[:colon]
        settings[last_attr] = urllib_parse.unquote(line[colon+1:])


def _readCsvResults(input_buf):
    '''Parse the CSV part of input_buf into a list of ordered result dicts.'''
    results = []
    header = None
    mv_fields = []
    for row in csv.reader(input_buf):
        if header is None:
            header = row
            # Check which fields are multivalued (for a field 'foo', '__mv_foo' also exists)
            if MV_ENABLED:
                names = set(header)
                mv_fields = [field for field in header if "__mv_" + field in names]
            continue

        # need to maintain field order
        # (imported here so that input without data rows never needs it)
        import splunk.util as util
        result = util.OrderedDict()
        for i, val in enumerate(row):
            result[header[i]] = val

        for key in mv_fields:
            mv_key = "__mv_" + key
            if key in result and mv_key in result:
                # Expand the value of __mv_[key] to a list, store it in key, and delete __mv_[key]
                vals = []
                if decodeMV(result[mv_key], vals):
                    # a single decoded value is stored as a plain string
                    result[key] = vals[0] if len(vals) == 1 else vals
                del result[mv_key]

        results.append(result)
    return results


def readResults(input_buf = None, settings = None, has_header = True):
    '''
    Converts an Intersplunk-formatted file object into a list of dicts
    representing the contained events.

    input_buf  -- text stream to read; defaults to stdin (decoded as UTF-8
                  on Python 3).
    settings   -- optional dict that receives the "attr:val" header
                  entries; values are URL-unquoted, and a line without a
                  colon is appended (after a newline) to the previous
                  attribute's value.
    has_header -- when true, the settings header (terminated by a blank
                  line) is read before the CSV data.

    The first CSV row is the field header. For every field 'foo' that has
    a '__mv_foo' companion column (and MV_ENABLED is true), a successfully
    decoded multivalue replaces 'foo' -- as a list, or as a plain string
    when it holds exactly one value -- and '__mv_foo' is removed.
    '''
    owned_wrapper = None
    if input_buf is None:
        if sys.version_info >= (3, 0):
            input_buf = owned_wrapper = TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
        else:
            input_buf = sys.stdin

    if settings is None:
        settings = {} # dummy

    try:
        if has_header:
            _readSettingsHeader(input_buf, settings)
        return _readCsvResults(input_buf)
    finally:
        if owned_wrapper is not None:
            # Don't let the temporary wrapper close stdin when it is discarded.
            owned_wrapper.detach()
|
|
|
|
|
|
def getOrganizedResults(input_str = None):
    '''
    Reads an Intersplunk-formatted file object with readResults() and
    returns a tuple of:

        (results, dummyresults, settings)

    "dummyresults" is always an empty list. "settings" is a fresh dict
    handed to readResults(), which stores any "attr:val" header lines it
    reads there. Legacy note: since the change to csv stopped sending the
    searchinfo, "settings" is normally empty; it has not been updated to
    store the auth token.
    '''
    settings = {}
    results = readResults(input_str, settings)
    return results, [], settings
|
|
|
|
|
|
def rawresultsToString(results):
    '''
    Extracts the raw event data (the "_raw" field) from a result set and
    returns all of them as a single newline-delimited string. Results
    without a "_raw" field are skipped.
    '''
    # TODO: is this method still being used?
    # Look the key up directly instead of scanning every field of every result.
    return "\n".join(result["_raw"] for result in results if "_raw" in result)
|
|
|
|
|
|
def win32_utf8_argv():
    """Uses shell32.CommandLineToArgvW to get sys.argv as a list of UTF-8
    strings.

    Versions 2.5 and older of Python don't support Unicode in sys.argv on
    Windows, with the underlying Windows API instead replacing multi-byte
    characters with '?'.

    On Python 3 sys.argv is returned unchanged.

    Returns None on failure.

    Example usage:

    >>> def main(argv=None):
    ...    if argv is None:
    ...        argv = win32_utf8_argv() or sys.argv
    ...
    """
    if sys.version_info >= (3, 0):
        return sys.argv

    try:
        from ctypes import POINTER, byref, cdll, c_int, windll
        from ctypes.wintypes import LPCWSTR, LPWSTR

        get_command_line = cdll.kernel32.GetCommandLineW
        get_command_line.argtypes = []
        get_command_line.restype = LPCWSTR

        to_argv = windll.shell32.CommandLineToArgvW
        to_argv.argtypes = [LPCWSTR, POINTER(c_int)]
        to_argv.restype = POINTER(LPWSTR)

        argc = c_int(0)
        argv = to_argv(get_command_line(), byref(argc))
        if argc.value > 0:
            # Remove Python executable if present
            start = 1 if argc.value - len(sys.argv) == 1 else 0
            return [argv[i].encode('utf-8') for i in range(start, argc.value)]
    except Exception:
        # Best effort only: any failure is reported as None.
        pass
    return None
|
|
|
|
|
|
def getKeywordNewlineSafe(arg, argname):
    '''
    Pull the value of a single "argname<sep>value" argument without
    touching any newlines inside the value.

    One pair of surrounding double quotes is removed from arg first. If
    the remainder starts with argname, returns [(argname, '=', value)],
    where value is everything after the separator; otherwise returns [].
    '''
    if arg.startswith('"') and arg.endswith('"'):
        arg = arg[1:-1]

    if not arg.startswith(argname):
        return []

    # pick off just the search string and construct the list
    # technically we could have gotten '::' or '==' and not just '='
    two_char_forms = (argname + "::", argname + "==")
    sep_len = 2 if arg.startswith(two_char_forms) else 1
    return [(argname, '=', arg[len(argname) + sep_len:])]
|
|
|
|
# from sys.argv, get key=value args as well as other plain keyword args (e.g. "file")
# decode the values if charset is provided
def getKeywordsAndOptions(charset=None):
    '''
    Split the script's command-line arguments (everything after the first)
    into plain keywords and key/value options.

    Returns (keywords, kvs): keywords is the list of arguments in which no
    "attr=value", "attr==value" or "attr::value" pair was found; kvs maps
    each attr found to its value (a later occurrence overwrites an earlier
    one). When charset is given, byte-string keywords and values are
    decoded with it.
    '''
    def needs_decoding(obj):
        if sys.version_info >= (3, 0):
            return isinstance(obj, bytes)
        return isinstance(obj, str)

    # Arguments whose value may contain newlines that must be kept
    # (ssquery: SPL-65995); matched against the lower-cased argument.
    newline_safe_re = re.compile(r'"?(ssquery|message|footer)(::|={1,2})')
    # attr=<anything>, used once the outer quotes have been removed
    quoted_kv_re = re.compile(r'(?:^|\s+)([a-zA-Z0-9_-]+)\s*(::|==|=)\s*(.*)')
    # attr=value where value is a bare token or a double-quoted string
    bare_kv_re = re.compile(r'(?:^|\s+)([a-zA-Z0-9_-]+)\s*(::|==|=)\s*((?:[^"\s]+)|(?:"[^"]*"))')

    keywords = []
    kvs = {}

    # SPL-30670 - handle unicode args specially in windows
    argv = win32_utf8_argv() or sys.argv

    # for each arg (the first entry is skipped)
    for arg in argv[1:]:
        special = newline_safe_re.match(arg.lower())
        if special:
            matches = getKeywordNewlineSafe(arg, special.group(1))
        elif arg.startswith('"') and arg.endswith('"'):
            # handle case where arg is surrounded by quotes
            # remove outer quotes and accept attr=<anything>
            arg = arg[1:-1]
            matches = quoted_kv_re.findall(arg)
        else:
            matches = bare_kv_re.findall(arg)

        if len(matches) == 0:
            if charset is not None and needs_decoding(arg):
                arg = arg.decode(charset)
            keywords.append(arg)
            continue

        # for each k=v match
        for attr, _sep, val in matches:
            if charset is not None and needs_decoding(val):
                val = val.decode(charset)
            kvs[attr] = val

    return keywords, kvs
|