You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

499 lines
18 KiB

from __future__ import absolute_import
from __future__ import print_function
# Version 4.0
#
# Intersplunk provides simple access to the comm protocol between Splunk search
# operators.
#
# The intersplunk format is plain CSV, with a first-line field header.
#
from builtins import zip
from builtins import range
import csv
import sys
import copy
import re
if sys.version_info >= (3, 0):
from io import (BytesIO, TextIOWrapper, StringIO)
else:
from StringIO import StringIO
BytesIO = StringIO
from future.moves.urllib import parse as urllib_parse
import os
# set the maximum allowable CSV field size
#
# The default of the csv module is 128KB; upping to 10MB. See SPL-12117 for
# the background on issues surrounding field sizes.
# (this method is new in python 2.5)
csv.field_size_limit(10485760)
# When True, readResults() treats a field 'foo' as multivalued whenever a
# companion '__mv_foo' column is also present in the CSV header.
MV_ENABLED = True
def set_binary_mode(fileobj):
    '''
    Put an already-open file object into binary mode on Windows.

    Intersplunk assumes that the bytes it writes to stdout are exactly the
    bytes that get emitted. That is false on Windows, where '\\n' is mapped
    to '\\r\\n' in text mode. The usual cure is to open the file in binary
    mode, but stdout is already open, hence this workaround.

    On every other platform this function does nothing.
    '''
    # Pylint can't handle platform-dependent code.
    # pylint: disable-all
    if sys.platform != 'win32':
        return
    import msvcrt
    msvcrt.setmode(fileobj.fileno(), os.O_BINARY)
def default_stdout_stream():
    '''
    Return the stream that Intersplunk output is written to by default.

    On Python 3 this is the binary buffer underneath sys.stdout. On
    Python 2 it is sys.stdout itself, after set_binary_mode() has been
    applied to it.
    '''
    if sys.version_info < (3, 0):
        set_binary_mode(sys.stdout)
        return sys.stdout
    return sys.stdout.buffer
def splunkHome():
    '''
    Return the normalized path stored in the SPLUNK_HOME environment
    variable. Raises KeyError if the variable is not set.
    '''
    import os
    home = os.environ["SPLUNK_HOME"]
    return os.path.normpath(home)
def isGetInfo(args):
    '''
    Inspect the invocation mode passed as the first script argument.

    args is an argv-style list. Returns a tuple (is_getinfo, newargs) where
    is_getinfo is True for '__GETINFO__' and False for '__EXECUTE__', and
    newargs is args with the mode argument removed. Any other invocation
    emits an error result and exits the process.
    '''
    mode = args[1] if len(args) >= 2 else None
    if mode == "__GETINFO__" or mode == "__EXECUTE__":
        # keep the script name, drop the mode marker, keep everything after it
        remaining = [args[0]] + list(args[2:])
        return (mode == "__GETINFO__", remaining)
    # invalid invocation, exit and return error message immediately
    generateErrorResults("Unexpected first argument to script, expected '__GETINFO__' or '__EXECUTE__'.")
    sys.exit()
def parseError(msg):
    '''
    Report msg through generateErrorResults() and then terminate the
    script with sys.exit(). This function never returns.
    '''
    generateErrorResults(msg)
    sys.exit()
def outputInfo(streaming, generating, retevs, reqsop, preop, timeorder=False, clear_req_fields=False, req_fields = None):
    '''
    Emit a single-row result describing this command, then exit.

    Each truthy argument turns the matching flag into '1' (otherwise '0'):
      streaming        -> 'streaming'
      generating       -> 'generating'
      retevs           -> 'retainsevents'
      reqsop           -> 'requires_preop'
      clear_req_fields -> 'clear_required_fields'
    preop is passed through unchanged as 'streaming_preop'.

    timeorder is interpreted relative to generating: for a generating
    command it sets 'generates_timeorder' to '1'; for a non-generating
    command it sets 'overrides_timeorder' to '0' (the default is '1').

    req_fields, when not None and non-empty, is added as 'required_fields'.

    The row is written with outputResults() using ',' as the multivalue
    delimiter, after which the process exits via sys.exit().
    '''
    def flag(condition):
        return '1' if condition else '0'

    # Key order is kept as-is: it determines the CSV column order.
    infodict = {
        'streaming_preop' : preop,
        'streaming' : flag(streaming),
        'generating' : flag(generating),
        'retainsevents' : flag(retevs),
        'requires_preop' : flag(reqsop),
        'generates_timeorder' : flag(generating and timeorder),
        'overrides_timeorder' : '0' if (timeorder and not generating) else '1',
        'clear_required_fields' : flag(clear_req_fields) }
    if req_fields is not None and len(req_fields) > 0:
        infodict['required_fields'] = req_fields
    outputResults([ infodict ], mvdelim=',')
    sys.exit()
'''
For multivalues, values are wrapped in '$' and separated using ';'.
Literal '$' characters inside a value are represented with '$$'.
'''
def getEncodedMV(vals):
    '''
    Encode a sequence of strings as a single multivalue string.

    Every value has its '$' characters doubled and is wrapped in '$';
    the wrapped values are joined with ';'. An empty sequence yields ''.
    '''
    return ';'.join('$' + val.replace('$', '$$') + '$' for val in vals)
def decodeMV(s, vals):
    '''
    Decode a multivalue string produced by getEncodedMV().

    Decoded values are appended to the caller-supplied list vals.
    Returns False if s is empty or if a character other than '$' or ';'
    appears between values; returns True otherwise. Values appended
    before a failure is detected are left in vals.
    '''
    if len(s) == 0:
        return False
    length = len(s)
    pos = 0
    in_value = False
    current = []
    while pos < length:
        ch = s[pos]
        if in_value:
            if ch != '$':
                current.append(ch)
            elif pos + 1 < length and s[pos + 1] == '$':
                # '$$' inside a value is an escaped literal '$'
                current.append('$')
                pos += 1
            else:
                # a lone '$' closes the current value
                vals.append("".join(current))
                current = []
                in_value = False
        elif ch == '$':
            in_value = True
        elif ch != ';':
            return False
        pos += 1
    return True
def addMessage(messages, msg, key):
    '''
    Append msg to the list stored under key in the messages dict,
    creating that list first if the key is not present yet.
    '''
    messages.setdefault(key, []).append(msg)
def addInfoMessage(messages, msg):
    '''Record msg in messages under the "info_message" key.'''
    addMessage(messages, msg, key="info_message")
def addWarnMessage(messages, msg):
    '''Record msg in messages under the "warn_message" key.'''
    addMessage(messages, msg, key="warn_message")
def addErrorMessage(messages, msg):
    '''Record msg in messages under the "error_message" key.'''
    addMessage(messages, msg, key="error_message")
def outputResults(results, messages = None, fields = None, mvdelim = '\n', outputfile = None):
    '''
    Outputs the contents of a result set in Intersplunk format, for
    consumption by the next search processor.

    results    -- sequence of dicts, one per row, or None to write only the
                  message header. NOTE: the dicts are modified in place:
                  every list value is replaced by its mvdelim-joined string
                  and a companion '__mv_<key>' entry holding the encoded
                  multivalue is added.
    messages   -- optional dict mapping a level name to a list of messages.
                  Written as 'level=msg' lines followed by one empty line.
    fields     -- optional explicit list of CSV columns. Defaults to every
                  key seen in results, in first-seen order. Keys not listed
                  are ignored.
    mvdelim    -- delimiter used to flatten list values into one string.
    outputfile -- binary stream to write to; defaults to stdout.
    '''
    if outputfile is None:
        outputfile = default_stdout_stream()

    if messages is not None:
        # message header is everything before the first empty line, similar to the input
        # header format. also key = value, with stripping of whitespace
        for level, level_msgs in messages.items():
            for msg in level_msgs:
                line = "%s=%s\n" % (level, msg)
                if sys.version_info >= (3, 0):
                    line = line.encode()
                outputfile.write(line)
        outputfile.write(b"\n")

    if results is None:
        return

    seen = set()
    ordered_keys = []
    for result in results:
        # Iterate over a snapshot of the keys: '__mv_' entries are added to
        # the dict while looping.
        for key in list(result.keys()):
            if isinstance(result[key], list):
                result['__mv_' + key] = getEncodedMV(result[key])
                result[key] = mvdelim.join(result[key])
        # Collect column names in first-seen order.
        for key in result.keys():
            if key not in seen:
                seen.add(key)
                ordered_keys.append(key)

    h = ordered_keys if fields is None else fields

    if sys.version_info >= (3, 0):
        # newline='' switches off newline translation, as the csv module
        # requires. Without it the writer's '\r\n' terminators and any '\n'
        # embedded in a field are rewritten on Windows.
        writer_stream = TextIOWrapper(outputfile, encoding = 'utf-8', newline = '', write_through = True)
    else:
        writer_stream = outputfile
    try:
        dw = csv.DictWriter(writer_stream, h, extrasaction='ignore')
        dw.writeheader()
        dw.writerows(results)
    finally:
        if sys.version_info >= (3, 0):
            # Always detach, even on error: an undetached wrapper would
            # close the underlying file when it is garbage collected.
            writer_stream.detach()
def outputStreamResults(results, version = "4.3", header = None, mvdelim = '\n', outputfile = None):
    '''
    Write results as one length-prefixed chunk.

    The chunk starts with the line 'splunk <version>,<header length>,<body
    length>', followed by the header bytes (if any) and the body bytes (if
    any). Header and body are each rendered with outputResults() into an
    in-memory buffer so that their byte lengths are known up front.

    outputfile defaults to stdout.
    '''
    if outputfile is None:
        outputfile = default_stdout_stream()

    def render(rows):
        # Serialize rows with outputResults() and hand back the raw bytes.
        buf = BytesIO()
        outputResults(rows, None, None, mvdelim, buf)
        data = buf.getvalue()
        buf.close()
        return data

    header_str = render(header) if header is not None else b""
    body_str = render(results)

    if sys.version_info >= (3, 0):
        version = version.encode()
    outputfile.write(b"splunk %s,%d,%d\n" % (version, len(header_str), len(body_str)))
    for payload in (header_str, body_str):
        if len(payload) > 0:
            outputfile.write(payload)
def generateErrorResults(errorStr):
    '''
    Writes errorStr to stdout as a one-row CSV result with the single
    column "ERROR".

    Always returns None. Legacy callers used to consume a return value, so
    the explicit return is kept.
    '''
    h = ["ERROR"]
    results = [ {"ERROR": errorStr} ]

    outputfile = default_stdout_stream()
    if sys.version_info >= (3, 0):
        # newline='' switches off newline translation, as the csv module
        # requires. Without it the writer's '\r\n' terminators and any '\n'
        # inside the message are rewritten on Windows.
        writer_stream = TextIOWrapper(outputfile, encoding = 'utf-8', newline = '', write_through = True)
    else:
        writer_stream = outputfile
    try:
        dw = csv.DictWriter(writer_stream, h)
        dw.writeheader()
        dw.writerows(results)
    finally:
        if sys.version_info >= (3, 0):
            # Always detach, even on error: an undetached wrapper would
            # close stdout's buffer when it is garbage collected.
            writer_stream.detach()
    return None # legacy calls tried to use this value.
def readResults(input_buf = None, settings = None, has_header = True):
    '''
    Converts an Intersplunk-formatted file object into a list of ordered
    dicts, one per event.

    input_buf  -- text file object to read from; defaults to stdin (decoded
                  as UTF-8 on Python 3).
    settings   -- optional dict that receives the "attr:val" pairs found in
                  the header. Values are URL-unquoted.
    has_header -- when True, a settings header terminated by an empty line
                  is consumed before the CSV payload.

    For every field 'foo' that has a companion '__mv_foo' column (and when
    MV_ENABLED is set), a successfully decoded multivalue replaces 'foo'
    (a list, or the bare value when there is exactly one) and the
    '__mv_foo' entry is removed from the row.
    '''
    if input_buf is None:
        if sys.version_info >= (3, 0):
            input_buf = TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
        else:
            input_buf = sys.stdin

    if settings is None:
        settings = {} # dummy

    if has_header:
        # until we get a blank line, read "attr:val" lines, setting the values in 'settings'
        last_attr = None
        while True:
            line = input_buf.readline()
            line = line[:-1] # remove last character (newline)
            if len(line) == 0:
                break

            colon = line.find(':')
            if colon < 0:
                # A line without a colon continues the previous value.
                # Stop here: falling through with colon == -1 would also
                # store the line under the bogus key line[:-1].
                if last_attr:
                    settings[last_attr] = settings[last_attr] + '\n' + urllib_parse.unquote(line)
                continue

            # extract it and set value in settings
            last_attr = line[:colon]
            settings[last_attr] = urllib_parse.unquote(line[colon+1:])

    results = []
    csvr = csv.reader(input_buf)
    header = []
    mv_fields = []
    first = True
    for line in csvr:
        if first:
            header = line
            first = False
            # Check which fields are multivalued (for a field 'foo', '__mv_foo' also exists)
            if MV_ENABLED:
                header_names = set(header)
                mv_fields = [field for field in header if "__mv_" + field in header_names]
            continue

        # need to maintain field order
        # (imported here, not at module level, so header-only input never
        # needs the splunk package)
        import splunk.util as util
        result = util.OrderedDict()
        for i, val in enumerate(line):
            result[header[i]] = val

        for key in mv_fields:
            mv_key = "__mv_" + key
            if key in result and mv_key in result:
                # Expand the value of __mv_[key] to a list, store it in key, and delete __mv_[key]
                vals = []
                if decodeMV(result[mv_key], vals):
                    result[key] = vals[0] if len(vals) == 1 else vals
                    del result[mv_key]
        results.append(result)

    return results
def getOrganizedResults(input_str = None):
    '''
    Reads Intersplunk-formatted input through readResults() and returns a
    tuple of:
    (results, dummyresults, settings)
    "results" is the list of events returned by readResults(),
    "dummyresults" is always an empty list, and "settings" is the dict that
    readResults() fills from the header lines of the input.
    '''
    settings = {}
    return readResults(input_str, settings), [], settings
def rawresultsToString(results):
    '''
    Collects the "_raw" value of every result that has one and returns
    them joined into a single newline-delimited string. Results without a
    "_raw" field are skipped.
    '''
    # TODO: is this method still being used?
    raw_values = [
        value
        for result in results
        for name, value in result.items()
        if name == "_raw"
    ]
    return "\n".join(raw_values)
def win32_utf8_argv():
    '''
    Uses shell32.CommandLineToArgvW to get sys.argv as a list of UTF-8
    strings.

    Versions 2.5 and older of Python don't support Unicode in sys.argv on
    Windows, with the underlying Windows API instead replacing multi-byte
    characters with '?'.

    On Python 3 sys.argv is returned unchanged. Returns None on failure.

    Example usage:
        argv = win32_utf8_argv() or sys.argv
    '''
    if sys.version_info >= (3, 0):
        return sys.argv

    try:
        from ctypes import POINTER, byref, cdll, c_int, windll
        from ctypes.wintypes import LPCWSTR, LPWSTR

        get_command_line = cdll.kernel32.GetCommandLineW
        get_command_line.argtypes = []
        get_command_line.restype = LPCWSTR

        command_line_to_argv = windll.shell32.CommandLineToArgvW
        command_line_to_argv.argtypes = [LPCWSTR, POINTER(c_int)]
        command_line_to_argv.restype = POINTER(LPWSTR)

        arg_count = c_int(0)
        wide_args = command_line_to_argv(get_command_line(), byref(arg_count))
        total = arg_count.value
        if total > 0:
            # Remove Python executable if present
            start = 1 if total - len(sys.argv) == 1 else 0
            return [wide_args[i].encode('utf-8') for i in range(start, total)]
    except Exception:
        # Best effort only: the caller falls back to sys.argv on None.
        pass
    return None
def getKeywordNewlineSafe(arg, argname):
    '''
    Split a single "argname=value" argument without losing newlines in the
    value.

    One pair of surrounding double quotes is removed from arg first. If the
    remainder starts with argname, returns [(argname, '=', value)] where
    value is everything after the delimiter ('::', '==' or a single
    character such as '='). Otherwise returns an empty list.
    '''
    if arg.startswith('"') and arg.endswith('"'):
        arg = arg[1:-1]

    if not arg.startswith(argname):
        return []

    # pick off just the search string and construct the list
    # technically we could have gotten '::' or '==' and not just '='
    two_char_delims = ("%s::" % argname, "%s==" % argname)
    delim_len = 2 if arg.startswith(two_char_delims) else 1
    return [(argname, '=', arg[len(argname) + delim_len:])]
# from sys.argv, get key=value args as well as other plain keyword args (e.g. "file")
# decode the values if charset is provided
def getKeywordsAndOptions(charset=None):
    '''
    Split the script arguments into plain keywords and key/value options.

    Returns (keywords, kvs): keywords is the list of arguments that contain
    no key=value pair, and kvs maps each parsed key to its value. When
    charset is given, byte-string keywords and values are decoded with it.
    The first argv entry (the script name) is skipped.
    '''
    def needs_decoding(obj):
        if sys.version_info >= (3, 0):
            return isinstance(obj, bytes)
        return isinstance(obj, str)

    # Arguments whose value may contain newlines that must not be lost:
    # ssquery (SPL-65995), message and footer.
    newline_safe_args = (
        ("\"?ssquery(::|={1,2})", 'ssquery'),
        ("\"?message(::|={1,2})", 'message'),
        ("\"?footer(::|={1,2})", 'footer'),
    )

    keywords = []
    kvs = {}

    # SPL-30670 - handle unicode args specially in windows
    argv = win32_utf8_argv() or sys.argv

    for arg in argv[1:]:
        lowered = arg.lower()
        for pattern, name in newline_safe_args:
            if re.match(pattern, lowered):
                matches = getKeywordNewlineSafe(arg, name)
                break
        else:
            # handle case where arg is surrounded by quotes
            # remove outer quotes and accept attr=<anything>
            if arg.startswith('"') and arg.endswith('"'):
                arg = arg[1:-1]
                matches = re.findall(r'(?:^|\s+)([a-zA-Z0-9_-]+)\s*(::|==|=)\s*(.*)', arg)
            else:
                matches = re.findall(r'(?:^|\s+)([a-zA-Z0-9_-]+)\s*(::|==|=)\s*((?:[^"\s]+)|(?:"[^"]*"))', arg)

        if len(matches) == 0:
            # no key=value pair found: the whole argument is a keyword
            if charset is not None and needs_decoding(arg):
                arg = arg.decode(charset)
            keywords.append(arg)
            continue

        # for each k=v match
        for attr, _, val in matches:
            if charset is not None and needs_decoding(val):
                val = val.decode(charset)
            kvs[attr] = val

    return keywords, kvs

Powered by BW's shoe-string budget.