You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

92 lines
3.2 KiB

# Version 4.0
import splunk.Intersplunk as si
import splunk.mining.FieldLearning as ifl
import re
import traceback
import splunk.mining.dcutils as dcu
logger = dcu.getLogger()
if __name__ == '__main__':
try:
keywords,options = si.getKeywordsAndOptions()
examples = options.get('examples', None)
badexamples = options.get('counterexamples', None)
fromfield = options.get('fromfield', '_raw')
maxtrainers = options.get('maxtrainers', '100')
msg = None
if examples != None and examples.startswith('"') and examples.endswith('"'):
examples = examples[1:-1]
if badexamples != None and badexamples.startswith('"') and badexamples.endswith('"'):
badexamples = badexamples[1:-1]
logger.info("erex run with examples: %s" % examples)
if len(keywords) == 0:
msg = "A required fieldname is missing"
elif examples == None:
msg = "Value for 'examples' is required"
else:
try:
maxtrainers = int(maxtrainers)
if maxtrainers < 1 or maxtrainers > 1000:
raise Exception()
except: msg = "Value for 'maxtrainers' must be an integer between 1-1000"
if msg != None:
si.generateErrorResults(msg)
exit(0)
messages = {}
results,dummyresults,settings = si.getOrganizedResults()
values = []
# for first N result used as training
for result in results[:maxtrainers]:
val = result.get(fromfield, None)
if val != None:
values.append(val)
examples = [ex.strip() for ex in examples.split(",")]
if badexamples == None:
badexamples = []
else:
badexamples = [ex.strip() for ex in badexamples.split(",")]
try:
regexes, extractions = ifl.learn(values, examples, badexamples)
except Exception as e:
# log error
stack = traceback.format_exc()
logger.error("Exception encountered: %s" % e)
logger.info("Traceback: %s" % stack)
# just tell user that we couldn't extract anything
regexes = ""
if len(regexes) == 0:
si.generateErrorResults('Unable to learn any extractions. Provide different examples, counterexamples, or searchresults')
exit(0)
rex = regexes[0]
rex = rex.replace("?P<FIELDNAME>", "?P<%s>" % keywords[0])
si.addInfoMessage(messages, 'Successfully learned regex. Consider using: | rex "%s"' % rex.replace('"', '\\"'))
# for each result
for result in results:
val = result.get(fromfield, None)
# match regex and put values in
match = re.search(rex, val)
if match:
extractions = match.groupdict()
for k,v in extractions.items():
result[k] = v
si.outputResults(results, messages)
except Exception as e:
stack = traceback.format_exc()
si.generateErrorResults("Error '%s'" % e)
logger.error("Exception encountered: %s" % e)
logger.info("Traceback: %s" % stack)

Powered by BW's shoe-string budget.