You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

159 lines
6.4 KiB

# Version 4.0
import sys,splunk.Intersplunk
import re
import urllib
import time
from builtins import range, filter
from splunk.util import cmp
from functools import cmp_to_key
"""
This script looks for query ids whose time ranges overlap or that have time gaps
between their max and min times. The generated results only make sense for inputs
that have a min/max time and a query/search id specified. Currently the following
fields are required:
(1) info_min_time
(2) info_max_time
(3) info_search_time
(4) search_name
Overlaps/gaps are found only between events with the same search_name value but
different info_search_time values. Events that do not contain these fields are
ignored. If no events contain these fields, error results are returned.
"""
# Names of the event fields this script reads. An event missing any of them is
# rejected by valid_result and never takes part in the analysis.
# Lower bound of an event's time range; parsed with parse_float.
min_time_key = "info_min_time"
# Upper bound of an event's time range; parsed with parse_float.
max_time_key = "info_max_time"
# Field used as the query id: events are de-duplicated on it and it is
# reported in the output as the two "sid" values of each finding.
query_id_key = "info_search_time"
# Overlaps and gaps are only looked for between events sharing this value.
search_name_key = "search_name"
# Finite stand-in for "unbounded": every infinite upper/lower bound is mapped
# onto this value so the rest of the script can compare against it with ==.
infinity = 10E200


# windows python infinity float don't play nice - so we parse our own floats
def parse_float(x):
    """Convert a time field to a float, mapping positive infinity to `infinity`.

    Args:
        x: the field value; normally a string such as "1262304000.000" or
            "+Infinity". Anything float() accepts is allowed.

    Returns:
        The parsed float, or the module-level `infinity` sentinel when the
        value denotes positive infinity (or exceeds the sentinel).

    Raises:
        ValueError: if x is a string float() cannot parse.
        TypeError: if x is of a type float() does not accept.
    """
    # Checked before float() because float() may not be able to parse this
    # spelling on every platform (see the comment above the function).
    if x == "+Infinity":
        return infinity
    value = float(x)
    # Other spellings ("inf", "Infinity", "+inf", ...) parse to a real IEEE
    # infinity, which would never compare equal to the sentinel; fold them
    # (and anything else beyond the sentinel) onto it as well.
    if value > infinity:
        return infinity
    return value
def fToStr(x):
    """Render a number compactly: at most four decimals, no trailing zeros.

    The `infinity` sentinel is rendered as "inf". Any other value is formatted
    with "%.4f", after which trailing zeros and a then-dangling decimal point
    are removed (so 12.5 becomes "12.5" and 3.0 becomes "3").
    """
    if x == infinity:
        return "inf"
    text = ("%.4f" % x).rstrip('0')
    # A value with no fractional part is left as e.g. "3." - drop the dot.
    return text[:-1] if text.endswith('.') else text
#used to sort result by search name then min_time
def result_compare(x, y):
    """Three-way comparison ordering events by search name, min time, query id.

    Returns a negative, zero or positive number in the style of cmp(), so it
    can be wrapped with functools.cmp_to_key for sorting.
    """
    # Both min times are parsed up front, before any other comparison.
    left_start = parse_float(x[min_time_key])
    right_start = parse_float(y[min_time_key])

    name_order = cmp(x[search_name_key], y[search_name_key])
    if name_order != 0:
        return name_order

    # Same search name: fall back to the start time, then to the query id.
    if left_start == right_start:
        return cmp(x[query_id_key], y[query_id_key])
    return -1 if left_start < right_start else 1
def valid_result(x):
    """Return True when event *x* contains every field the analysis reads."""
    required = (min_time_key, max_time_key, query_id_key, search_name_key)
    return all(field in x for field in required)
def gapsOverlaps(results, findGaps):
    """Report overlapping (and optionally gapped) time ranges between events.

    Events lacking a required field are dropped and only the first event seen
    for each query id is kept. The remaining events are sorted with
    result_compare (search name, then min time, then query id) and each event
    is compared with the events that follow it under the same search name:

    * every later event that starts before the current one ends is reported
      as an overlap;
    * if findGaps is true and the event immediately following the current one
      starts after the current one ends, that pair is reported as a gap.

    Complexity: mn + nlog(n), where n is the number of events and m the
    average number of overlapping events per event.

    Args:
        results: iterable of dict-like events.
        findGaps: when true, gaps are reported in addition to overlaps.

    Returns:
        A list of result dicts ordered from latest to earliest "_time". If no
        event carries the required fields, or an exception is raised while
        processing, whatever splunk.Intersplunk.generateErrorResults returns
        for the corresponding message is returned instead.
    """
    toReturn = []
    try:
        # Prelim: (1) keep only events that contain the required fields and
        #         (2) keep only the first event for each query id.
        uniq_qid = {}
        for event in results:
            if valid_result(event) and event[query_id_key] not in uniq_qid:
                uniq_qid[event[query_id_key]] = event
        if not uniq_qid:
            return splunk.Intersplunk.generateErrorResults("Invalid events. Events should contain fields: %s, %s, %s and %s" % (min_time_key, max_time_key, query_id_key, search_name_key))

        # 1. Sort by search name, then min time, so that the events of one
        #    saved search are contiguous and in start order.
        ordered = sorted(uniq_qid.values(), key=cmp_to_key(result_compare))
        results_cnt = len(ordered)

        # 2. Compare every event with its successors.
        for i in range(0, results_cnt - 1):
            p = ordered[i]
            p_min = parse_float(p[min_time_key])
            p_max = parse_float(p[max_time_key])
            p_qid = p[query_id_key]
            p_name = p[search_name_key]

            # The inner loop is needed to find *all* overlaps with p.
            for j in range(i + 1, results_cnt):
                c = ordered[j]
                # It makes no sense to compare runs of different searches.
                if p_name != c[search_name_key]:
                    break
                c_qid = c[query_id_key]
                c_min = parse_float(c[min_time_key])
                c_max = parse_float(c[max_time_key])

                r = {}
                if c_min < p_max:
                    overlap_end = min(c_max, p_max)
                    r["_overlap_et"] = c_min
                    r["_overlap_lt"] = fToStr(overlap_end)
                    r["overlap_earliest"] = time.ctime(c_min)
                    if overlap_end == infinity:
                        # The sentinel cannot be rendered as a calendar time.
                        r["overlap_latest"] = "+Infinity"
                    else:
                        r["overlap_latest"] = time.ctime(overlap_end)
                    r["_time"] = c_min
                    r["_raw"] = "Found overlap in saved search '%s' between search ids: '%s' and '%s' from '%s' to '%s'" % (p_name, p_qid, c_qid, r["overlap_earliest"], r["overlap_latest"])
                # A gap is only looked for against the event directly after p
                # (i.e. the first gap).
                elif findGaps and j == i + 1 and c_min > p_max:
                    r["_gap_et"] = p_max
                    r["_gap_lt"] = c_min
                    r["gap_earliest"] = time.ctime(p_max)
                    r["gap_latest"] = time.ctime(c_min)
                    r["_time"] = p_max
                    r["_raw"] = "Found gap in saved search '%s' between search ids: '%s' and '%s' from '%s' to '%s'" % (p_name, p_qid, c_qid, r["gap_earliest"], r["gap_latest"])
                else:
                    # Events are sorted by start time, so nothing further on
                    # can overlap p either.
                    break

                # Details of the two events the finding was derived from.
                r["_orig_sid1"] = p_qid
                r["_orig_sid2"] = c_qid
                if "source" in p:
                    r["orig_source1"] = p["source"]
                if "source" in c:
                    r["orig_source2"] = c["source"]
                r["_orig_et1"] = fToStr(p_min)
                r["_orig_et2"] = fToStr(c_min)
                r["_orig_lt1"] = fToStr(p_max)
                r["_orig_lt2"] = fToStr(c_max)
                toReturn.append(r)

        # Reverse order: latest -> earliest. Sorting on the value itself (and
        # not on an int-truncated difference) keeps findings less than one
        # second apart in the right order.
        toReturn.sort(key=lambda finding: finding["_time"], reverse=True)
    except Exception:
        # Report the failure as a search result instead of crashing; process
        # exit and keyboard interrupts are deliberately not intercepted.
        import traceback
        stack = traceback.format_exc()
        toReturn = splunk.Intersplunk.generateErrorResults("Error : Traceback: " + str(stack))
    return toReturn
# Script entry point: fetch the incoming results through splunk.Intersplunk,
# run the analysis with gap detection enabled, and pass the outcome to
# outputResults.
results, dummyresults, settings = splunk.Intersplunk.getOrganizedResults()
splunk.Intersplunk.outputResults(gapsOverlaps(results, findGaps=True))

Powered by BW's shoe-string budget.