You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

131 lines
4.1 KiB

# Version 4.0
import sys,splunk.Intersplunk
import re
import urllib
ipre = re.compile(r"\d+\.\d+\.\d+\.\d+")
results = []
def charCount(text):
return len(text)
def stats(r):
"""returns the median, average, standard deviation, min and max of a sequence"""
tot = sum(r)
avg = tot/len(r)
sdsq = sum([(i-avg)**2 for i in r])
s = sorted(r)
return s[len(s)//2], avg, (sdsq/(len(r)-1 or 1))**.5, min(r), max(r)
# return stats on width
def widthStats(lines):
widths = [len(line) for line in lines]
return stats(widths)
# return stats on left margin
wsre = re.compile(r"\s*")
def marginStats(lines):
leftMargins = [wsre.match(line).end() for line in lines]
return stats(leftMargins)
# first character of each line. javastack: firstchars="[ttttt*" (t=tab). conffile: firstchars = "*[==*[=="
def firstChars(lines):
firstchars = [line[0] for line in lines]
return '"' + "".join(firstchars) + '"'
breakers = re.compile("[\\t ,;#$%&+./:=?@'|*(){}<>\\[\\]^!-]")
# take first punc of each line and give most common one.
def firstPuncts(lines):
firstpuncts = []
for line in lines:
match = breakers.search(line)
if match:
if match == '\t':
match = 't'
elif match == '\r':
match = 'r'
elif match == '\\':
match = 'r'
firstpuncts.append(match.group())
return '"' + "".join(firstpuncts) + '"'
def classCounts(text):
cisalnum = cisalpha = cislower = cisupper = cisdigit = cisspace = cother = 0
for c in text:
if c.isalnum():
cisalnum = cisalnum + 1
if c.isalpha():
cisalpha = cisalpha + 1
if c.islower():
cislower = cislower + 1
elif c.isupper():
cisupper = cisupper + 1
elif c.isdigit():
cisdigit = cisdigit + 1
if c.isspace():
cisspace = cisspace + 1
else:
cother = cother + 1
return [cisalnum, cisalpha, cislower, cisupper, cisdigit, cisspace, cother]
def blanklines(lines):
count = 0
for line in lines:
if len(line.strip()) == 0:
count = count + 1
return count
try:
results,dummyresults,settings = splunk.Intersplunk.getOrganizedResults()
PREFIX = "rawstat_"
for r in results:
if "_raw" in r:
raw = r["_raw"]
r['stat_len'] = len(raw)
lines = raw.split('\n')
TYPE = "width_"
median, avgval, stddev, minval, maxval = widthStats(lines)
r[PREFIX + TYPE + 'median'] = median
r[PREFIX + TYPE + 'avg'] = avgval
r[PREFIX + TYPE + 'stddev'] = stddev
r[PREFIX + TYPE + 'min'] = minval
r[PREFIX + TYPE + 'maxval'] = maxval
TYPE = "leftmargin_"
median, avgval, stddev, minval, maxval = marginStats(lines)
r[PREFIX + TYPE + 'median'] = median
r[PREFIX + TYPE + 'avg'] = avgval
r[PREFIX + TYPE + 'stddev'] = stddev
r[PREFIX + TYPE + 'min'] = minval
r[PREFIX + TYPE + 'maxval'] = maxval
r[PREFIX + 'blanklines'] = blanklines(lines)
# trucate at 50 chars
r[PREFIX + 'firstchars'] = firstChars(lines)[0:50]
r[PREFIX + 'firstpuncts'] = firstPuncts(lines)[0:50]
cisalnum, cisalpha, cislower, cisupper, cisdigit, cisspace, cother = classCounts(raw)
r[PREFIX + 'isalnum'] = cisalnum
r[PREFIX + 'isalpha'] = cisalpha
r[PREFIX + 'islower'] = cislower
r[PREFIX + 'isupper'] = cisupper
r[PREFIX + 'isdigit'] = cisdigit
r[PREFIX + 'isspace'] = cisspace
r[PREFIX + 'otherchar'] = cother
# most common char = "=,[],&"
except:
import traceback
stack = traceback.format_exc()
results = splunk.Intersplunk.generateErrorResults("Error : Traceback: " + str(stack))
splunk.Intersplunk.outputResults( results )

Powered by BW's shoe-string budget.