You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

419 lines
13 KiB

# Version 9.2.2.20240415
# DO NOT EDIT THIS FILE!
# Changes to default files will be lost on update and are difficult to
# manage and support.
#
# Please make any changes to system defaults by overriding them in
# apps or $SPLUNK_HOME/etc/system/local
# (See "Configuration file precedence" in the web documentation).
#
# To override a specific setting, copy the name of the stanza and
# setting to the file where you wish to override it.
#
# This file contains possible attributes and values you can use to
# configure transforms in transforms.conf.
#
# Global settings: these lines precede the first [stanza] header, so (per the
# transforms.conf spec) they act as defaults for every stanza below that does
# not set the same key itself.
#
# Key whose value REGEX is applied to; _raw is the raw event text.
SOURCE_KEY = _raw
# Number of characters of the event to search (index-time transforms).
LOOKAHEAD = 4096
# No pattern and no output format by default; each stanza supplies its own.
REGEX =
FORMAT =
# PCRE resource limits that bound the work a single match attempt may do.
MATCH_LIMIT = 100000
DEPTH_LIMIT = 1000
# Index-time only: no fallback value and no destination key by default.
DEFAULT_VALUE =
DEST_KEY =
# Index-time only: do not automatically write extracted fields to _meta.
WRITE_META = False
# Search-time only: when a field already exists, the newly found value is
# discarded instead of being appended as an additional (multi) value.
MV_ADD = False
# Search-time only: normalise extracted field names (non-alphanumeric
# characters become underscores).
CLEAN_KEYS = True
# Allow the extraction to be optimised away when its fields are not needed.
CAN_OPTIMIZE = True
# Drop field/value pairs whose value is an empty string.
KEEP_EMPTY_VALS = False
# Routes every event that contains at least one non-newline character
# (REGEX = .) by writing the literal value tcpOutQueue to the "queue" key.
[sendToTCP]
DEST_KEY = queue
REGEX = .
FORMAT = tcpOutQueue
# Captures a three-character file extension: a word character, a literal dot,
# then two letters plus one word character, not followed by another word
# character.
[filetype]
REGEX = (?:[\w_]\.([A-Za-z]{2}\w)(?!\w))
# Captures the leftmost of the six upper-case level keywords in the event.
# Case-sensitive and without a word-boundary check, so e.g. WARN also matches
# inside WARNING.
[loglevel]
REGEX = (FATAL|ERROR|WARN|INFO|DEBUG|TRACE)
# Matches one or more '#', then a bracketed timestamp shaped like
# <word d+, d+ d+:d+:d+ word word>, and captures the word inside the next
# <...> pair, emitting it as loglevel.
[loglevel-weblogic]
REGEX = #+<\w+ \d+, \d+ \d+:\d+:\d+ \w+ \w+> <(\w+)>
FORMAT = loglevel::$1
# The next three stanzas share one shape: a case-insensitive keyword that is
# neither preceded nor followed by an ASCII letter.
[os]
REGEX = (?i:(?<![a-z])(mac|windows|linux)(?![a-z]))
[browser]
REGEX = (?i:(?<![a-z])(netscape|mozilla|firefox|ie)(?![a-z]))
[language]
REGEX = (?i:(?<![a-z])(php|java|python|c\+\+|perl)(?![a-z]))
# Captures a dotted quad of 1-3 digit groups that is not adjacent to other
# digits. The 0-255 range is NOT validated here (see [octet]/[ipv4] for that).
[ip]
REGEX = (?:(?<!\d)(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})(?!\d))
# Captures local@domain.tld where local and domain each start with a word
# character followed by one or more word characters, '-' or '.'. The TLD is
# limited to 1-4 lower-case letters and nothing anchors the end of the match,
# so longer or upper-case TLDs are not captured in full.
[email]
REGEX = (?:(?<!\w)(\w[\w\-\.]+@\w[\w\-\.]+\.[a-z]{1,4}))
# Captures the dotted identifier between "<whitespace>at " and the next "(";
# presumably aimed at stack-trace frames -- confirm against sample data.
[exceptionclass]
REGEX = \sat ([\w\.$_-]+)\(
###### BASIC MODULAR REGULAR EXPRESSIONS DEFINITION START ###########
# When adding a new basic modular regex PLEASE add a comment that lists
# the fields that it extracts (named capturing groups), or whether it
# provides a placeholder for the group name as:
# Extracts: field1, field2....
#
[all_lazy]
# matches any run of non-newline characters (possibly empty), as few as possible
REGEX = .*?
[all]
# matches any run of non-newline characters (possibly empty), as many as possible
REGEX = .*
[nspaces]
# matches one or more NON space characters
REGEX = \S+
[alphas]
# matches a string containing only letters a-zA-Z
REGEX = [a-zA-Z]+
[alnums]
# matches a string containing letters + digits
REGEX = [a-zA-Z0-9]+
[qstring]
# matches a quoted "string" - extracts an unnamed variable - name MUST be provided as in [[qstring:name]]
# Extracts: empty-name-group (needs name)
# The body is [^"]*+ (possessive), so it stops at the first double quote;
# backslash-escaped quotes inside the string are not handled.
REGEX = "(?<>[^"]*+)"
[sbstring]
# matches a string enclosed in [] - extracts an unnamed variable - name MUST be provided as in [[sbstring:name]]
# Extracts: empty-name-group (needs name)
# The body is [^\]]*+ (possessive), so it stops at the first closing bracket;
# nested brackets are not handled.
REGEX = \[(?<>[^\]]*+)\]
[digits]
# matches one or more decimal digits
REGEX = \d+
[int]
# matches an integer or a hex number
# The hex alternative is listed first so that "0x1F" is not cut short at "0".
REGEX = 0x[a-fA-F0-9]+|\d+
[float]
# matches a float (or an int)
# The float form needs at least one digit after the dot ("1." is matched only
# as the int "1"); the fallback reuses the [int] modular regex.
REGEX = \d*\.\d+|[[int]]
[octet]
# this would match only numbers from 0-255 (one octet in an ip)
# Alternatives, tried left to right: 200-255, then three digits starting with
# 0 or 1 (000-199), then any one- or two-digit number.
REGEX = (?:2(?:5[0-5]|[0-4][0-9])|[0-1][0-9][0-9]|[0-9][0-9]?)
[ipv4]
# matches a valid IPv4 optionally followed by :port_num the octets in the ip would also be validated 0-255 range
# Extracts: ip, port
# The port reuses the [int] modular regex, so a hex value such as 0x1F90 is
# also accepted as a port.
REGEX = (?<ip>[[octet]](?:\.[[octet]]){3})(?::[[int:port]])?
[simple_url]
# matches a url of the form proto://domain.tld/uri
# Extracts: url, domain
# The scheme (\w++) and the path are matched but not captured separately;
# domain may include a ":port" suffix because ':' is in its character class.
REGEX = (?<url>\w++://(?<domain>[a-zA-Z0-9\-.:]++)(?:/[^\s"]*)?)
[url]
# matches a url of the form proto://domain.tld/uri
# Extracts: url, proto, domain, uri
# Same shape as [simple_url], but the scheme is letters only (via [[alphas]])
# and is captured as proto, and the optional path is captured as uri.
REGEX = (?<url>[[alphas:proto]]://(?<domain>[a-zA-Z0-9\-.:]++)(?<uri>/[^\s"]*)?)
[simple_uri]
# matches a uri of the form /path/to/resource?query
# Extracts: uri, uri_path, uri_query
# uri_path is everything up to the first whitespace, '?' or '"'. The query
# separator is a literal '?' (written \?), so uri_query holds only the text
# after the '?', which is the same convention the [uri] stanza uses.
REGEX = (?<uri>(?<uri_path>[^\s\?"]++)(?:\?(?<uri_query>[^\s"]+))?)
[uri]
# uri = path optionally followed by query [/this/path/file.js?query=part&other=var]
# path = root part followed by file [/root/part/file.part]
# Extracts: uri, uri_path, uri_root, uri_file, uri_query, uri_domain (optional if in proxy mode)
# uri_root is the leading run of "segment/" pieces (it always starts with '/');
# uri_file is the final piece and may be empty. The uri_file character class
# lists '?' twice, which is redundant but harmless.
REGEX = (?<uri>(?:\w++://(?<uri_domain>[^/\s]++))?(?<uri_path>(?<uri_root>/+(?:[^\s\?;=/]*+/+)*)(?<uri_file>[^\s\?;=?/]*+))(?:\?(?<uri_query>[^\s"]+))?)
###### BASIC MODULAR REGULAR EXPRESSIONS DEFINITION END ###########
# EXAMPLE syslog header stripper
# All three stanzas rewrite _raw (DEST_KEY = _raw) to the captured remainder
# of the line ($1); they differ only in how much of the header they consume.
#
# This will just strip the time stamp
# (header shape: "Mon d hh:mm:ss ", i.e. capitalised word, digits, time).
[syslog-header-stripper-ts]
REGEX = ^[A-Z][a-z]+\s+\d+\s\d+:\d+:\d+\s(.*)$
FORMAT = $1
DEST_KEY = _raw
# This will strip the syslog header (date stamp and host) from a syslog event
# (the host is the first whitespace-delimited token after the time).
[syslog-header-stripper-ts-host]
REGEX = ^[A-Z][a-z]+\s+\d+\s\d+:\d+:\d+\s[^\s]*\s(.*)$
FORMAT = $1
DEST_KEY = _raw
# This will strip out date stamp, host, process with pid and just get the
# actual message
# (everything up to the first ": " after the time is dropped, lazily).
[syslog-header-stripper-ts-host-proc]
REGEX = ^[A-Z][a-z]+\s+\d+\s\d+:\d+:\d+\s.*?:\s(.*)$
FORMAT = $1
DEST_KEY = _raw
# Sets the event host from a syslog line. After ":dd" and whitespace it skips
# any number of purely numeric tokens or facility.level tokens (user.*,
# daemon.*, local?.*), then captures a host token of at least three
# characters, optionally wrapped in [ ], that is followed by whitespace.
[syslog-host]
DEST_KEY = MetaData:Host
REGEX = :\d\d\s+(?:\d+\s+|(?:user|daemon|local.?)\.\w+\s+)*\[?(\w[\w\.\-]{2,})\]?\s
FORMAT = host::$1
# Stricter, line-anchored variant: finds the first dd:dd:dd time on the line
# and captures the token after it when that token is either a dotted quad or
# a host name followed by whitespace and a colon-terminated token.
[syslog-host-full]
DEST_KEY = MetaData:Host
REGEX = ^[^\:]*\d\d\:\d\d\:\d\d[^\:]*?\s((\d+\.\d+\.\d+\.\d+)|(\w[\w\.\-]{2,})(?=\s+[^\s\:]+\:))
FORMAT = host::$1
# These next three transforms date back to 'meta events', or the long-dead
# index-time transaction-like feature; they're left here in case someone is using them
# at search time to extract fields.
# Captures the first run of upper-case letters that is followed by a space,
# a dotted word, a space and '-'; emitted as severity.
[log4-severity]
REGEX = .*?([A-Z]+) [\w\.]+ \-
FORMAT = severity::$1
# Captures the first bracketed number in the event as pid. The pattern does
# not require the word "sendmail" to be present.
[sendmail-pid]
REGEX = \[(\d+)\]
FORMAT = pid::$1
# Captures the word between "sendmail[<digits>]: " and the next ':' as qid.
[sendmail-qid]
REGEX = sendmail\[\d+\]: (\w+):
FORMAT = qid::$1
#######
[cisco-codes]
# Matches message tags of the form ": %A-[B-]N-C:" (case-insensitive from the
# (?i) onward), where A, B and C are runs of letters, digits or underscores
# and N is a single digit 0-7. The optional B segment is not captured.
# $1 = A, $2 = the 0-7 digit, $3 = C.
# NOTE(review): Cisco documents this tag as %FACILITY-SEVERITY-MNEMONIC, which
# would make $2 the severity and $3 the message code, i.e. the reverse of the
# code/severity labels in FORMAT. Left unchanged because existing searches may
# depend on the current field contents -- confirm before swapping.
REGEX = : (?i)%([a-z0-9_]+)-(?:[a-z0-9_]+-)?([0-7])-([a-z0-9_]+):
FORMAT = product::$1 code::$2 severity::$3
# Captures a process name written in parentheses directly before "[pid]:",
# i.e. text shaped like "(name)[123]:".
[syslog-process]
REGEX = \(([a-zA-Z0-9_]+)\)\[\d+\]:
FORMAT = process::$1
# Captures the eight hex digits that follow "] " as code.
[was-trlog-code]
REGEX = ] ([a-fA-F0-9]{8})
FORMAT = code::$1
# Captures the digits of a "<BEA-nnnnnn>" tag as code.
[weblogic-code]
REGEX = <BEA-([0-9]+)>
FORMAT = code::$1
[novell-groupwise-arrival]
# Parses a GroupWise "ARR" (arrival) record: thirteen comma-separated fields
# followed by the remainder of the line as the destination list.
# $0 in FORMAT keeps the existing _meta content and the new pairs are appended.
# ARR MsgType,OriginUserID,MessageID,filename,OriginIDomain,OriginDomain,OriginPostOffice, PreviousHop,Size,Priority,TargApp,ReportStatus,ReportDestination,[destination{destnumber}, ]
REGEX = ARR ([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),(.*)$
FORMAT = $0 msgtype::$1 originuserid::$2 messageid::$3 filename::$4 originidomain::$5 origindomain::$6 originpostoffice::$7 previoushop::$8 size::$9 priority::$10 targapp::$11 reportstatus::$12 reportdestination::$13 destination::$14
DEST_KEY = _meta
[novell-groupwise-queue]
# Parses a GroupWise "QUE" (queue) record: three comma-separated fields
# followed by the remainder of the line as destnum; appended to _meta via $0.
#QUE filename,NextHopType,NextHopName,[destnum,]
REGEX = QUE ([^,]*),([^,]*),([^,]*),(.*)$
FORMAT = $0 filename::$1 nexthoptype::$2 nexthopname::$3 destnum::$4
DEST_KEY = _meta
[novell-groupwise-transfer]
# Parses a GroupWise "TRN" (transfer) record of exactly four comma-separated
# fields and appends them to _meta ($0 keeps the existing _meta content).
# The pattern is keyed on the TRN record tag so that it matches the layout
# documented on the next line rather than queue (QUE) records.
#TRN PeerName,filename,Size,SendTime
REGEX = TRN ([^,]*),([^,]*),([^,]*),([^,]*)$
FORMAT = $0 peername::$1 filename::$2 size::$3 sendtime::$4
DEST_KEY = _meta
######## access-extractions helpers start ########
# make sure to handle escaped quotes (\") inside the URI
[uri_seg]
# one path segment (possibly empty; an escaped quote \" counts as one unit)
# followed by one or more slashes
REGEX = (?:\\"|[^\s\?/"])*+/++
[uri_root]
# the first non-empty path segment, wrapped in leading and trailing slashes
# Extracts: root
REGEX = /++(?<root>(?:\\"|[^\s\?/"])++)/++
[bc_domain]
# scheme://host up to the next '/', whitespace or double quote
# Extracts: domain
REGEX = (?<domain>\w++://[^/\s"]++)
[bc_uri]
# backwards compatible uri regex
# uri = path optionally followed by query [/this/path/file.js?query=part&other=var]
# path = root part followed by file [/root/part/file.part]
# Extracts: uri, uri_path, root, file, uri_query, uri_domain (optional if in proxy mode)
# Built from the helpers in this section: [[bc_domain:uri_]] supplies the
# optional uri_domain, [[uri_root]] supplies root, and [[uri_seg]] consumes
# any further "segment/" pieces before the optional file name.
REGEX = (?<uri>[[bc_domain:uri_]]?+(?<uri_path>[[uri_root]]?[[uri_seg]]*(?<file>[^\s\?/]+)?)(?:\?(?<uri_query>[^\s]*))?)
[reqstr]
# one token of the request line: a run of characters that are neither
# whitespace nor a double quote
REGEX = [^\s"]++
[access-request]
# very relaxed regex for extracting fields from the request
# Matches the double-quoted request: an optional method token, then optionally
# a uri (via [[bc_uri]]) followed by zero or more version tokens. Every part
# is optional, so an empty "" request also matches.
REGEX = "\s*+[[reqstr:method]]?(?:\s++[[bc_uri]](?:\s++[[reqstr:version]])*)?\s*+"
######## access-extractions helpers end ########
[access-extractions]
# matches access-common or access-combined apache logging formats
# Extracts: clientip, clientport, ident, user, req_time, method, uri, root, file, uri_domain, uri_query, version, status, bytes, referer_url, referer_domain, referer_proto, useragent, cookie, other (remaining chars)
# NOTE(review): the list above looks partly out of date -- the pattern below
# names "referer" and (via [[bc_domain:referer_]]) a referer-prefixed domain,
# but nothing visible here names clientport, referer_url or referer_proto.
# Note: referer is misspelled on purpose because that is the "official" spelling for "HTTP referer"
# The referer, useragent and cookie parts are optional and nested, so a
# common-format line without them still matches; leftovers go to "other".
REGEX = ^[[nspaces:clientip]]\s++[[nspaces:ident]]\s++[[nspaces:user]]\s++[[sbstring:req_time]]\s++[[access-request]]\s++[[nspaces:status]]\s++[[nspaces:bytes]](?:\s++"(?<referer>[[bc_domain:referer_]]?+[^"]*+)"(?:\s++[[qstring:useragent]](?:\s++[[qstring:cookie]])?+)?+)?[[all:other]]
[splunk-access-extractions]
# splunk_access.log uses a slightly extended log format
# Same leading fields as [access-extractions]; after the quoted useragent it
# takes two optional bare tokens, vhost and requestid, instead of a quoted
# cookie. Anything left over goes to "other".
REGEX = ^[[nspaces:clientip]]\s++[[nspaces:ident]]\s++[[nspaces:user]]\s++[[sbstring:req_time]]\s++[[access-request]]\s++[[nspaces:status]]\s++[[nspaces:bytes]](?:\s++"(?<referer>[[bc_domain:referer_]]?+[^"]*+)"(?:\s++[[qstring:useragent]](?:\s++[[nspaces:vhost]])?(?:\s++[[nspaces:requestid]])?)?)?[[all:other]]
[splunk-service-extractions]
# Skips the first two space-delimited tokens of the line, then extracts:
# log_level, requestid (the word inside [ ]), component and line (written as
# component:line), and message (everything after " - ").
# Extracts: log_level, requestid, component, line, message
REGEX = (?i)^(?:[^ ]* ){2}(?P<log_level>[^\s]*)\s+\[(?P<requestid>\w+)]\s+(?P<component>[^ ]+):(?P<line>\d+) - (?P<message>.+)
# Captures "name[pid]: " or "name: " preceded by whitespace; the pid part is
# optional, so $2 may be empty.
[syslog-extractions]
REGEX = \s([^\s\[]+)(?:\[(\d+)\])?:\s
FORMAT = process::$1 pid::$2
# Captures pid and queue id from "sendmail[pid]: qid:" and sets process to the
# constant "sendmail".
[sendmail-extractions]
REGEX = sendmail\[(\d+)\]: (\w+):
FORMAT = process::sendmail pid::$1 qid::$2
# Skips the first two whitespace-delimited tokens of the line (presumably the
# date and time -- confirm), then appends the third token as level and the
# fourth as component to _meta ($0 keeps the existing _meta content).
[splunkd-disassembler]
REGEX = ^\S+\s\S+\s+(\S+)\s+(\S+)
FORMAT = $0 level::$1 component::$2
DEST_KEY = _meta
# The three stanzas below are routing transforms: REGEX "." matches any event
# with at least one non-newline character, and FORMAT is written verbatim to
# DEST_KEY.
#
# Sends matching events to the index named "help".
[splunk_help]
DEST_KEY = _MetaData:Index
REGEX = .
FORMAT = help
# Sends matching events to the index named "history".
[splunk_index_history]
DEST_KEY = _MetaData:Index
REGEX = .
FORMAT = history
# Sends matching events to nullQueue, i.e. discards them.
[send_to_nullqueue]
DEST_KEY = queue
REGEX = .
FORMAT = nullQueue
# Extracts both endpoints of an "a.b.c.d:port -> e.f.g.h:port" pair.
# Octet ranges are not validated.
[tcpdump-endpoints]
REGEX = (\d+\.\d+\.\d+\.\d+):(\d+) -> (\d+\.\d+\.\d+\.\d+):(\d+)
FORMAT = src_ip::$1 src_port::$2 dest_ip::$3 dest_port::$4
# Generic key/value extractors: in each stanza $1 becomes the field name and
# $2 its value.
#
# "key: 123" or "key: 0x1F" where the key is letters only and is preceded by
# a space; the value must be followed by a space, newline or end of text.
# Hex digits must be upper-case (A-F).
[colon-kv]
REGEX = (?<= )([A-Za-z]+): ?((0x[A-F\d]+)|\d+)(?= |\n|$)
FORMAT = $1::$2
# Identifier followed by '=', ':' or whitespace and then a numeric value:
# upper-case hex, or an optionally signed run of digits and dots.
[num-kv]
REGEX = ([A-Za-z_][\w_]*)[=:\s]+((0x[A-F\d]+)|[+-]?[\d.]+)
FORMAT = $1::$2
# "key : rest of text", anchored at the start; the value is matched lazily up
# to the end anchor.
[colon-line]
REGEX = ^(\w+)\s*:[ \t]*(.*?)$
FORMAT = $1::$2
# "[key value]": the first non-space token inside the brackets is the key and
# the rest, up to the first ']', is the value.
[bracket-space]
REGEX = \[(\S+) (.*?)\]
FORMAT = $1::$2
# "KEY : value" with an upper-case key; the value ends at a newline, at the
# end of text, or just before the next " KEY :" on the same line.
[db2]
REGEX = ([A-Z]+) *: (.*?)(?=\n|$| +[A-Z]+ *:)
FORMAT = $1::$2
# Example external lookup
# Runs the script external_lookup.py with the two field names as arguments;
# fields_list names the fields the lookup supports (clienthost and clientip).
[dnslookup]
# Interpreter selection for the script: "latest".
python.version = latest
external_cmd = external_lookup.py clienthost clientip
fields_list = clienthost,clientip
[registry]
# Delimiter-based extraction: pairs are separated by newlines, and a key is
# separated from its value by '=' or ':'.
DELIMS = "\n","=:"
#[guid_lookup]
#filename = guid_lookup.csv
#max_matches = 1
#min_matches = 1
#
#[sid_lookup]
#filename = sid_lookup.csv
#max_matches = 1
#min_matches = 1
# Extracts every 8-4-4-4-12 group of word characters (GUID-shaped text) into
# guid_to_trans. MV_ADD makes repeated matches accumulate as a multivalue
# field. Note that \w accepts more than hex digits.
[guid-to-translate]
REGEX = (?<guid_to_trans>\w{8}-\w{4}-\w{4}-\w{4}-\w{12})
MV_ADD = true
# Splits an event at "\nMessage=": everything before it goes to _pre_msg and
# everything after it to Message. CLEAN_KEYS is off, presumably so the leading
# underscore of _pre_msg survives key cleaning -- confirm.
[wel-message]
REGEX = (?sm)^(?<_pre_msg>.+)\nMessage=(?<Message>.+)$
CLEAN_KEYS = false
# Reads _pre_msg and extracts newline-separated key=value pairs from it.
[wel-eq-kv]
SOURCE_KEY = _pre_msg
DELIMS = "\n","="
MV_ADD = true
# Reads Message and extracts "key: value" lines; a key is any run of
# characters without ':' or a line break, and the value is the rest of the
# line.
[wel-col-kv]
SOURCE_KEY = Message
REGEX = \n([^:\n\r]+):[ \t]++([^\n]*)
FORMAT = $1::$2
MV_ADD = true
# key=value extraction where the key is word characters or '-' and the value
# runs to the end of the line. _KEY_1/_VAL_1 are the spec's special group
# names: the text captured by _KEY_1 becomes the field name for _VAL_1.
[ad-kv]
REGEX = (?<_KEY_1>[\w-]+)=(?<_VAL_1>[^\r\n]*)
MV_ADD = true
# Delimiter-based extraction: newline-separated key=value pairs.
[perfmon-kv]
DELIMS = "\n","="
# Sets the event host from the rest of the line after "ComputerName=".
[wmi-host]
REGEX = (?m)ComputerName=(.+)
DEST_KEY = MetaData:Host
FORMAT = host::$1
# Sets the event host from the rest of the line after "wmi_hostname=".
[wmi-override-host]
REGEX = (?m)wmi_hostname=(.+)
DEST_KEY = MetaData:Host
FORMAT = host::$1
# Rewrites _raw to the text that precedes the end-of-event marker. (?s) lets
# '.' span newlines and .* is greedy, so the cut happens at the LAST
# occurrence of the marker.
[strip-winevt-linebreaker]
REGEX = (?s)^(.*)---splunk-wevt-end-of-event---
FORMAT = $1
DEST_KEY = _raw
# Delimiter-based extraction of comma-separated key=value pairs. Repeated
# keys accumulate (MV_ADD), field names are left exactly as written
# (CLEAN_KEYS off), and the extraction is never optimised away.
[stash_extract]
DELIMS = ",", "="
CAN_OPTIMIZE = false
MV_ADD = true
CLEAN_KEYS = false
# Forces the sourcetype of every matching event to "stash".
[set_sourcetype_to_stash]
REGEX = .
DEST_KEY = MetaData:Sourcetype
FORMAT = sourcetype::stash
# Extracts a trailing duration: a decimal number (optional fraction) that is
# preceded by whitespace and immediately followed by "ms" at the end.
# Extracts: spent
[extract_spent]
REGEX = \s(?P<spent>\d+(\.\d+)?)ms$
# Parses a "starting:" remote-search message. The text before "starting:" may
# not contain '=' or a newline, and the pattern is anchored at both ends, so
# every listed key must be present in this exact order.
# Extracts: search_id, server, active_searches, search, remote_ttl,
# apiStartTime, apiEndTime, savedsearch_name, isCMSlave (taken from the
# isClusterPeer= key), bucketMapId, sidType
[remote_searches_extractions_starting]
REGEX = ^[^=\n]*starting: search_id=(?<search_id>[^,]+), server=(?<server>[^,]+), active_searches=(?<active_searches>[^,]+), search='(?<search>.+)', remote_ttl=(?<remote_ttl>.+), apiStartTime='(?<apiStartTime>.+)', apiEndTime='(?<apiEndTime>.+)', savedsearch_name="(?<savedsearch_name>[^"]*)", isClusterPeer=(?<isCMSlave>[^,]+), bucketMapId=(?<bucketMapId>[^,]+), sidType=(?<sidType>[^,]+)$
# Backward-compatibility fallback: it extracts at least the fields that are
# common to all releases, so that events still appear in search results when
# those common fields are referenced.
# It applies when the 'remote_searches_extractions_starting' stanza for the
# current release fails on events generated by earlier Splunk releases
# because the message format changed.
# In particular this is the case for 8.1->8.2 and 8.2->9.0.
# The pattern has no end anchor and stops after savedsearch_name, so any
# trailing keys are ignored.
[remote_searches_extractions_starting_fallback]
REGEX = ^[^=\n]*starting: search_id=(?<search_id>[^,]+), server=(?<server>[^,]+), active_searches=(?<active_searches>[^,]+), search='(?<search>.+)', remote_ttl=(?<remote_ttl>.+), apiStartTime='(?<apiStartTime>.+)', apiEndTime='(?<apiEndTime>.+)', savedsearch_name="(?<savedsearch_name>[^"]*)"
# Parses a "closed:" or "terminated:" remote-search message. The core fields
# (search_id through savedsearch_name) are required; the whole block of
# statistics from drop_count onward is one optional group, so it is matched
# either completely or not at all. Several capture names drop the "search_"
# prefix of their key (e.g. search_rawdata_bucketcache_error is captured as
# rawdata_bucketcache_error).
[remote_searches_extractions_terminated]
REGEX = ^[^=\n]*(closed|terminated): search_id=(?<search_id>[^,]+), server=(?<server>[^,]+), active_searches=(?<active_searches>[^,]+), elapsedTime=(?<elapsedTime>[^,]+), cpuTime=(?<cpuTime>[^,]+), search='(?<search>.+)', savedsearch_name="(?<savedsearch_name>[^"]*)", (drop_count=(?<drop_count>[^,]+), scan_count=(?<scan_count>[^,]+), eliminated_buckets=(?<eliminated_buckets>[^,]+), considered_events=(?<considered_events>[^,]+), decompressed_slices=(?<decompressed_slices>[^,]+), events_count=(?<events_count>[^,]+), total_slices=(?<total_slices>[^,]+), considered_buckets=(?<considered_buckets>[^,]+), search_rawdata_bucketcache_error=(?<rawdata_bucketcache_error>[^,]+), search_rawdata_bucketcache_miss=(?<rawdata_bucketcache_miss>[^,]+), search_index_bucketcache_error=(?<index_bucketcache_error>[^,]+), search_index_bucketcache_hit=(?<index_bucketcache_hit>[^,]+), search_index_bucketcache_miss=(?<index_bucketcache_miss>[^,]+), search_rawdata_bucketcache_hit=(?<rawdata_bucketcache_hit>[^,]+), search_rawdata_bucketcache_miss_wait=(?<rawdata_bucketcache_miss_wait>[^,]+), search_index_bucketcache_miss_wait=(?<index_bucketcache_miss_wait>.+))?$
# Index-time key=value extraction written to _meta (WRITE_META) and applied
# repeatedly across the event (REPEAT_MATCH). Keys are letters, digits, '_'
# or '.'; an opening double quote before the value is skipped, and the value
# stops at the first character outside [a-zA-Z0-9_.-], so values that contain
# spaces are captured only up to the first space.
[field_extraction]
REGEX = ([a-zA-Z0-9_\.]+)=\"?([a-zA-Z0-9_\.-]+)
FORMAT = $1::$2
REPEAT_MATCH = true
WRITE_META = true

Powered by BW's shoe-string budget.