# Version 9.2.2.20240415
# DO NOT EDIT THIS FILE!
# Changes to default files will be lost on update and are difficult to
# manage and support.
#
# Please make any changes to system defaults by overriding them in
# apps or $SPLUNK_HOME/etc/system/local
# (See "Configuration file precedence" in the web documentation).
#
# To override a specific setting, copy the name of the stanza and
# setting to the file where you wish to override it.
#
# This file contains possible attributes and values you can use to
# configure transforms in transforms.conf.
#

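# For example (an illustrative sketch only, not an active setting): to raise
# MATCH_LIMIT for the [syslog-extractions] transform, copy the stanza name and
# the single setting into $SPLUNK_HOME/etc/system/local/transforms.conf:
#
#   [syslog-extractions]
#   MATCH_LIMIT = 200000
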
SOURCE_KEY = _raw
LOOKAHEAD = 4096
REGEX =
FORMAT =
MATCH_LIMIT = 100000
DEPTH_LIMIT = 1000
DEFAULT_VALUE =
DEST_KEY =
WRITE_META = False
MV_ADD = False
CLEAN_KEYS = True
CAN_OPTIMIZE = True
KEEP_EMPTY_VALS = False

[sendToTCP]
DEST_KEY = queue
REGEX = .
FORMAT = tcpOutQueue

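# A routing transform like [sendToTCP] only takes effect when it is referenced
# from props.conf at index time. A minimal sketch (the sourcetype name
# "my_sourcetype" is just a placeholder):
#
#   [my_sourcetype]
#   TRANSFORMS-routing = sendToTCP
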
[filetype]
REGEX = (?:[\w_]\.([A-Za-z]{2}\w)(?!\w))

[loglevel]
REGEX = (FATAL|ERROR|WARN|INFO|DEBUG|TRACE)

[loglevel-weblogic]
REGEX = #+<\w+ \d+, \d+ \d+:\d+:\d+ \w+ \w+> <(\w+)>
FORMAT = loglevel::$1

[os]
REGEX = (?i:(?<![a-z])(mac|windows|linux)(?![a-z]))

[browser]
REGEX = (?i:(?<![a-z])(netscape|mozilla|firefox|ie)(?![a-z]))


[language]
REGEX = (?i:(?<![a-z])(php|java|python|c\+\+|perl)(?![a-z]))

[ip]
REGEX = (?:(?<!\d)(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})(?!\d))

[email]
REGEX = (?:(?<!\w)(\w[\w\-\.]+@\w[\w\-\.]+\.[a-z]{1,4}))

[exceptionclass]
REGEX = \sat ([\w\.$_-]+)\(


###### BASIC MODULAR REGULAR EXPRESSIONS DEFINITION START ###########
# When adding a new basic modular regex, PLEASE add a comment that lists the
# fields it extracts (named capturing groups), or note that it provides a
# placeholder for the group name, as in:
# Extracts: field1, field2....
#

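# A modular regex is referenced from other stanzas as [[name]], or as
# [[name:field]] when a field name must be supplied (see [qstring] below).
# An illustrative, commented-out sketch (the stanza name is just a placeholder):
#
#   [my_listen_port]
#   REGEX = listening on port [[int:listen_port]]
#
# [ipv4] and [access-extractions] further down use the same mechanism.
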
[all_lazy]
REGEX = .*?

[all]
REGEX = .*

[nspaces]
# matches one or more non-space characters
REGEX = \S+

[alphas]
# matches a string containing only letters a-zA-Z
REGEX = [a-zA-Z]+

[alnums]
# matches a string containing letters and digits
REGEX = [a-zA-Z0-9]+

[qstring]
# matches a quoted "string" - extracts an unnamed variable - name MUST be provided as in [[qstring:name]]
# Extracts: empty-name-group (needs name)
REGEX = "(?<>[^"]*+)"

[sbstring]
# matches a string enclosed in [] - extracts an unnamed variable - name MUST be provided as in [[sbstring:name]]
# Extracts: empty-name-group (needs name)
REGEX = \[(?<>[^\]]*+)\]

[digits]
REGEX = \d+

[int]
# matches an integer or a hex number
REGEX = 0x[a-fA-F0-9]+|\d+

[float]
# matches a float (or an int)
REGEX = \d*\.\d+|[[int]]

[octet]
# matches only numbers from 0-255 (one octet in an IP address)
REGEX = (?:2(?:5[0-5]|[0-4][0-9])|[0-1][0-9][0-9]|[0-9][0-9]?)

[ipv4]
# matches a valid IPv4 address, optionally followed by :port_num; each octet is
# validated against the 0-255 range
# Extracts: ip, port
REGEX = (?<ip>[[octet]](?:\.[[octet]]){3})(?::[[int:port]])?

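# For example, a value such as 10.1.2.3:8080 would yield ip::10.1.2.3 and
# port::8080; each octet is constrained to the 0-255 range by [[octet]].
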
[simple_url]
# matches a url of the form proto://domain.tld/uri
# Extracts: url, domain
REGEX = (?<url>\w++://(?<domain>[a-zA-Z0-9\-.:]++)(?:/[^\s"]*)?)

[url]
# matches a url of the form proto://domain.tld/uri
# Extracts: url, proto, domain, uri
REGEX = (?<url>[[alphas:proto]]://(?<domain>[a-zA-Z0-9\-.:]++)(?<uri>/[^\s"]*)?)

[simple_uri]
# matches a uri of the form /path/to/resource?query
# Extracts: uri, uri_path, uri_query
REGEX = (?<uri>(?<uri_path>[^\s\?"]++)(?:\\?(?<uri_query>[^\s"]+))?)

[uri]
# uri = path optionally followed by query [/this/path/file.js?query=part&other=var]
# path = root part followed by file [/root/part/file.part]
# Extracts: uri, uri_path, uri_root, uri_file, uri_query, uri_domain (optional if in proxy mode)
REGEX = (?<uri>(?:\w++://(?<uri_domain>[^/\s]++))?(?<uri_path>(?<uri_root>/+(?:[^\s\?;=/]*+/+)*)(?<uri_file>[^\s\?;=?/]*+))(?:\?(?<uri_query>[^\s"]+))?)
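# For the sample uri from the comment above, /this/path/file.js?query=part&other=var,
# the groups would capture roughly: uri_root=/this/path/, uri_file=file.js,
# uri_path=/this/path/file.js, and uri_query=query=part&other=var.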

###### BASIC MODULAR REGULAR EXPRESSIONS DEFINITION END ###########


# EXAMPLE syslog header stripper

# This will just strip the time stamp
[syslog-header-stripper-ts]
REGEX = ^[A-Z][a-z]+\s+\d+\s\d+:\d+:\d+\s(.*)$
FORMAT = $1
DEST_KEY = _raw

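# For example (illustrative values only), an event beginning with
# "Jan 12 06:25:41 myhost sshd[1234]: Accepted publickey ..." would have its
# _raw rewritten to "myhost sshd[1234]: Accepted publickey ...", dropping the
# leading timestamp.
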
# This will strip the syslog header (date stamp and host) from a syslog event
[syslog-header-stripper-ts-host]
REGEX = ^[A-Z][a-z]+\s+\d+\s\d+:\d+:\d+\s[^\s]*\s(.*)$
FORMAT = $1
DEST_KEY = _raw

# This will strip out the date stamp, host, and process (with pid), keeping just
# the actual message
[syslog-header-stripper-ts-host-proc]
REGEX = ^[A-Z][a-z]+\s+\d+\s\d+:\d+:\d+\s.*?:\s(.*)$
FORMAT = $1
DEST_KEY = _raw


[syslog-host]
DEST_KEY = MetaData:Host
REGEX = :\d\d\s+(?:\d+\s+|(?:user|daemon|local.?)\.\w+\s+)*\[?(\w[\w\.\-]{2,})\]?\s
FORMAT = host::$1

[syslog-host-full]
DEST_KEY = MetaData:Host
REGEX = ^[^\:]*\d\d\:\d\d\:\d\d[^\:]*?\s((\d+\.\d+\.\d+\.\d+)|(\w[\w\.\-]{2,})(?=\s+[^\s\:]+\:))
FORMAT = host::$1


# These next three transforms date back to 'meta events', or the long-dead
# index-time transaction-like feature; they're left here in case someone is using them
# at search time to extract fields.

[log4-severity]
REGEX = .*?([A-Z]+) [\w\.]+ \-
FORMAT = severity::$1

[sendmail-pid]
REGEX = \[(\d+)\]
FORMAT = pid::$1

[sendmail-qid]
REGEX = sendmail\[\d+\]: (\w+):
FORMAT = qid::$1

#######

[cisco-codes]
REGEX = : (?i)%([a-z0-9_]+)-(?:[a-z0-9_]+-)?([0-7])-([a-z0-9_]+):
FORMAT = product::$1 code::$2 severity::$3

[syslog-process]
REGEX = \(([a-zA-Z0-9_]+)\)\[\d+\]:
FORMAT = process::$1

[was-trlog-code]
REGEX = ] ([a-fA-F0-9]{8})
FORMAT = code::$1

[weblogic-code]
REGEX = <BEA-([0-9]+)>
FORMAT = code::$1

[novell-groupwise-arrival]
# ARR MsgType,OriginUserID,MessageID,filename,OriginIDomain,OriginDomain,OriginPostOffice, PreviousHop,Size,Priority,TargApp,ReportStatus,ReportDestination,[destination{destnumber}, ]
REGEX = ARR ([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),(.*)$
FORMAT = $0 msgtype::$1 originuserid::$2 messageid::$3 filename::$4 originidomain::$5 origindomain::$6 originpostoffice::$7 previoushop::$8 size::$9 priority::$10 targapp::$11 reportstatus::$12 reportdestination::$13 destination::$14
DEST_KEY = _meta

[novell-groupwise-queue]
# QUE filename,NextHopType,NextHopName,[destnum,]
REGEX = QUE ([^,]*),([^,]*),([^,]*),(.*)$
FORMAT = $0 filename::$1 nexthoptype::$2 nexthopname::$3 destnum::$4
DEST_KEY = _meta

[novell-groupwise-transfer]
# TRN PeerName,filename,Size,SendTime
REGEX = TRN ([^,]*),([^,]*),([^,]*),([^,]*)$
FORMAT = $0 peername::$1 filename::$2 size::$3 sendtime::$4
DEST_KEY = _meta


######## access-extractions helpers start ########
# make sure to handle escaped quotes (\") inside the URI
[uri_seg]
REGEX = (?:\\"|[^\s\?/"])*+/++

[uri_root]
REGEX = /++(?<root>(?:\\"|[^\s\?/"])++)/++

[bc_domain]
REGEX = (?<domain>\w++://[^/\s"]++)

[bc_uri]
# backwards compatible uri regex
# uri = path optionally followed by query [/this/path/file.js?query=part&other=var]
# path = root part followed by file [/root/part/file.part]
# Extracts: uri, uri_path, root, file, uri_query, uri_domain (optional if in proxy mode)
REGEX = (?<uri>[[bc_domain:uri_]]?+(?<uri_path>[[uri_root]]?[[uri_seg]]*(?<file>[^\s\?/]+)?)(?:\?(?<uri_query>[^\s]*))?)

[reqstr]
REGEX = [^\s"]++

[access-request]
# very relaxed regex for extracting fields from the request
REGEX = "\s*+[[reqstr:method]]?(?:\s++[[bc_uri]](?:\s++[[reqstr:version]])*)?\s*+"

######## access-extractions helpers end ########

[access-extractions]
# matches access-common or access-combined apache logging formats
# Extracts: clientip, clientport, ident, user, req_time, method, uri, root, file, uri_domain, uri_query, version, status, bytes, referer_url, referer_domain, referer_proto, useragent, cookie, other (remaining chars)
# Note: referer is deliberately misspelled because that is the "official" spelling for "HTTP referer"
REGEX = ^[[nspaces:clientip]]\s++[[nspaces:ident]]\s++[[nspaces:user]]\s++[[sbstring:req_time]]\s++[[access-request]]\s++[[nspaces:status]]\s++[[nspaces:bytes]](?:\s++"(?<referer>[[bc_domain:referer_]]?+[^"]*+)"(?:\s++[[qstring:useragent]](?:\s++[[qstring:cookie]])?+)?+)?[[all:other]]
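
# A search-time transform like this is applied by referencing it from props.conf
# with a REPORT- setting, for example (a sketch; the sourcetype name and report
# class are placeholders):
#
#   [access_combined]
#   REPORT-access = access-extractions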

[splunk-access-extractions]
# splunk_access.log uses a slightly extended log format
REGEX = ^[[nspaces:clientip]]\s++[[nspaces:ident]]\s++[[nspaces:user]]\s++[[sbstring:req_time]]\s++[[access-request]]\s++[[nspaces:status]]\s++[[nspaces:bytes]](?:\s++"(?<referer>[[bc_domain:referer_]]?+[^"]*+)"(?:\s++[[qstring:useragent]](?:\s++[[nspaces:vhost]])?(?:\s++[[nspaces:requestid]])?)?)?[[all:other]]

[splunk-service-extractions]
REGEX = (?i)^(?:[^ ]* ){2}(?P<log_level>[^\s]*)\s+\[(?P<requestid>\w+)]\s+(?P<component>[^ ]+):(?P<line>\d+) - (?P<message>.+)

[syslog-extractions]
REGEX = \s([^\s\[]+)(?:\[(\d+)\])?:\s
FORMAT = process::$1 pid::$2

[sendmail-extractions]
REGEX = sendmail\[(\d+)\]: (\w+):
FORMAT = process::sendmail pid::$1 qid::$2

[splunkd-disassembler]
REGEX = ^\S+\s\S+\s+(\S+)\s+(\S+)
FORMAT = $0 level::$1 component::$2
DEST_KEY = _meta

[splunk_help]
DEST_KEY = _MetaData:Index
REGEX = .
FORMAT = help

[splunk_index_history]
DEST_KEY = _MetaData:Index
REGEX = .
FORMAT = history

[send_to_nullqueue]
DEST_KEY = queue
REGEX = .
FORMAT = nullQueue

[tcpdump-endpoints]
REGEX = (\d+\.\d+\.\d+\.\d+):(\d+) -> (\d+\.\d+\.\d+\.\d+):(\d+)
FORMAT = src_ip::$1 src_port::$2 dest_ip::$3 dest_port::$4

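# For example, the text "10.0.0.1:514 -> 192.168.1.5:8080" would yield
# src_ip::10.0.0.1 src_port::514 dest_ip::192.168.1.5 dest_port::8080.
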
[colon-kv]
REGEX = (?<= )([A-Za-z]+): ?((0x[A-F\d]+)|\d+)(?= |\n|$)
FORMAT = $1::$2

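# For example, " Status: 0x1F4 Retries: 3" would yield Status::0x1F4 and
# Retries::3; values must be decimal integers or 0x hex numbers.
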
[num-kv]
REGEX = ([A-Za-z_][\w_]*)[=:\s]+((0x[A-F\d]+)|[+-]?[\d.]+)
FORMAT = $1::$2

[colon-line]
REGEX = ^(\w+)\s*:[ \t]*(.*?)$
FORMAT = $1::$2

[bracket-space]
REGEX = \[(\S+) (.*?)\]
FORMAT = $1::$2

[db2]
REGEX = ([A-Z]+) *: (.*?)(?=\n|$| +[A-Z]+ *:)
FORMAT = $1::$2

# Example external lookup
[dnslookup]
python.version = latest
external_cmd = external_lookup.py clienthost clientip
fields_list = clienthost,clientip

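# A sketch of how this lookup could be invoked at search time (the field names
# come from fields_list above):
#
#   ... | lookup dnslookup clientip OUTPUT clienthost
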
[registry]
DELIMS="\n","=:"

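# With these delimiters an event is first split on newlines, and each resulting
# line is then split into a field name and value on "=" or ":". Illustrative
# values only: a line such as registry_type=SetValue would yield
# registry_type::SetValue.
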
#[guid_lookup]
#filename = guid_lookup.csv
#max_matches = 1
#min_matches = 1
#
#[sid_lookup]
#filename = sid_lookup.csv
#max_matches = 1
#min_matches = 1

[guid-to-translate]
REGEX = (?<guid_to_trans>\w{8}-\w{4}-\w{4}-\w{4}-\w{12})
MV_ADD = true

[wel-message]
REGEX = (?sm)^(?<_pre_msg>.+)\nMessage=(?<Message>.+)$
CLEAN_KEYS = false

[wel-eq-kv]
SOURCE_KEY = _pre_msg
DELIMS = "\n","="
MV_ADD = true

[wel-col-kv]
SOURCE_KEY = Message
REGEX = \n([^:\n\r]+):[ \t]++([^\n]*)
FORMAT = $1::$2
MV_ADD = true

[ad-kv]
REGEX = (?<_KEY_1>[\w-]+)=(?<_VAL_1>[^\r\n]*)
MV_ADD = true

[perfmon-kv]
DELIMS = "\n","="

[wmi-host]
REGEX = (?m)ComputerName=(.+)
DEST_KEY = MetaData:Host
FORMAT = host::$1

[wmi-override-host]
REGEX = (?m)wmi_hostname=(.+)
DEST_KEY = MetaData:Host
FORMAT = host::$1

[strip-winevt-linebreaker]
REGEX = (?s)^(.*)---splunk-wevt-end-of-event---
FORMAT = $1
DEST_KEY = _raw

[stash_extract]
DELIMS = ",", "="
CAN_OPTIMIZE = false
MV_ADD = true
CLEAN_KEYS = false


[set_sourcetype_to_stash]
REGEX = .
DEST_KEY = MetaData:Sourcetype
FORMAT = sourcetype::stash

[extract_spent]
REGEX = \s(?P<spent>\d+(\.\d+)?)ms$

[remote_searches_extractions_starting]
REGEX = ^[^=\n]*starting: search_id=(?<search_id>[^,]+), server=(?<server>[^,]+), active_searches=(?<active_searches>[^,]+), search='(?<search>.+)', remote_ttl=(?<remote_ttl>.+), apiStartTime='(?<apiStartTime>.+)', apiEndTime='(?<apiEndTime>.+)', savedsearch_name="(?<savedsearch_name>[^"]*)", isClusterPeer=(?<isCMSlave>[^,]+), bucketMapId=(?<bucketMapId>[^,]+), sidType=(?<sidType>[^,]+)$

# We need this for backward compatibility, so that we can extract at least the
# fields that are common across all releases and events still appear in search
# results when those common fields are referenced.
# It is used when the 'remote_searches_extractions_starting' stanza for the
# current release fails on events generated by earlier Splunk releases because
# of a message format change; in particular, this applies to the 8.1->8.2 and
# 8.2->9.0 transitions.
[remote_searches_extractions_starting_fallback]
REGEX = ^[^=\n]*starting: search_id=(?<search_id>[^,]+), server=(?<server>[^,]+), active_searches=(?<active_searches>[^,]+), search='(?<search>.+)', remote_ttl=(?<remote_ttl>.+), apiStartTime='(?<apiStartTime>.+)', apiEndTime='(?<apiEndTime>.+)', savedsearch_name="(?<savedsearch_name>[^"]*)"

[remote_searches_extractions_terminated]
REGEX = ^[^=\n]*(closed|terminated): search_id=(?<search_id>[^,]+), server=(?<server>[^,]+), active_searches=(?<active_searches>[^,]+), elapsedTime=(?<elapsedTime>[^,]+), cpuTime=(?<cpuTime>[^,]+), search='(?<search>.+)', savedsearch_name="(?<savedsearch_name>[^"]*)", (drop_count=(?<drop_count>[^,]+), scan_count=(?<scan_count>[^,]+), eliminated_buckets=(?<eliminated_buckets>[^,]+), considered_events=(?<considered_events>[^,]+), decompressed_slices=(?<decompressed_slices>[^,]+), events_count=(?<events_count>[^,]+), total_slices=(?<total_slices>[^,]+), considered_buckets=(?<considered_buckets>[^,]+), search_rawdata_bucketcache_error=(?<rawdata_bucketcache_error>[^,]+), search_rawdata_bucketcache_miss=(?<rawdata_bucketcache_miss>[^,]+), search_index_bucketcache_error=(?<index_bucketcache_error>[^,]+), search_index_bucketcache_hit=(?<index_bucketcache_hit>[^,]+), search_index_bucketcache_miss=(?<index_bucketcache_miss>[^,]+), search_rawdata_bucketcache_hit=(?<rawdata_bucketcache_hit>[^,]+), search_rawdata_bucketcache_miss_wait=(?<rawdata_bucketcache_miss_wait>[^,]+), search_index_bucketcache_miss_wait=(?<index_bucketcache_miss_wait>.+))?$

[field_extraction]
REGEX = ([a-zA-Z0-9_\.]+)=\"?([a-zA-Z0-9_\.-]+)
FORMAT = $1::$2
REPEAT_MATCH = true
WRITE_META = true
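# For example, with REPEAT_MATCH enabled, raw text such as status=200 user="alice"
# would yield status::200 and user::alice (illustrative values only).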