# Splunk_Docker/files/splunkbeta/etc/system/default/transforms.conf

#   Version 9.2.2.20240415
# DO NOT EDIT THIS FILE!
# Changes to default files will be lost on update and are difficult to
# manage and support.
#
# Please make any changes to system defaults by overriding them in
# apps or $SPLUNK_HOME/etc/system/local
# (See "Configuration file precedence" in the web documentation).
#
# To override a specific setting, copy the name of the stanza and
# setting to the file where you wish to override it.
#
# This file contains possible attributes and values you can use to
# configure transforms in transforms.conf.
#

# Global defaults, declared before any stanza header. Values are unchanged;
# the settings are only grouped by role.

# -- what the pattern is applied to --
SOURCE_KEY = _raw
LOOKAHEAD = 4096

# -- pattern and matching limits --
REGEX =
MATCH_LIMIT = 100000
DEPTH_LIMIT = 1000

# -- what gets written, and where --
FORMAT =
DEST_KEY =
DEFAULT_VALUE =
WRITE_META = False

# -- extraction behaviour flags --
MV_ADD = False
CLEAN_KEYS = True
KEEP_EMPTY_VALS = False
CAN_OPTIMIZE = True

# For any event the regex "." matches, writes "tcpOutQueue" to the queue key.
[sendToTCP]
REGEX = .
FORMAT = tcpOutQueue
DEST_KEY = queue

[filetype]
# Captures a three-character extension after "<word char>.": two ASCII letters
# followed by one word character, not followed by a further word character.
REGEX = (?:[\w_]\.([A-Za-z]{2}\w)(?!\w))

[loglevel]
# Captures one of the listed level keywords (upper-case only; the pattern has
# no case-insensitive flag and no word-boundary checks).
REGEX = (FATAL|ERROR|WARN|INFO|DEBUG|TRACE)

[loglevel-weblogic]
# Matches one or more "#", a "<Word D, D H:M:S Word Word>" header, then
# captures the word inside the following "<...>" and emits it as loglevel.
REGEX = #+<\w+ \d+, \d+ \d+:\d+:\d+ \w+ \w+> <(\w+)>
FORMAT = loglevel::$1

[os]
# Case-insensitive match of mac, windows or linux when not directly preceded
# or followed by a letter.
REGEX = (?i:(?<![a-z])(mac|windows|linux)(?![a-z]))

[browser]
# Case-insensitive match of netscape, mozilla, firefox or ie when not directly
# preceded or followed by a letter.
REGEX = (?i:(?<![a-z])(netscape|mozilla|firefox|ie)(?![a-z]))


[language]
# Case-insensitive match of php, java, python, c++ or perl when not directly
# preceded or followed by a letter.
REGEX = (?i:(?<![a-z])(php|java|python|c\+\+|perl)(?![a-z]))

[ip]
# Captures four dot-separated groups of 1-3 digits that are not adjacent to
# other digits. The 0-255 range is NOT validated here (compare [ipv4]).
REGEX = (?:(?<!\d)(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})(?!\d))

[email]
# Captures local@domain.tld where the final label is 1-4 lower-case letters;
# the match must not start in the middle of a word.
REGEX = (?:(?<!\w)(\w[\w\-\.]+@\w[\w\-\.]+\.[a-z]{1,4}))

[exceptionclass]
# After whitespace and "at ", captures the run of word characters, ".", "$",
# "_" and "-" that precedes an opening parenthesis.
REGEX = \sat ([\w\.$_-]+)\(


###### BASIC MODULAR REGULAR EXPRESSIONS DEFINITION START ###########
# When adding a new basic modular regex PLEASE add a comment that lists
# the fields that it extracts (named capturing groups), or whether it
# provides a placeholder for the group name as:
# Extracts: field1, field2....
#

[all_lazy]
# matches any run of characters, as few as possible (lazy)
REGEX = .*?

[all]
# matches any run of characters, as many as possible (greedy)
REGEX = .*

[nspaces]
# matches one or more NON space characters
REGEX = \S+

[alphas]
# matches a string containing only letters a-zA-Z
REGEX = [a-zA-Z]+

[alnums]
# matches a string containing letters + digits
REGEX = [a-zA-Z0-9]+

[qstring]
# matches a quoted "string" - extracts an unnamed variable - name MUST be provided as in [[qstring:name]]
# The content between the quotes may be empty and cannot itself contain a double quote.
# Extracts: empty-name-group (needs name)
REGEX = "(?<>[^"]*+)"

[sbstring]
# matches a string enclosed in [] - extracts an unnamed variable - name MUST be provided as in [[sbstring:name]]
# The content between the brackets may be empty and cannot itself contain "]".
# Extracts: empty-name-group (needs name)
REGEX = \[(?<>[^\]]*+)\]

[digits]
# matches one or more decimal digits
REGEX = \d+

[int]
# matches an integer or a hex number (hex is tried first and requires the 0x prefix)
REGEX = 0x[a-fA-F0-9]+|\d+

[float]
# matches a float (or an int); the integer part before the dot is optional
REGEX = \d*\.\d+|[[int]]

[octet]
# this would match only numbers from 0-255 (one octet in an ip)
# Alternatives, in order: 200-255, three digits 000-199, one or two digits.
# The pattern is unanchored, so on its own it can match a prefix of a longer
# digit run; surrounding context must provide the boundaries.
REGEX = (?:2(?:5[0-5]|[0-4][0-9])|[0-1][0-9][0-9]|[0-9][0-9]?)

[ipv4]
# matches a valid IPv4 optionally followed by :port_num the octets in the ip would also be validated 0-255 range
# The port reuses [[int]], so a 0x-prefixed hex value is accepted there as well.
# Extracts: ip, port
REGEX = (?<ip>[[octet]](?:\.[[octet]]){3})(?::[[int:port]])?

[simple_url]
# matches a url of the form proto://domain.tld/uri
# The domain part allows letters, digits, "-", "." and ":" (so host:port is
# captured as domain); the optional path stops at whitespace or a double quote.
# Extracts: url, domain
REGEX = (?<url>\w++://(?<domain>[a-zA-Z0-9\-.:]++)(?:/[^\s"]*)?)

[url]
# matches a url of the form proto://domain.tld/uri
# Same shape as [simple_url], but the scheme is letters only ([[alphas]]) and
# the scheme and path are captured as separate fields.
# Extracts: url, proto, domain, uri
REGEX = (?<url>[[alphas:proto]]://(?<domain>[a-zA-Z0-9\-.:]++)(?<uri>/[^\s"]*)?)

[simple_uri]
# matches a uri of the form /path/to/resource?query
# uri_path runs up to the first whitespace, "?" or double quote. The "?"
# separator is matched as an escaped literal (\?) and is not part of
# uri_query, the same way [uri] delimits its query part.
# Extracts: uri, uri_path, uri_query
REGEX = (?<uri>(?<uri_path>[^\s\?"]++)(?:\?(?<uri_query>[^\s"]+))?)

[uri]
# uri  = path optionally followed by query [/this/path/file.js?query=part&other=var]
# path = root part followed by file        [/root/part/file.part]
# uri_root is the leading run of "/"-terminated segments; uri_file is whatever
# follows the last "/" up to whitespace, "?", ";", "=" or "/" (may be empty).
# Extracts: uri, uri_path, uri_root, uri_file, uri_query, uri_domain (optional if in proxy mode)
REGEX = (?<uri>(?:\w++://(?<uri_domain>[^/\s]++))?(?<uri_path>(?<uri_root>/+(?:[^\s\?;=/]*+/+)*)(?<uri_file>[^\s\?;=?/]*+))(?:\?(?<uri_query>[^\s"]+))?)


###### BASIC MODULAR REGULAR EXPRESSIONS DEFINITION END ###########


# EXAMPLE syslog header stripper

# Timestamp only: rewrites _raw to whatever follows a leading
# "Mon DD HH:MM:SS " style prefix.
[syslog-header-stripper-ts]
DEST_KEY = _raw
REGEX = ^[A-Z][a-z]+\s+\d+\s\d+:\d+:\d+\s(.*)$
FORMAT = $1

# Timestamp and host: additionally drops the next whitespace-delimited token
# after the timestamp.
[syslog-header-stripper-ts-host]
DEST_KEY = _raw
REGEX = ^[A-Z][a-z]+\s+\d+\s\d+:\d+:\d+\s[^\s]*\s(.*)$
FORMAT = $1

# Timestamp, host and process[pid]: drops everything up to and including the
# first ": " after the timestamp, leaving just the message.
[syslog-header-stripper-ts-host-proc]
DEST_KEY = _raw
REGEX = ^[A-Z][a-z]+\s+\d+\s\d+:\d+:\d+\s.*?:\s(.*)$
FORMAT = $1


[syslog-host]
# Sets the host from a syslog line. After ":NN" and whitespace, skips any
# number of numeric tokens or "user.", "daemon." / "local?." facility tokens,
# then captures a name of at least three characters (optionally in brackets)
# that is followed by whitespace.
DEST_KEY = MetaData:Host
REGEX = :\d\d\s+(?:\d+\s+|(?:user|daemon|local.?)\.\w+\s+)*\[?(\w[\w\.\-]{2,})\]?\s
FORMAT = host::$1

[syslog-host-full]
# Anchored variant: finds the first NN:NN:NN time on the line, then captures
# either a dotted-quad address or a name of at least three characters that is
# followed by whitespace and a colon-terminated token. $1 is the outer group,
# so it holds whichever alternative matched.
DEST_KEY = MetaData:Host
REGEX = ^[^\:]*\d\d\:\d\d\:\d\d[^\:]*?\s((\d+\.\d+\.\d+\.\d+)|(\w[\w\.\-]{2,})(?=\s+[^\s\:]+\:))
FORMAT = host::$1


# These next three transforms date back to 'meta events', or the long-dead
# index-time transaction-like feature; they're left here in case someone is using them
# at search time to extract fields.

[log4-severity]
# Captures an upper-case word that is followed by a space, a dotted name
# (word characters and "."), and " -"; emitted as severity.
REGEX = .*?([A-Z]+) [\w\.]+ \-
FORMAT = severity::$1

[sendmail-pid]
# Captures the digits inside the first "[...]" pair that contains only digits;
# emitted as pid. The pattern itself is not tied to the word "sendmail".
REGEX = \[(\d+)\]
FORMAT = pid::$1

[sendmail-qid]
# Captures the word between "sendmail[<digits>]: " and the next ":"; emitted
# as qid.
REGEX = sendmail\[\d+\]: (\w+):
FORMAT = qid::$1

#######

[cisco-codes]
# Matches ": %<product>-[<sub>-]<digit 0-7>-<token>:" case-insensitively.
# Group 1 is the token after "%", group 2 the single digit, group 3 the
# trailing token; the optional middle token is not captured.
# NOTE(review): FORMAT labels the 0-7 digit ($2) as "code" and the trailing
# token ($3) as "severity". If the digit is meant to be the severity level,
# these two labels look swapped - confirm before relying on either field.
REGEX = : (?i)%([a-z0-9_]+)-(?:[a-z0-9_]+-)?([0-7])-([a-z0-9_]+):
FORMAT = product::$1 code::$2 severity::$3

[syslog-process]
# Captures a name written in parentheses and followed by "[<digits>]:";
# emitted as process.
REGEX = \(([a-zA-Z0-9_]+)\)\[\d+\]:
FORMAT = process::$1

[was-trlog-code]
# Captures exactly eight hex digits following "] "; emitted as code.
REGEX = ] ([a-fA-F0-9]{8})
FORMAT = code::$1

[weblogic-code]
# Captures the digits inside "<BEA-...>"; emitted as code.
REGEX = <BEA-([0-9]+)>
FORMAT = code::$1

[novell-groupwise-arrival]
# ARR MsgType,OriginUserID,MessageID,filename,OriginIDomain,OriginDomain,OriginPostOffice, PreviousHop,Size,Priority,TargApp,ReportStatus,ReportDestination,[destination{destnumber},  ]
# Thirteen comma-free fields followed by the rest of the line, which is
# captured whole as the destination list. The result is written to _meta;
# FORMAT starts with $0 (per transforms.conf.spec, at index time this is the
# value DEST_KEY held before the match, so existing metadata is kept).
REGEX = ARR ([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),(.*)$
FORMAT = $0 msgtype::$1 originuserid::$2 messageid::$3 filename::$4 originidomain::$5 origindomain::$6 originpostoffice::$7  previoushop::$8 size::$9 priority::$10 targapp::$11 reportstatus::$12 reportdestination::$13 destination::$14
DEST_KEY = _meta

[novell-groupwise-queue]
# QUE filename,NextHopType,NextHopName,[destnum,]
# Three comma-free fields followed by the rest of the line as destnum;
# appended to _meta in the same way as the arrival transform.
REGEX = QUE ([^,]*),([^,]*),([^,]*),(.*)$
FORMAT = $0 filename::$1 nexthoptype::$2 nexthopname::$3 destnum::$4
DEST_KEY = _meta

[novell-groupwise-transfer]
# TRN PeerName,filename,Size,SendTime
# Transfer records carry the "TRN" tag shown in the layout line above, so the
# pattern anchors on "TRN " (not "QUE ", which belongs to the queue records).
# Exactly four comma-free fields up to end of line; the result is appended to
# _meta via the leading $0 in FORMAT.
REGEX = TRN ([^,]*),([^,]*),([^,]*),([^,]*)$
FORMAT = $0 peername::$1 filename::$2 size::$3 sendtime::$4
DEST_KEY = _meta


######## access-extractions helpers start ########
# make sure to handle escaped quotes (\") inside the URI
[uri_seg]
# One path segment: any run (possibly empty) of escaped quotes (\") or
# characters other than whitespace, "?", "/" and '"', followed by one or more "/".
REGEX = (?:\\"|[^\s\?/"])*+/++

[uri_root]
# The first path segment: leading "/"s, a non-empty segment, trailing "/"s.
# Extracts: root
REGEX = /++(?<root>(?:\\"|[^\s\?/"])++)/++

[bc_domain]
# scheme://host portion, up to the first "/", whitespace or double quote.
# Extracts: domain (referenced below with a name prefix, e.g. [[bc_domain:uri_]])
REGEX = (?<domain>\w++://[^/\s"]++)

[bc_uri]
# backwards compatible uri regex
# uri  = path optionally followed by query [/this/path/file.js?query=part&other=var]
# path = root part followed by file        [/root/part/file.part]
# Extracts: uri, uri_path, root, file, uri_query, uri_domain (optional if in proxy mode)
REGEX = (?<uri>[[bc_domain:uri_]]?+(?<uri_path>[[uri_root]]?[[uri_seg]]*(?<file>[^\s\?/]+)?)(?:\?(?<uri_query>[^\s]*))?)

[reqstr]
# One request token: a run of characters that are neither whitespace nor '"'.
REGEX = [^\s"]++

[access-request]
# very relaxed regex for extracting fields from the request
# Matches the double-quoted request: optional method, then optionally a uri
# ([[bc_uri]]) followed by any number of version tokens. Every part is
# optional, so an empty "" request also matches.
# Extracts: method, version, plus everything [[bc_uri]] extracts
REGEX = "\s*+[[reqstr:method]]?(?:\s++[[bc_uri]](?:\s++[[reqstr:version]])*)?\s*+"

######## access-extractions helpers end ########

[access-extractions]
# matches access-common or access-combined apache logging formats
# Extracts: clientip, clientport, ident, user, req_time, method, uri, root, file, uri_domain, uri_query, version, status, bytes, referer_url, referer_domain, referer_proto, useragent, cookie, other (remaining chars)
# NOTE(review): the list above names clientport, referer_url and referer_proto,
# but no group with those names is defined in this regex or in the helper
# stanzas it references; the referer groups defined here are referer and
# referer_domain.
# Note: referer is misspelled on purpose because that is the "official" spelling for "HTTP referer"
REGEX = ^[[nspaces:clientip]]\s++[[nspaces:ident]]\s++[[nspaces:user]]\s++[[sbstring:req_time]]\s++[[access-request]]\s++[[nspaces:status]]\s++[[nspaces:bytes]](?:\s++"(?<referer>[[bc_domain:referer_]]?+[^"]*+)"(?:\s++[[qstring:useragent]](?:\s++[[qstring:cookie]])?+)?+)?[[all:other]]

[splunk-access-extractions]
# splunk_access.log uses a slightly extended log format
# Same leading fields as [access-extractions]; after the quoted useragent it
# takes two optional unquoted tokens (vhost, requestid) instead of a quoted
# cookie. Anything left over goes to other.
REGEX = ^[[nspaces:clientip]]\s++[[nspaces:ident]]\s++[[nspaces:user]]\s++[[sbstring:req_time]]\s++[[access-request]]\s++[[nspaces:status]]\s++[[nspaces:bytes]](?:\s++"(?<referer>[[bc_domain:referer_]]?+[^"]*+)"(?:\s++[[qstring:useragent]](?:\s++[[nspaces:vhost]])?(?:\s++[[nspaces:requestid]])?)?)?[[all:other]]

[splunk-service-extractions]
# Case-insensitive. Skips the first two space-terminated tokens, then reads
# "<log_level> [<requestid>] <component>:<line> - <message>".
# Extracts: log_level, requestid, component, line, message
REGEX = (?i)^(?:[^ ]* ){2}(?P<log_level>[^\s]*)\s+\[(?P<requestid>\w+)]\s+(?P<component>[^ ]+):(?P<line>\d+) - (?P<message>.+)

[syslog-extractions]
# Captures "<process>[<pid>]: " preceded by whitespace. The "[pid]" part is
# optional, so $2 may be empty.
REGEX = \s([^\s\[]+)(?:\[(\d+)\])?:\s
FORMAT = process::$1 pid::$2

[sendmail-extractions]
# Matches "sendmail[<pid>]: <qid>:" and emits a fixed process name together
# with the captured pid and qid.
REGEX = sendmail\[(\d+)\]: (\w+):
FORMAT = process::sendmail pid::$1 qid::$2

[splunkd-disassembler]
# Skips the first two whitespace-separated tokens of the line and captures
# the third and fourth as level and component. The result is written to
# _meta, with $0 leading the FORMAT value.
REGEX = ^\S+\s\S+\s+(\S+)\s+(\S+)
FORMAT = $0 level::$1 component::$2
DEST_KEY = _meta

# For any event the regex "." matches, writes "help" to the index key.
[splunk_help]
REGEX = .
FORMAT = help
DEST_KEY = _MetaData:Index

# For any event the regex "." matches, writes "history" to the index key.
[splunk_index_history]
REGEX = .
FORMAT = history
DEST_KEY = _MetaData:Index

# For any event the regex "." matches, writes "nullQueue" to the queue key.
[send_to_nullqueue]
REGEX = .
FORMAT = nullQueue
DEST_KEY = queue

[tcpdump-endpoints]
# Matches "<ip>:<port> -> <ip>:<port>" (dotted digit groups, no range
# validation) and emits source and destination address/port fields.
REGEX = (\d+\.\d+\.\d+\.\d+):(\d+) -> (\d+\.\d+\.\d+\.\d+):(\d+)
FORMAT = src_ip::$1 src_port::$2 dest_ip::$3 dest_port::$4

[colon-kv]
# "Name: value" pairs where the name is letters only and is preceded by a
# space, and the value is decimal or 0x-prefixed hex (upper-case A-F only),
# followed by a space, newline or end of input.
REGEX = (?<= )([A-Za-z]+): ?((0x[A-F\d]+)|\d+)(?= |\n|$)
FORMAT = $1::$2

[num-kv]
# Numeric key/value pairs: an identifier, one or more of "=", ":" or
# whitespace, then 0x-prefixed hex or an optionally signed run of digits and dots.
REGEX = ([A-Za-z_][\w_]*)[=:\s]+((0x[A-F\d]+)|[+-]?[\d.]+)
FORMAT = $1::$2

[colon-line]
# "word : rest" anchored with ^ and $; the key is a single word and the value
# is the remainder after optional spaces/tabs.
REGEX = ^(\w+)\s*:[ \t]*(.*?)$
FORMAT = $1::$2

[bracket-space]
# "[key value]": the key is the first whitespace-free token inside the
# brackets, the value is everything up to the first "]".
REGEX = \[(\S+) (.*?)\]
FORMAT = $1::$2

[db2]
# "KEY : value" pairs with an upper-case key; the value ends at a newline,
# end of input, or just before the next "KEY :" preceded by spaces.
REGEX = ([A-Z]+) *: (.*?)(?=\n|$| +[A-Z]+ *:)
FORMAT = $1::$2

# Example external lookup
[dnslookup]
# External lookup: runs external_lookup.py with the arguments "clienthost
# clientip"; the lookup exposes those same two fields (fields_list).
python.version = latest
external_cmd = external_lookup.py clienthost clientip
fields_list = clienthost,clientip

[registry]
# Delimiter-based extraction. Per transforms.conf.spec, the first quoted set
# ("\n") separates key/value pairs and the second ("=" or ":") separates a key
# from its value.
DELIMS="\n","=:"

#[guid_lookup]
#filename = guid_lookup.csv
#max_matches = 1
#min_matches = 1
#
#[sid_lookup]
#filename = sid_lookup.csv
#max_matches = 1
#min_matches = 1

[guid-to-translate]
# Captures 8-4-4-4-12 groups of word characters separated by "-" as
# guid_to_trans. \w is wider than hex digits, so this does not validate that
# the value is hexadecimal. MV_ADD is enabled for this stanza.
REGEX = (?<guid_to_trans>\w{8}-\w{4}-\w{4}-\w{4}-\w{12})
MV_ADD = true

[wel-message]
# Splits an event at "\nMessage=": everything before goes to _pre_msg and
# everything after goes to Message. (?s) lets "." span newlines and (?m) makes
# ^/$ match at line boundaries. CLEAN_KEYS is turned off for this stanza.
# _pre_msg and Message are the SOURCE_KEYs of [wel-eq-kv] and [wel-col-kv].
REGEX = (?sm)^(?<_pre_msg>.+)\nMessage=(?<Message>.+)$
CLEAN_KEYS = false

# key=value lines taken from the _pre_msg field (one pair per line).
[wel-eq-kv]
SOURCE_KEY = _pre_msg
MV_ADD = true
DELIMS = "\n","="

# "Key: value" lines taken from the Message field; the key is everything up to
# the first ":" on a line, the value is the rest of that line.
[wel-col-kv]
SOURCE_KEY = Message
MV_ADD = true
REGEX = \n([^:\n\r]+):[ \t]++([^\n]*)
FORMAT = $1::$2

# name=value pairs where the name is word characters or "-" and the value
# runs to end of line; uses the _KEY_1/_VAL_1 capture-group names.
[ad-kv]
MV_ADD = true
REGEX = (?<_KEY_1>[\w-]+)=(?<_VAL_1>[^\r\n]*)

[perfmon-kv]
# Delimiter-based extraction with the same delimiter sets as [wel-eq-kv]:
# "\n" and "=".
DELIMS     = "\n","="

[wmi-host]
# Sets the host from the text after "ComputerName=" up to end of line.
REGEX = (?m)ComputerName=(.+)
DEST_KEY = MetaData:Host
FORMAT = host::$1

[wmi-override-host]
# Sets the host from the text after "wmi_hostname=" up to end of line.
REGEX = (?m)wmi_hostname=(.+)
DEST_KEY = MetaData:Host
FORMAT = host::$1

[strip-winevt-linebreaker]
# Rewrites _raw to everything before the last "---splunk-wevt-end-of-event---"
# marker ((?s) lets "." span newlines; the capture is greedy).
REGEX = (?s)^(.*)---splunk-wevt-end-of-event---
FORMAT = $1
DEST_KEY = _raw

# Delimiter-based extraction using "," and "=" as the two delimiter sets.
# Multi-value adds are on; key cleaning and optimization are off.
[stash_extract]
DELIMS = ",", "="
MV_ADD = true
CLEAN_KEYS = false
CAN_OPTIMIZE = false


# For any event the regex "." matches, sets the sourcetype to "stash".
[set_sourcetype_to_stash]
DEST_KEY = MetaData:Sourcetype
REGEX = .
FORMAT = sourcetype::stash

# Captures a trailing "<number>ms" (integer or decimal) preceded by
# whitespace at end of input, as spent.
[extract_spent]
REGEX = \s(?P<spent>\d+(\.\d+)?)ms$

[remote_searches_extractions_starting]
# Parses a "... starting: search_id=..., ..." line, anchored at both ends.
# Extracts: search_id, server, active_searches, search, remote_ttl,
# apiStartTime, apiEndTime, savedsearch_name, isCMSlave, bucketMapId, sidType
# Note: the value of the "isClusterPeer=" key is captured under the field
# name isCMSlave.
REGEX = ^[^=\n]*starting: search_id=(?<search_id>[^,]+), server=(?<server>[^,]+), active_searches=(?<active_searches>[^,]+), search='(?<search>.+)', remote_ttl=(?<remote_ttl>.+), apiStartTime='(?<apiStartTime>.+)', apiEndTime='(?<apiEndTime>.+)', savedsearch_name="(?<savedsearch_name>[^"]*)", isClusterPeer=(?<isCMSlave>[^,]+), bucketMapId=(?<bucketMapId>[^,]+), sidType=(?<sidType>[^,]+)$

# We need this for backward compatibility, to make sure we can extract at least the fields that are common across all releases
# and that events still appear in search results when those common fields are referenced.
# It is used when the 'remote_searches_extractions_starting' stanza for the current release fails on events generated
# by previous Splunk releases because the message format changed.
# In particular, this is the case for 8.1->8.2 and 8.2->9.0.
# The pattern is the same as above up to savedsearch_name and has no end anchor,
# so any trailing keys are ignored.
[remote_searches_extractions_starting_fallback]
REGEX = ^[^=\n]*starting: search_id=(?<search_id>[^,]+), server=(?<server>[^,]+), active_searches=(?<active_searches>[^,]+), search='(?<search>.+)', remote_ttl=(?<remote_ttl>.+), apiStartTime='(?<apiStartTime>.+)', apiEndTime='(?<apiEndTime>.+)', savedsearch_name="(?<savedsearch_name>[^"]*)"


[remote_searches_extractions_terminated]
# Parses a "... closed: ..." or "... terminated: ..." line, anchored at both
# ends. The block of counters starting at drop_count is optional as a whole.
# Several counters are captured without the "search_" prefix that their key
# carries in the event (e.g. search_rawdata_bucketcache_error ->
# rawdata_bucketcache_error). The first group, (closed|terminated), is unnamed.
REGEX = ^[^=\n]*(closed|terminated): search_id=(?<search_id>[^,]+), server=(?<server>[^,]+), active_searches=(?<active_searches>[^,]+), elapsedTime=(?<elapsedTime>[^,]+), cpuTime=(?<cpuTime>[^,]+), search='(?<search>.+)', savedsearch_name="(?<savedsearch_name>[^"]*)", (drop_count=(?<drop_count>[^,]+), scan_count=(?<scan_count>[^,]+), eliminated_buckets=(?<eliminated_buckets>[^,]+), considered_events=(?<considered_events>[^,]+), decompressed_slices=(?<decompressed_slices>[^,]+), events_count=(?<events_count>[^,]+), total_slices=(?<total_slices>[^,]+), considered_buckets=(?<considered_buckets>[^,]+), search_rawdata_bucketcache_error=(?<rawdata_bucketcache_error>[^,]+), search_rawdata_bucketcache_miss=(?<rawdata_bucketcache_miss>[^,]+), search_index_bucketcache_error=(?<index_bucketcache_error>[^,]+), search_index_bucketcache_hit=(?<index_bucketcache_hit>[^,]+), search_index_bucketcache_miss=(?<index_bucketcache_miss>[^,]+), search_rawdata_bucketcache_hit=(?<rawdata_bucketcache_hit>[^,]+), search_rawdata_bucketcache_miss_wait=(?<rawdata_bucketcache_miss_wait>[^,]+), search_index_bucketcache_miss_wait=(?<index_bucketcache_miss_wait>.+))?$

[field_extraction]
# Generic name=value extraction: the name is letters, digits, "_" or ".";
# the value (after an optional opening double quote) is letters, digits, "_",
# "." or "-". The value therefore stops at the first space or other character
# outside that set. REPEAT_MATCH and WRITE_META are both enabled.
REGEX = ([a-zA-Z0-9_\.]+)=\"?([a-zA-Z0-9_\.-]+)
FORMAT = $1::$2
REPEAT_MATCH = true
WRITE_META = true