# Version
# DO NOT EDIT THIS FILE!
# Changes to default files will be lost on update and are difficult to
# manage and support.
#
# Please make any changes to system defaults by overriding them in
# apps or $SPLUNK_HOME/etc/system/local
# (See "Configuration file precedence" in the web documentation).
#
# To override a specific setting, copy the name of the stanza and
# setting to the file where you wish to override it.
#
# This file contains possible attributes and values you can use to
# configure transform in transforms.conf.
#

# Settings declared before the first stanza header.
SOURCE_KEY = _raw
LOOKAHEAD = 4096
REGEX =
FORMAT =
MATCH_LIMIT = 100000
DEPTH_LIMIT = 1000
DEFAULT_VALUE =
DEST_KEY =
WRITE_META = False
MV_ADD = False
CLEAN_KEYS = True
CAN_OPTIMIZE = True
KEEP_EMPTY_VALS = False

# Matches any event (REGEX = .) and writes "tcpOutQueue" to the queue key.
[sendToTCP]
DEST_KEY = queue
REGEX = .
FORMAT = tcpOutQueue

# Captures a three-character extension that follows a dot: two letters plus one word character.
[filetype]
REGEX = (?:[\w_]\.([A-Za-z]{2}\w)(?!\w))

[loglevel]
REGEX = (FATAL|ERROR|WARN|INFO|DEBUG|TRACE)

[loglevel-weblogic]
REGEX = #+<\w+ \d+, \d+ \d+:\d+:\d+ \w+ \w+> <(\w+)>
FORMAT = loglevel::$1

[os]
# NOTE(review): this REGEX is not a valid pattern as written (unclosed groups,
# stray trailing quote). It looks like the [os] pattern was fused with the tail
# of a quoted-string helper and everything in between was lost. Restore it from
# the vendor-shipped file. Modular regexes referenced elsewhere in this file --
# [[nspaces:...]], [[alphas:...]], [[qstring:...]], [[all:...]] -- have no visible
# definition and may have been lost in the same span; confirm they are defined.
REGEX = (?i:(?[^"]*+)"

[sbstring]
# matches a string enclosed in [] - extracts an unnamed variable - name MUST be provided as in [[sbstring:name]]
# Extracts: empty-name-group (needs name)
REGEX = \[(?<>[^\]]*+)\]

[digits]
REGEX = \d+

[int]
# matches an integer or a hex number
REGEX = 0x[a-fA-F0-9]+|\d+

[float]
# matches a float (or an int)
REGEX = \d*\.\d+|[[int]]

[octet]
# this would match only numbers from 0-255 (one octet in an ip)
REGEX = (?:2(?:5[0-5]|[0-4][0-9])|[0-1][0-9][0-9]|[0-9][0-9]?)

[ipv4]
# matches a valid IPv4 optionally followed by :port_num the octets in the ip would also be validated 0-255 range
# Extracts: ip, port
# The outer group is named "ip" per the Extracts list; "port" comes from [[int:port]].
REGEX = (?<ip>[[octet]](?:\.[[octet]]){3})(?::[[int:port]])?

[simple_url]
# matches a url of the form proto://domain.tld/uri
# Extracts: url, domain
# Group names follow the Extracts list: outer group = url, host part = domain.
REGEX = (?<url>\w++://(?<domain>[a-zA-Z0-9\-.:]++)(?:/[^\s"]*)?)
[url]
# matches a url of the form proto://domain.tld/uri
# Extracts: url, proto, domain, uri
# Group names follow the Extracts list; "proto" is supplied by [[alphas:proto]].
# NOTE(review): no [alphas] stanza is visible in this file -- confirm it is defined.
REGEX = (?<url>[[alphas:proto]]://(?<domain>[a-zA-Z0-9\-.:]++)(?<uri>/[^\s"]*)?)

[simple_uri]
# matches a uri of the form /path/to/resource?query
# Extracts: uri, uri_path, uri_query
REGEX = (?<uri>(?<uri_path>[^\s\?"]++)(?:\\?(?<uri_query>[^\s"]+))?)

[uri]
# uri = path optionally followed by query [/this/path/file.js?query=part&other=var]
# path = root part followed by file [/root/part/file.part]
# Extracts: uri, uri_path, uri_root, uri_file, uri_query, uri_domain (optional if in proxy mode)
# Nesting: uri wraps everything; uri_path wraps uri_root followed by uri_file.
REGEX = (?<uri>(?:\w++://(?<uri_domain>[^/\s]++))?(?<uri_path>(?<uri_root>/+(?:[^\s\?;=/]*+/+)*)(?<uri_file>[^\s\?;=?/]*+))(?:\?(?<uri_query>[^\s"]+))?)

###### BASIC MODULAR REGULAR EXPRESSIONS DEFINITION END ###########

# EXAMPLE syslog header stripper
# This will just strip the time stamp
[syslog-header-stripper-ts]
REGEX = ^[A-Z][a-z]+\s+\d+\s\d+:\d+:\d+\s(.*)$
FORMAT = $1
DEST_KEY = _raw

# This will strip the syslog header (date stamp and host) from a syslog event
[syslog-header-stripper-ts-host]
REGEX = ^[A-Z][a-z]+\s+\d+\s\d+:\d+:\d+\s[^\s]*\s(.*)$
FORMAT = $1
DEST_KEY = _raw

# This will strip out date stamp, host, process with pid and just get the
# actual message
[syslog-header-stripper-ts-host-proc]
REGEX = ^[A-Z][a-z]+\s+\d+\s\d+:\d+:\d+\s.*?:\s(.*)$
FORMAT = $1
DEST_KEY = _raw

# Writes the first capture to MetaData:Host as host::<value>.
[syslog-host]
DEST_KEY = MetaData:Host
REGEX = :\d\d\s+(?:\d+\s+|(?:user|daemon|local.?)\.\w+\s+)*\[?(\w[\w\.\-]{2,})\]?\s
FORMAT = host::$1

# Anchored variant of syslog-host: the capture is either a dotted-quad address
# or a name that is followed by a "token:" sequence.
[syslog-host-full]
DEST_KEY = MetaData:Host
REGEX = ^[^\:]*\d\d\:\d\d\:\d\d[^\:]*?\s((\d+\.\d+\.\d+\.\d+)|(\w[\w\.\-]{2,})(?=\s+[^\s\:]+\:))
FORMAT = host::$1

# These next three transforms date back to 'meta events', or the long-dead
# index-time transaction-like feature; they're left here in case someone is using them
# at search time to extract fields.
[log4-severity]
REGEX = .*?([A-Z]+) [\w\.]+ \-
FORMAT = severity::$1

[sendmail-pid]
REGEX = \[(\d+)\]
FORMAT = pid::$1

[sendmail-qid]
REGEX = sendmail\[\d+\]: (\w+):
FORMAT = qid::$1

#######

[cisco-codes]
# NOTE(review): FORMAT assigns the single digit captured by ([0-7]) to "code" and
# the trailing token to "severity". That looks swapped for a
# %FACILITY-SEVERITY-MNEMONIC message, but existing searches may depend on the
# current field names -- confirm before changing.
REGEX = : (?i)%([a-z0-9_]+)-(?:[a-z0-9_]+-)?([0-7])-([a-z0-9_]+):
FORMAT = product::$1 code::$2 severity::$3

[syslog-process]
REGEX = \(([a-zA-Z0-9_]+)\)\[\d+\]:
FORMAT = process::$1

[was-trlog-code]
REGEX = ] ([a-fA-F0-9]{8})
FORMAT = code::$1

[weblogic-code]
# NOTE(review): REGEX is empty although FORMAT references $1, so this stanza can
# never emit a code. The pattern appears to have been lost; restore it from the
# vendor-shipped file.
REGEX =
FORMAT = code::$1

[novell-groupwise-arrival]
# ARR MsgType,OriginUserID,MessageID,filename,OriginIDomain,OriginDomain,OriginPostOffice, PreviousHop,Size,Priority,TargApp,ReportStatus,ReportDestination,[destination{destnumber}, ]
REGEX = ARR ([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),(.*)$
FORMAT = $0 msgtype::$1 originuserid::$2 messageid::$3 filename::$4 originidomain::$5 origindomain::$6 originpostoffice::$7 previoushop::$8 size::$9 priority::$10 targapp::$11 reportstatus::$12 reportdestination::$13 destination::$14
DEST_KEY = _meta

[novell-groupwise-queue]
# QUE filename,NextHopType,NextHopName,[destnum,]
REGEX = QUE ([^,]*),([^,]*),([^,]*),(.*)$
FORMAT = $0 filename::$1 nexthoptype::$2 nexthopname::$3 destnum::$4
DEST_KEY = _meta

[novell-groupwise-transfer]
# TRN PeerName,filename,Size,SendTime
# The pattern must start with the TRN record tag documented above; a QUE prefix
# would make this stanza match queue records and label their fields as
# peername/filename/size/sendtime.
REGEX = TRN ([^,]*),([^,]*),([^,]*),([^,]*)$
FORMAT = $0 peername::$1 filename::$2 size::$3 sendtime::$4
DEST_KEY = _meta

######## access-extractions helpers start ########
# make sure to handle escaped quotes (\") inside the URI
[uri_seg]
REGEX = (?:\\"|[^\s\?/"])*+/++

[uri_root]
# Group name "root" matches the Extracts list of [bc_uri], which embeds this stanza.
REGEX = /++(?<root>(?:\\"|[^\s\?/"])++)/++

[bc_domain]
# Group name "domain" combines with the prefixes used by callers
# ([[bc_domain:uri_]], [[bc_domain:referer_]]) to give uri_domain / referer_domain.
REGEX = (?<domain>\w++://[^/\s"]++)

[bc_uri]
# backwards compatible uri regex
# uri = path optionally followed by query [/this/path/file.js?query=part&other=var]
# path = root part followed by file [/root/part/file.part]
# Extracts: uri, uri_path, root, file, uri_query, uri_domain (optional if in proxy mode)
REGEX = (?<uri>[[bc_domain:uri_]]?+(?<uri_path>[[uri_root]]?[[uri_seg]]*(?<file>[^\s\?/]+)?)(?:\?(?<uri_query>[^\s]*))?)

[reqstr]
REGEX = [^\s"]++

[access-request]
# very relaxed regex for extracting fields from the request
REGEX = "\s*+[[reqstr:method]]?(?:\s++[[bc_uri]](?:\s++[[reqstr:version]])*)?\s*+"

######## access-extractions helpers end ########

[access-extractions]
# matches access-common or access-combined apache logging formats
# Extracts: clientip, clientport, ident, user, req_time, method, uri, root, file, uri_domain, uri_query, version, status, bytes, referer_url, referer_domain, referer_proto, useragent, cookie, other (remaining chars)
# Note: referer is misspelled on purpose because that is the "official" spelling for "HTTP referer"
# NOTE(review): the name of the quoted-referer group was lost; it is restored here
# as "referer", although the Extracts list above says "referer_url" -- confirm
# against the vendor-shipped file. [[nspaces:...]], [[qstring:...]] and
# [[all:...]] have no visible definition in this file -- confirm they are defined.
REGEX = ^[[nspaces:clientip]]\s++[[nspaces:ident]]\s++[[nspaces:user]]\s++[[sbstring:req_time]]\s++[[access-request]]\s++[[nspaces:status]]\s++[[nspaces:bytes]](?:\s++"(?<referer>[[bc_domain:referer_]]?+[^"]*+)"(?:\s++[[qstring:useragent]](?:\s++[[qstring:cookie]])?+)?+)?[[all:other]]

[splunk-access-extractions]
# splunk_access.log uses a slightly extended log format
# NOTE(review): quoted-referer group name restored as "referer", same caveat as
# for [access-extractions] -- confirm against the vendor-shipped file.
REGEX = ^[[nspaces:clientip]]\s++[[nspaces:ident]]\s++[[nspaces:user]]\s++[[sbstring:req_time]]\s++[[access-request]]\s++[[nspaces:status]]\s++[[nspaces:bytes]](?:\s++"(?<referer>[[bc_domain:referer_]]?+[^"]*+)"(?:\s++[[qstring:useragent]](?:\s++[[nspaces:vhost]])?(?:\s++[[nspaces:requestid]])?)?)?[[all:other]]

[splunk-service-extractions]
# NOTE(review): the five (?P...) groups have lost their <name> parts, so this
# pattern does not compile. Nothing in this file states the intended field
# names; restore them from the vendor-shipped file.
REGEX = (?i)^(?:[^ ]* ){2}(?P[^\s]*)\s+\[(?P\w+)]\s+(?P[^ ]+):(?P\d+) - (?P.+)

[syslog-extractions]
REGEX = \s([^\s\[]+)(?:\[(\d+)\])?:\s
FORMAT = process::$1 pid::$2

[sendmail-extractions]
REGEX = sendmail\[(\d+)\]: (\w+):
FORMAT = process::sendmail pid::$1 qid::$2

[splunkd-disassembler]
REGEX = ^\S+\s\S+\s+(\S+)\s+(\S+)
FORMAT = $0 level::$1 component::$2
DEST_KEY = _meta

# Matches any event and writes "help" to the _MetaData:Index key.
[splunk_help]
DEST_KEY = _MetaData:Index
REGEX = .
FORMAT = help

# Matches any event and writes "history" to the _MetaData:Index key.
[splunk_index_history]
DEST_KEY = _MetaData:Index
REGEX = .
FORMAT = history

# Matches any event and writes "nullQueue" to the queue key.
[send_to_nullqueue]
DEST_KEY = queue
REGEX = .
FORMAT = nullQueue

[tcpdump-endpoints]
REGEX = (\d+\.\d+\.\d+\.\d+):(\d+) -> (\d+\.\d+\.\d+\.\d+):(\d+)
FORMAT = src_ip::$1 src_port::$2 dest_ip::$3 dest_port::$4

# "Name: 123" / "Name: 0xAF" pairs preceded by a space; the value must be numeric.
[colon-kv]
REGEX = (?<= )([A-Za-z]+): ?((0x[A-F\d]+)|\d+)(?= |\n|$)
FORMAT = $1::$2

# name=number, name:number or "name number" pairs; the value may be hex or signed decimal.
[num-kv]
REGEX = ([A-Za-z_][\w_]*)[=:\s]+((0x[A-F\d]+)|[+-]?[\d.]+)
FORMAT = $1::$2

[colon-line]
REGEX = ^(\w+)\s*:[ \t]*(.*?)$
FORMAT = $1::$2

[bracket-space]
REGEX = \[(\S+) (.*?)\]
FORMAT = $1::$2

[db2]
REGEX = ([A-Z]+) *: (.*?)(?=\n|$| +[A-Z]+ *:)
FORMAT = $1::$2

# Example external lookup
[dnslookup]
python.version = latest
external_cmd = external_lookup.py clienthost clientip
fields_list = clienthost,clientip

[registry]
DELIMS="\n","=:"

#[guid_lookup]
#filename = guid_lookup.csv
#max_matches = 1
#min_matches = 1
#
#[sid_lookup]
#filename = sid_lookup.csv
#max_matches = 1
#min_matches = 1

[guid-to-translate]
# NOTE(review): the capture group has lost its <name>, so "(?\w" does not
# compile. Nothing in this file states the intended field name; restore it from
# the vendor-shipped file.
REGEX = (?\w{8}-\w{4}-\w{4}-\w{4}-\w{12})
MV_ADD = true

[wel-message]
# Splits an event into the part before "Message=" (_pre_msg) and the message
# body. The second group is named "Message" because [wel-col-kv] reads
# SOURCE_KEY = Message; [wel-eq-kv] reads _pre_msg.
REGEX = (?sm)^(?<_pre_msg>.+)\nMessage=(?<Message>.+)$
CLEAN_KEYS = false

[wel-eq-kv]
SOURCE_KEY = _pre_msg
DELIMS = "\n","="
MV_ADD = true

[wel-col-kv]
SOURCE_KEY = Message
REGEX = \n([^:\n\r]+):[ \t]++([^\n]*)
FORMAT = $1::$2
MV_ADD = true

[ad-kv]
REGEX = (?<_KEY_1>[\w-]+)=(?<_VAL_1>[^\r\n]*)
MV_ADD = true

[perfmon-kv]
DELIMS = "\n","="

[wmi-host]
REGEX = (?m)ComputerName=(.+)
DEST_KEY = MetaData:Host
FORMAT = host::$1

[wmi-override-host]
REGEX = (?m)wmi_hostname=(.+)
DEST_KEY = MetaData:Host
FORMAT = host::$1

# Rewrites _raw to everything before the ---splunk-wevt-end-of-event--- marker.
[strip-winevt-linebreaker]
REGEX = (?s)^(.*)---splunk-wevt-end-of-event---
FORMAT = $1
DEST_KEY = _raw

[stash_extract]
DELIMS = ",", "="
CAN_OPTIMIZE = false
MV_ADD = true
CLEAN_KEYS = false

[set_sourcetype_to_stash]
REGEX = .
DEST_KEY = MetaData:Sourcetype
FORMAT = sourcetype::stash

[extract_spent]
# NOTE(review): the group name was lost; "spent" is inferred from the stanza
# name -- confirm against the vendor-shipped file.
REGEX = \s(?P<spent>\d+(\.\d+)?)ms$

[remote_searches_extractions_starting]
# NOTE(review): every capture group in this pattern had lost its name. Each one
# is restored using the literal key that precedes it in the message
# (search_id=..., server=..., ...) -- confirm against the vendor-shipped file.
REGEX = ^[^=\n]*starting: search_id=(?<search_id>[^,]+), server=(?<server>[^,]+), active_searches=(?<active_searches>[^,]+), search='(?<search>.+)', remote_ttl=(?<remote_ttl>.+), apiStartTime='(?<apiStartTime>.+)', apiEndTime='(?<apiEndTime>.+)', savedsearch_name="(?<savedsearch_name>[^"]*)", isClusterPeer=(?<isClusterPeer>[^,]+), bucketMapId=(?<bucketMapId>[^,]+), sidType=(?<sidType>[^,]+)$

# We need this for backward compatibility, to make sure we can extract at least the fields that are common across all releases
# and that events still appear in search results when those common fields are referenced.
# It is used when the 'remote_searches_extractions_starting' stanza for the current release fails on events generated
# by previous splunk releases because of a message format change.
# In particular this is the case for 8.1->8.2 and 8.2->9.0
[remote_searches_extractions_starting_fallback]
# NOTE(review): group names restored from the literal key preceding each group
# -- confirm against the vendor-shipped file.
REGEX = ^[^=\n]*starting: search_id=(?<search_id>[^,]+), server=(?<server>[^,]+), active_searches=(?<active_searches>[^,]+), search='(?<search>.+)', remote_ttl=(?<remote_ttl>.+), apiStartTime='(?<apiStartTime>.+)', apiEndTime='(?<apiEndTime>.+)', savedsearch_name="(?<savedsearch_name>[^"]*)"

[remote_searches_extractions_terminated]
# The statistics from drop_count onward sit inside one optional group, so
# events without them still match.
# NOTE(review): group names restored from the literal key preceding each group
# -- confirm against the vendor-shipped file.
REGEX = ^[^=\n]*(closed|terminated): search_id=(?<search_id>[^,]+), server=(?<server>[^,]+), active_searches=(?<active_searches>[^,]+), elapsedTime=(?<elapsedTime>[^,]+), cpuTime=(?<cpuTime>[^,]+), search='(?<search>.+)', savedsearch_name="(?<savedsearch_name>[^"]*)", (drop_count=(?<drop_count>[^,]+), scan_count=(?<scan_count>[^,]+), eliminated_buckets=(?<eliminated_buckets>[^,]+), considered_events=(?<considered_events>[^,]+), decompressed_slices=(?<decompressed_slices>[^,]+), events_count=(?<events_count>[^,]+), total_slices=(?<total_slices>[^,]+), considered_buckets=(?<considered_buckets>[^,]+), search_rawdata_bucketcache_error=(?<search_rawdata_bucketcache_error>[^,]+), search_rawdata_bucketcache_miss=(?<search_rawdata_bucketcache_miss>[^,]+), search_index_bucketcache_error=(?<search_index_bucketcache_error>[^,]+), search_index_bucketcache_hit=(?<search_index_bucketcache_hit>[^,]+), search_index_bucketcache_miss=(?<search_index_bucketcache_miss>[^,]+), search_rawdata_bucketcache_hit=(?<search_rawdata_bucketcache_hit>[^,]+), search_rawdata_bucketcache_miss_wait=(?<search_rawdata_bucketcache_miss_wait>[^,]+), search_index_bucketcache_miss_wait=(?<search_index_bucketcache_miss_wait>.+))?$

# Generic key=value extraction (the value may be preceded by a double quote).
[field_extraction]
REGEX = ([a-zA-Z0-9_\.]+)=\"?([a-zA-Z0-9_\.-]+)
FORMAT = $1::$2
REPEAT_MATCH = true
WRITE_META = true