# Version 9.2.2.20240415
#
# This is an example transforms.conf. Use this file to create regexes and
# rules for transforms. Use this file in tandem with props.conf.
#
# To use one or more of these configurations, copy the configuration block
# into transforms.conf in $SPLUNK_HOME/etc/system/local/. You must restart
# Splunk to enable configurations.
#
# To learn more about configuration files (including precedence) please see
# the documentation located at
# http://docs.splunk.com/Documentation/Splunk/latest/Admin/Aboutconfigurationfiles

# Note: These are examples. Replace the values with your own customizations.


# Indexed field:

[netscreen-error]
REGEX = device_id=\[\w+\](?<err_code>[^:]+)
FORMAT = err_code::$1
WRITE_META = true
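
# A minimal sketch of how an index-time transform like [netscreen-error] is
# referenced from props.conf with a TRANSFORMS- setting. The sourcetype name
# below is a hypothetical placeholder:
#
# props.conf:
# [netscreen:firewall]
# TRANSFORMS-error = netscreen-error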

# Override host:

[hostoverride]
DEST_KEY = MetaData:Host
REGEX = \s(\w*)$
FORMAT = host::$1

# Extracted fields:

[netscreen-error-field]
REGEX = device_id=\[\w+\](?<err_code>[^:]+)
FORMAT = err_code::$1
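
# Search-time extractions like [netscreen-error-field] are referenced from
# props.conf with a REPORT- setting rather than TRANSFORMS-. Again, the
# sourcetype name is a hypothetical placeholder:
#
# props.conf:
# [netscreen:firewall]
# REPORT-error = netscreen-error-field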

# Index-time evaluations:

[discard-long-lines]
INGEST_EVAL = queue=if(length(_raw) > 500, "nullQueue", "indexQueue")

[split-into-sixteen-indexes-for-no-good-reason]
INGEST_EVAL = index="split_" . substr(md5(_raw),1,1)

[add-two-numeric-fields]
INGEST_EVAL = loglen_raw=ln(length(_raw)), loglen_src=ln(length(source))

# In this example the Splunk platform only creates the new index-time field if
# the hostname has a dot in it; assigning null() to a new field is a no-op:

[add-hostdomain-field]
INGEST_EVAL = hostdomain=if(host LIKE "%.%", replace(host,"^[^\\.]+\\.",""), null())
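
# INGEST_EVAL transforms are wired up the same way as regex transforms,
# through a TRANSFORMS- setting in props.conf. A sketch, with a hypothetical
# sourcetype:
#
# props.conf:
# [my:verbose:logs]
# TRANSFORMS-filter = discard-long-lines
#
# With this wiring, any event whose raw text is longer than 500 characters is
# routed to nullQueue and discarded before indexing.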

# Static lookup table

[mylookuptable]
filename = mytable.csv

# One-to-one lookup guarantees that the Splunk platform outputs a single
# lookup value for each input value. When no match exists, the Splunk platform
# uses the value for "default_match", which by default is nothing.

[mylook]
filename = mytable.csv
max_matches = 1
min_matches = 1
default_match =
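
# A lookup defined here can be applied automatically at search time through a
# LOOKUP- setting in props.conf. The sourcetype and the column names "id" and
# "color" are hypothetical; the columns must match the header row of
# mytable.csv:
#
# props.conf:
# [my:sourcetype]
# LOOKUP-mytable = mylook id OUTPUT color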

# Lookup and filter results:

[myfilteredlookup]
filename = mytable.csv
filter = id<500 AND color="red"

# External command lookup table:

[myexternaltable]
external_cmd = testadapter.py blah
fields_list = foo bar

# Temporal (time-bounded) static lookup table:

[staticwtime]
filename = mytable.csv
time_field = timestamp
time_format = %d/%m/%y %H:%M:%S

# Mask sensitive data:

[session-anonymizer]
REGEX = (?m)^(.*)SessionId=\w+(\w{4}[&"].*)$
FORMAT = $1SessionId=########$2
DEST_KEY = _raw
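
# A hypothetical event, before and after masking. The regex keeps the last
# four characters of the session ID:
#
#   before: user=jdoe SessionId=a1b2c3d4e5f67890&action=view
#   after:  user=jdoe SessionId=########7890&action=view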

# Route to an alternate index:

[AppRedirect]
REGEX = (Application)
DEST_KEY = _MetaData:Index
FORMAT = Verbose
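
# Here FORMAT names the target index: events whose raw text matches
# "Application" are rewritten to land in the index named "Verbose", which
# must already exist. Wire it up like the other index-time transforms
# (hypothetical sourcetype):
#
# props.conf:
# [my:app:logs]
# TRANSFORMS-route = AppRedirect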

# Extract comma-delimited values into fields:

# This example assigns extracted values that do not have field names
# from _raw to field1, field2 and field3, in the order in which the
# fields are extracted.
# If the Splunk platform extracts more than three values that do not
# have field names, then the Splunk platform ignores those values.

[extract_csv]
DELIMS = ","
FIELDS = "field1", "field2", "field3"
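
# For a hypothetical event "error,404,/index.html", this transform yields
# field1=error, field2=404, field3=/index.html.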

# This example extracts key-value pairs that are separated by '|',
# while each key is delimited from its value by '=':

[pipe_eq]
DELIMS = "|", "="

# This example extracts key-value pairs that are separated by '|' or
# ';', while each key is delimited from its value by '=' or ':':

[multiple_delims]
DELIMS = "|;", "=:"
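
# For a hypothetical event "code=404;reason:missing|host=web01", this
# transform yields code=404, reason=missing, and host=web01.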

###### BASIC MODULAR REGULAR EXPRESSIONS DEFINITION START ###########

# When you add a new basic modular regex, you must add a comment that
# lists the fields that it extracts as named capturing groups.
# If there are no field names, note the placeholders
# for the group names as: Extracts: field1, field2....

[all_lazy]
REGEX = .*?

[all]
REGEX = .*

[nspaces]
# Matches one or more non-space characters:
REGEX = \S+

[alphas]
# Matches a string containing only letters a-zA-Z:
REGEX = [a-zA-Z]+

[alnums]
# Matches a string containing letters + digits:
REGEX = [a-zA-Z0-9]+

[qstring]
# Matches a quoted "string" and extracts an unnamed variable.
# A name MUST be provided as: [[qstring:name]]
# Extracts: empty-name group (needs name)
REGEX = "(?<>[^"]*+)"

[sbstring]
# Matches a string enclosed in [] and extracts an unnamed variable.
# A name must be provided as: [[sbstring:name]]
# Extracts: empty-name group (needs name)
REGEX = \[(?<>[^\]]*+)\]

[digits]
REGEX = \d+

[int]
# Matches an integer or a hex number:
REGEX = 0x[a-fA-F0-9]+|\d+

[float]
# Matches a float (or an int):
REGEX = \d*\.\d+|[[int]]

[octet]
# Matches only numbers from 0-255 (one octet in an IP address):
REGEX = (?:2(?:5[0-5]|[0-4][0-9])|[0-1][0-9][0-9]|[0-9][0-9]?)

[ipv4]
# Matches a valid IPv4 address, optionally followed by :port_num. The octets
# in the IP address are also validated to be in the 0-255 range.
# Extracts: ip, port
REGEX = (?<ip>[[octet]](?:\.[[octet]]){3})(?::[[int:port]])?

[simple_url]
# Matches a url of the form proto://domain.tld/uri
# Extracts: url, domain
REGEX = (?<url>\w++://(?<domain>[a-zA-Z0-9\-.:]++)(?:/[^\s"]*)?)

[url]
# Matches a url of the form: proto://domain.tld/uri
# Extracts: url, proto, domain, uri
REGEX = (?<url>[[alphas:proto]]://(?<domain>[a-zA-Z0-9\-.:]++)(?<uri>/[^\s"]*)?)

[simple_uri]
# Matches a uri of the form: /path/to/resource?query
# Extracts: uri, uri_path, uri_query
REGEX = (?<uri>(?<uri_path>[^\s\?"]++)(?:\?(?<uri_query>[^\s"]+))?)

[uri]
# uri = path optionally followed by query [/this/path/file.js?query=part&other=var]
# path = root part followed by file [/root/part/file.part]
# Extracts: uri, uri_path, uri_root, uri_file, uri_query, uri_domain (optional, if in proxy mode)
REGEX = (?<uri>(?:\w++://(?<uri_domain>[^/\s]++))?(?<uri_path>(?<uri_root>/+(?:[^\s\?;=/]*+/+)*)(?<uri_file>[^\s\?;=?/]*+))(?:\?(?<uri_query>[^\s"]+))?)
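
# Modular regexes are composed with the [[stanza]] and [[stanza:name]]
# syntax, as [ipv4], [float], and [url] above demonstrate. A hypothetical
# composition that names the empty capture group in [sbstring]:
#
# [loglevel]
# # Extracts: level
# REGEX = level=[[sbstring:level]]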

[hide-ip-address]
# This transform makes a clone of each matching event, assigns the clone the
# sourcetype masked_ip_addresses, and changes the clone's text to mask the
# IP address.
# The cloned event is further processed by index-time transforms and
# SEDCMD expressions according to its new sourcetype.
# In most scenarios an additional transform directs the
# masked_ip_addresses event to a different index than the original data.
REGEX = ^(.*?)src=\d+\.\d+\.\d+\.\d+(.*)$
FORMAT = $1src=XXXXX$2
DEST_KEY = _raw
CLONE_SOURCETYPE = masked_ip_addresses

# Set REPEAT_MATCH to true to match the regex repeatedly against the data.
# When REPEAT_MATCH is true, each match is added as an indexed field. For
# example, for the event "1483382050 a=1 b=2 c=3 d=4 e=5", the fields
# a, b, c, d, and e are all indexed.
# If REPEAT_MATCH is not set, the match stops at a=1.

[repeat_regex]
REGEX = ([a-z])=(\d+)
FORMAT = $1::$2
REPEAT_MATCH = true
WRITE_META = true

###### BASIC MODULAR REGULAR EXPRESSIONS DEFINITION END ###########

# Statsd dimensions extraction:

# In most cases the Splunk platform needs to run only one regex per
# sourcetype. By default, the Splunk platform looks for a transforms.conf
# stanza whose name matches the sourcetype, in which case there is no need
# to provide the STATSD-DIM-TRANSFORMS setting in props.conf.

# For example, these two stanzas would extract dimensions as ipv4=10.2.3.4
# and os=windows from the statsd data mem.percent.used.10.2.3.4.windows:33|g

[statsd-dims:regex_stanza1]
REGEX = (?<ipv4>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})
REMOVE_DIMS_FROM_METRIC_NAME = true

[statsd-dims:regex_stanza2]
REGEX = \S+\.(?<os>\w+):
REMOVE_DIMS_FROM_METRIC_NAME = true
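
# Because the two stanza names above do not match a sourcetype, they must be
# listed explicitly in props.conf (without the "statsd-dims:" prefix). The
# sourcetype name is a hypothetical placeholder:
#
# props.conf:
# [statsd_custom]
# METRICS_PROTOCOL = statsd
# STATSD-DIM-TRANSFORMS = regex_stanza1, regex_stanza2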

[statsd-dims:metric_sourcetype_name]
# In this example, we extract both the ipv4 and os dimensions with a single regex:
REGEX = (?<ipv4>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\.(?<os>\w+):
REMOVE_DIMS_FROM_METRIC_NAME = true

# In this metrics example, we start with this log line:
#
# 01-26-2018 07:49:49.030 -0800 INFO Metrics - group=queue, name=aggqueue, max_size_kb=1024, current_size_kb=1,
# current_size=3, largest_size=49, smallest_size=0, dc_latitude=37.3187706, dc_longitude=-121.9515042
#
# The following stanza converts that single event into multiple metrics at
# index time. It deny-lists the "dc_latitude" and "dc_longitude" dimensions,
# which means they are omitted from the generated metric data points. It also
# allow-lists the "name" and "dc_latitude" dimensions, which means that those
# dimensions are potentially the only dimensions that appear in the
# generated metric data points.
# When a log-to-metrics configuration simultaneously includes allow-list and
# deny-list dimensions, the Splunk platform includes only the dimensions that
# appear in the allow list and do not appear in the deny list in the
# generated metric data points. For example, "dc_latitude" appears in the
# allow list, but also in the deny list, so it is not included in the
# generated metric data points. The metric data points generated by this
# configuration have "name" as their sole dimension.

[metric-schema:logtometrics]
METRIC-SCHEMA-MEASURES-queue = max_size_kb,current_size_kb,current_size,largest_size,smallest_size
METRIC-SCHEMA-BLACKLIST-DIMS-queue = dc_latitude,dc_longitude
METRIC-SCHEMA-WHITELIST-DIMS-queue = name,dc_latitude
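
# Log-to-metrics transforms are referenced from props.conf with the
# METRIC-SCHEMA-TRANSFORMS setting. The sourcetype name is a hypothetical
# placeholder:
#
# props.conf:
# [my:metrics:log]
# METRIC-SCHEMA-TRANSFORMS = metric-schema:logtometrics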

# Here are the metric data points generated by that stanza:
# {'metric_name' : 'queue.max_size_kb', '_value' : 1024, 'name': 'aggqueue'},
# {'metric_name' : 'queue.current_size_kb', '_value' : 1, 'name': 'aggqueue'},
# {'metric_name' : 'queue.current_size', '_value' : 3, 'name': 'aggqueue'},
# {'metric_name' : 'queue.largest_size', '_value' : 49, 'name': 'aggqueue'},
# {'metric_name' : 'queue.smallest_size', '_value' : 0, 'name': 'aggqueue'}

# You can use wildcard characters ('*') in METRIC-SCHEMA configurations. In
# the preceding example, '*_size' matches 'current_size', 'largest_size', and
# 'smallest_size'. The following configuration uses a wildcard to include all
# three of those fields without individually listing each one:
# METRIC-SCHEMA-MEASURES-queue = max_size_kb,current_size_kb,*_size

# In the sample log above, group=queue carries the unique metric name prefix.
# Hence, it must be extracted and saved as metric_name::queue for the Splunk
# platform to identify "queue" as the metric name prefix.

[extract_group]
REGEX = group=(\w+)
FORMAT = metric_name::$1
WRITE_META = true

[extract_name]
REGEX = name=(\w+)
FORMAT = name::$1
WRITE_META = true

[extract_max_size_kb]
REGEX = max_size_kb=(\w+)
FORMAT = max_size_kb::$1
WRITE_META = true

[extract_current_size_kb]
REGEX = current_size_kb=(\w+)
FORMAT = current_size_kb::$1
WRITE_META = true

[extract_current_size]
REGEX = current_size=(\w+)
FORMAT = current_size::$1
WRITE_META = true

[extract_largest_size]
REGEX = largest_size=(\w+)
FORMAT = largest_size::$1
WRITE_META = true

[extract_smallest_size]
REGEX = smallest_size=(\w+)
FORMAT = smallest_size::$1
WRITE_META = true
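
# These index-time extractions populate the measures and dimensions that the
# [metric-schema:logtometrics] stanza above consumes. A sketch of chaining
# them from props.conf, with a hypothetical sourcetype:
#
# props.conf:
# [my:metrics:log]
# TRANSFORMS-fields = extract_group, extract_name, extract_max_size_kb, extract_current_size_kb, extract_current_size, extract_largest_size, extract_smallest_size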