# Version 9.2.2.20240415
#
# This is an example transforms.conf. Use this file to create regexes and
# rules for transforms. Use this file in tandem with props.conf.
#
# To use one or more of these configurations, copy the configuration block
# into transforms.conf in $SPLUNK_HOME/etc/system/local/. You must restart
# Splunk to enable configurations.
#
# To learn more about configuration files (including precedence) please see
# the documentation located at
# http://docs.splunk.com/Documentation/Splunk/latest/Admin/Aboutconfigurationfiles
#
# Note: These are examples. Replace the values with your own customizations.

# Indexed field:

[netscreen-error]
# FIX: the capture group had lost its name ("(?[^:]+)" is invalid PCRE);
# restored the named group that the FORMAT/err_code indexed field refers to.
REGEX = device_id=\[w+\](?<err_code>[^:]+)
FORMAT = err_code::$1
WRITE_META = true

# Override host:

[hostoverride]
DEST_KEY = MetaData:Host
REGEX = \s(\w*)$
FORMAT = host::$1

# Extracted fields:

[netscreen-error-field]
# Same regex as [netscreen-error], used for search-time extraction
# (no WRITE_META); named group restored here as well.
REGEX = device_id=\[w+\](?<err_code>[^:]+)
FORMAT = err_code::$1

# Index-time evaluations:

# Route events longer than 500 characters to the null queue (discard them).
[discard-long-lines]
INGEST_EVAL = queue=if(length(_raw) > 500, "nullQueue", "indexQueue")

# Pick one of sixteen indexes based on the first hex digit of the event's MD5.
[split-into-sixteen-indexes-for-no-good-reason]
INGEST_EVAL = index="split_" . substr(md5(_raw),1,1)

[add-two-numeric-fields]
INGEST_EVAL = loglen_raw=ln(length(_raw)), loglen_src=ln(length(source))

# In this example the Splunk platform only creates the new index-time field if
# the hostname has a dot in it; assigning null() to a new field is a no-op:
[add-hostdomain-field]
INGEST_EVAL = hostdomain=if(host LIKE "%.%", replace(host,"^[^\\.]+\\.",""), null())

# Static lookup table
[mylookuptable]
filename = mytable.csv

# One-to-one lookup guarantees that the Splunk platform outputs a single
# lookup value for each input value. When no match exists, the Splunk platform
# uses the value for "default_match", which by default is nothing.
[mylook]
filename = mytable.csv
max_matches = 1
min_matches = 1
default_match =

# Lookup and filter results:
[myfilteredlookup]
filename = mytable.csv
filter = id<500 AND color="red"

# external command lookup table:
[myexternaltable]
external_cmd = testadapter.py blah
fields_list = foo bar

# Temporal based static lookup table:
[staticwtime]
filename = mytable.csv
time_field = timestamp
time_format = %d/%m/%y %H:%M:%S

# Mask sensitive data:
[session-anonymizer]
REGEX = (?m)^(.*)SessionId=\w+(\w{4}[&"].*)$
FORMAT = $1SessionId=########$2
DEST_KEY = _raw

# Route to an alternate index:
[AppRedirect]
REGEX = (Application)
DEST_KEY = _MetaData:Index
FORMAT = Verbose

# Extract comma-delimited values into fields:
# This example assigns extracted values that do not have file names
# from _raw to field1, field2 and field3, in the order that the
# fields are extracted.
# If the Splunk platform extracts more than three values that do not
# have field names, then the Splunk platform ignores those values.
[extract_csv]
DELIMS = ","
FIELDS = "field1", "field2", "field3"

# This example extracts key-value pairs which are separated by '|'
# while the key is delimited from value by '='
[pipe_eq]
DELIMS = "|", "="

# This example extracts key-value pairs which are separated by '|' or
# ';', while the key is delimited from value by '=' or ':'
[multiple_delims]
DELIMS = "|;", "=:"

###### BASIC MODULAR REGULAR EXPRESSIONS DEFINITION START ###########
# When you add a new basic modular regex you must add a comment that
# lists the fields that it extracts as named capturing groups.
# If there are no field names, note the placeholders
# for the group name as: Extracts: field1, field2....

[all_lazy]
REGEX = .*?
[all]
REGEX = .*

[nspaces]
# Matches one or more NON space characters:
REGEX = \S+

[alphas]
# Matches a string containing only letters a-zA-Z:
REGEX = [a-zA-Z]+

[alnums]
# Matches a string containing letters + digits:
REGEX = [a-zA-Z0-9]+

[qstring]
# Matches a quoted "string" and extracts an unnamed variable
# Name MUST be provided as: [[qstring:name]]
# Extracts: empty-name-group (needs name)
REGEX = "(?<>[^"]*+)"

[sbstring]
# Matches a string enclosed in [] and extracts an unnamed variable
# Name must be provided as: [[sbstring:name]]
# Extracts: empty-name-group (needs name)
REGEX = \[(?<>[^\]]*+)\]

[digits]
REGEX = \d+

[int]
# Matches an integer or a hex number:
REGEX = 0x[a-fA-F0-9]+|\d+

[float]
# Matches a float (or an int):
REGEX = \d*\.\d+|[[int]]

[octet]
# Matches only numbers from 0-255 (one octet in an ip):
REGEX = (?:2(?:5[0-5]|[0-4][0-9])|[0-1][0-9][0-9]|[0-9][0-9]?)

[ipv4]
# Matches a valid IPv4 optionally followed by :port_num. The octets in the IP
# are also be validated in the 0-255 range.
# Extracts: ip, port
# FIX: restored the stripped named groups; the "Extracts:" line above
# documents the names each regex must carry.
REGEX = (?<ip>[[octet]](?:\.[[octet]]){3})(?::[[int:port]])?

[simple_url]
# Matches a url of the form proto://domain.tld/uri
# Extracts: url, domain
REGEX = (?<url>\w++://(?<domain>[a-zA-Z0-9\-.:]++)(?:/[^\s"]*)?)

[url]
# Matches a url in the form of: proto://domain.tld/uri
# Extracts: url, proto, domain, uri
REGEX = (?<url>[[alphas:proto]]://(?<domain>[a-zA-Z0-9\-.:]++)(?<uri>/[^\s"]*)?)

[simple_uri]
# Matches a uri in the form of: /path/to/resource?query
# Extracts: uri, uri_path, uri_query
REGEX = (?<uri>(?<uri_path>[^\s\?"]++)(?:\\?(?<uri_query>[^\s"]+))?)

[uri]
# uri = path optionally followed by query [/this/path/file.js?query=part&other=var]
# path = root part followed by file [/root/part/file.part]
# Extracts: uri, uri_path, uri_root, uri_file, uri_query, uri_domain (optional if in proxy mode)
REGEX = (?<uri>(?:\w++://(?<uri_domain>[^/\s]++))?(?<uri_path>(?<uri_root>/+(?:[^\s\?;=/]*+/+)*)(?<uri_file>[^\s\?;=?/]*+))(?:\?(?<uri_query>[^\s"]+))?)
[hide-ip-address]
# When you make a clone of an event with the sourcetype masked_ip_address, the clone's
# text is changed to mask the IP address.
# The cloned event is further processed by index-time transforms and
# SEDCMD expressions according to its new sourcetype.
# In most scenarios an additional transform directs the
# masked_ip_address event to a different index than the original data.
REGEX = ^(.*?)src=\d+\.\d+\.\d+\.\d+(.*)$
FORMAT = $1src=XXXXX$2
DEST_KEY = _raw
CLONE_SOURCETYPE = masked_ip_addresses

# Set repeat_match to true to repeatedly match the regex in the data.
# When repeat_match is set to true, regex is added as indexed
# fields: a, b, c, d, e, etc. For example: 1483382050 a=1 b=2 c=3 d=4 e=5
# If repeat_match is not set, the match stops at a=1.
[repeat_regex]
REGEX = ([a-z])=(\d+)
FORMAT = $1::$2
REPEAT_MATCH = true
WRITE_META = true

###### BASIC MODULAR REGULAR EXPRESSIONS DEFINITION END ###########

# Statsd dimensions extraction:
# In most cases the Splunk platform needs only one regex to run per
# sourcetype. By default the Splunk platform would look for the sourcetype
# name in transforms.conf. There there is no need to provide
# the STATSD-DIM-TRANSFORMS setting in props.conf.
# For example, these two stanzas would extract dimensions as ipv4=10.2.3.4
# and os=windows from statsd data=mem.percent.used.10.2.3.4.windows:33|g
# FIX: the named groups below had been stripped; the dimension names
# (ipv4, os) come from the example in the preceding comment.
[statsd-dims:regex_stanza1]
REGEX = (?<ipv4>\d{1,3}.\d{1,3}.\d{1,3}.\d{1,3})
REMOVE_DIMS_FROM_METRIC_NAME = true

[statsd-dims:regex_stanza2]
REGEX = \S+\.(?<os>\w+):
REMOVE_DIMS_FROM_METRIC_NAME = true

[statsd-dims:metric_sourcetype_name]
# In this example, we extract both ipv4 and os dimension using a single regex:
REGEX = (?<ipv4>\d{1,3}.\d{1,3}.\d{1,3}.\d{1,3})\.(?<os>\w+):
REMOVE_DIMS_FROM_METRIC_NAME = true

# In this metrics example, we start with this log line:
#
# 01-26-2018 07:49:49.030 -0800 INFO Metrics - group=queue, name=aggqueue, max_size_kb=1024, current_size_kb=1,
# current_size=3, largest_size=49, smallest_size=0, dc_latitude=37.3187706, dc_longitude=-121.9515042
#
# The following stanza converts that single event into multiple metrics at
# index-time. It deny lists the "dc_latitude" and "dc_longitude" dimensions,
# which means they are omitted from the generated metric data points. It also
# allow lists the "name" and "dc_latitude" dimensions, which means that those
# dimensions potentially are the only dimensions that appear in the
# generated metric data points.
# When a log-to-metrics configuration simultaneously includes allow list and
# deny list dimensions, the Splunk platform includes the dimensions that
# appear in the allow list and also do not appear in the deny list
# for the generated metric data points. For example, "dc_latitude" appears in
# the allow list, but also in the deny list, so it is not included in the generated
# metric data points. The metric data points generated by this configuration
# have "name" as their sole dimension.
[metric-schema:logtometrics]
METRIC-SCHEMA-MEASURES-queue = max_size_kb,current_size_kb,current_size,largest_size,smallest_size
METRIC-SCHEMA-BLACKLIST-DIMS-queue = dc_latitude,dc_longitude
METRIC-SCHEMA-WHITELIST-DIMS-queue = name,dc_latitude

# Here are the metrics generated by that stanza:
# {'metric_name' : 'queue.max_size_kb', '_value' : 1024, 'name': 'aggqueue'},
# {'metric_name' : 'queue.current_size_kb', '_value' : 1, 'name': 'aggqueue'},
# {'metric_name' : 'queue.current_size', '_value' : 3, 'name': 'aggqueue'},
# {'metric_name' : 'queue.largest_size', '_value' : 49, 'name': 'aggqueue'},
# {'metric_name' : 'queue.smallest_size', '_value' : 0, 'name': 'aggqueue'}

# You can use wildcard characters ('*') in METRIC-SCHEMA configurations. In
# the preceding example, '*_size' matches 'current_size', 'largest_size', and
# 'smallest_size'. The following configuration uses a wildcard to include all
# three of those fields without individually listing each one.
# METRIC-SCHEMA-MEASURES-queue = max_size_kb,current_size_kb,*_size

# In the sample log above, group=queue represents the unique metric name prefix. Hence, it needs to be
# formatted and saved as metric_name::queue for Splunk to identify queue as a metric name prefix.
[extract_group]
REGEX = group=(\w+)
FORMAT = metric_name::$1
WRITE_META = true

[extract_name]
REGEX = name=(\w+)
FORMAT = name::$1
WRITE_META = true

[extract_max_size_kb]
REGEX = max_size_kb=(\w+)
FORMAT = max_size_kb::$1
WRITE_META = true

[extract_current_size_kb]
REGEX = current_size_kb=(\w+)
FORMAT = current_size_kb::$1
WRITE_META = true

[extract_current_size]
# FIX: this stanza previously duplicated [extract_max_size_kb]'s regex/format
# (copy-paste error), so current_size was never extracted.
REGEX = current_size=(\w+)
FORMAT = current_size::$1
WRITE_META = true

[extract_largest_size]
REGEX = largest_size=(\w+)
FORMAT = largest_size::$1
WRITE_META = true

[extract_smallest_size]
REGEX = smallest_size=(\w+)
FORMAT = smallest_size::$1
WRITE_META = true