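# Copies a frozen Splunk bucket into the DDAA archival S3 bucket and records it in DynamoDB:
#   1. read the file list and the KMS-encrypted customer key (cipher_blob) from the bucket's
#      receipt.json,
#   2. decrypt the customer key with KMS and server-side copy each data file into the
#      archival bucket using SSE-C,
#   3. upload receipt.json alongside the copied data files,
#   4. write the bucket's metadata to the bucket history DynamoDB table.
# Expects 13 positional arguments: index name, bucket path, remote bucket path, bucket id,
# bucket size, start time, end time, source bucket name, receipt path, access key,
# secret key, region, and DynamoDB table name.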
import sys
import boto3
import json
import os
import base64
import time
from botocore.exceptions import ClientError
import splunk.conf_util
import splunk.clilib.cli_common as comm
SPLUNK_HOME_PATH = os.environ.get('SPLUNK_HOME', '/opt/splunk')
ARCHIVAL_BUCKET_OPTION_NAME = 'archive'
SSE_ALGORITHM = 'AES256'
def check_if_in_archive(client, bid, s3_bucket, key, plaintext):
    """ Check if a given key is already in the bucket.
        Log error and exit if found.
    """
    try:
        client.head_object(Bucket=s3_bucket, Key=key,
                           SSECustomerAlgorithm=SSE_ALGORITHM, SSECustomerKey=plaintext)
        sys.exit(f'BucketId={bid} path={key} already exists!')
    except ClientError as ce:
        # not found is expected - we should not log errors in this case
        if ce.response['Error']['Code'] != "404":
            sys.stdout.write(f'Unable to check if bucket exists bid={bid} path={key} error={ce}')
    return True
if __name__ == "__main__":
    # check parameters
    if len(sys.argv) < 14:
        sys.exit('missing arguments')

    # required params
    arg_index_name = sys.argv[1]
    arg_bucket_path = sys.argv[2]
    arg_remote_path = sys.argv[3]
    arg_bucket_id = sys.argv[4]
    arg_bucket_size = sys.argv[5]
    arg_start_time = sys.argv[6]
    arg_end_time = sys.argv[7]
    arg_bucket_name = sys.argv[8]
    arg_receipt_path = sys.argv[9]
    arg_access_key = sys.argv[10]
    arg_secret_key = sys.argv[11]
    arg_region = sys.argv[12]
    arg_table_name = sys.argv[13]

    """
    The following flag is not currently used, but may need to be used in the future if new
    changes to the DDAA feature require unique support in the AWS gov cloud or other
    FIPS cloud-like environments. See SPL-168479
    """
    is_on_gov_cloud = arg_region.startswith('us-gov')

    DATA_ARCHIVE_CONF_PATH = os.path.join(SPLUNK_HOME_PATH, 'etc', comm.getAppDir(), '_cluster_admin', 'local', 'data_archive.conf')
    if not os.path.exists(DATA_ARCHIVE_CONF_PATH):  # noah indexers use a different location
        DATA_ARCHIVE_CONF_PATH = os.path.join(SPLUNK_HOME_PATH, 'etc', 'system', 'local', 'data_archive.conf')
    if not os.path.exists(DATA_ARCHIVE_CONF_PATH):
        sys.exit('data_archive.conf not found at required path=' + DATA_ARCHIVE_CONF_PATH)

    archival_bucket_name = splunk.conf_util.ConfigMap(DATA_ARCHIVE_CONF_PATH)['buckets'][ARCHIVAL_BUCKET_OPTION_NAME]
    # get file list and encryption info from receipt.json
    if not os.path.exists(arg_receipt_path):
        sys.exit('failed to locate updated receipt.json: BucketId=' + arg_bucket_id)

    fileList = ''
    cipher_blob = ''
    guid_context = ''
    rawSize = ''
    try:
        with open(arg_receipt_path) as json_data:
            data = json.load(json_data)
            fileList = data["objects"]
            cipher_blob = data["user_data"]["cipher_blob"]
            guid_context = data["user_data"]["uploader_guid"]
            rawSize = data["manifest"]["raw_size"]
    except Exception as exc:
        sys.exit('failed to get info from receipt.json: BucketId=' + arg_bucket_id + '; exception =' + str(exc))
    plaintext = ''
    try:
        kms_client = boto3.client('kms', arg_region)
        kms_response = kms_client.decrypt(CiphertextBlob=base64.b64decode(cipher_blob),
                                          EncryptionContext={'guid': guid_context})
        plaintext = kms_response["Plaintext"]
    except Exception as exc:
        sys.exit('failed to get customer key from receipt.json: BucketId=' + arg_bucket_id + '; exception =' + str(exc))
    # copy data files in the bucket to staging folder, skip receipt.json
    client = boto3.client('s3',
                          #aws_access_key_id = arg_access_key,
                          #aws_secret_access_key = arg_secret_key,
                          region_name=arg_region,
                          )
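    # The explicit access/secret key arguments above are left commented out, so boto3 falls
    # back to its default credential chain (environment variables, instance profile, etc.).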
    old_prefix = arg_remote_path
    new_prefix = ''
    try:
        s = old_prefix.split('/', 1)
        new_prefix = s[0] + '/' + s[1]
    except Exception as exc:
        sys.exit('failed to get staging path from bucket path: ' + arg_remote_path + '; exception =' + str(exc))
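    # SSE-C copies have to present the customer key twice: once to read the encrypted source
    # object (CopySourceSSECustomerKey) and once to re-encrypt the destination object
    # (SSECustomerKey). The key decrypted from receipt.json above is used for both.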
    extra_args = {
        'CopySourceSSECustomerAlgorithm': SSE_ALGORITHM,
        'CopySourceSSECustomerKey': plaintext,
        'SSECustomerAlgorithm': SSE_ALGORITHM,
        'SSECustomerKey': plaintext,
    }
    try:
        # perform check in the archive
        archive_checked = False
        for file in fileList:
            if file['size'] == 0:
                continue
            cur_file = file['name'][1:]
            cur_key = old_prefix + cur_file
            if file.get('expand', False):  # handle deletes
                list_result = client.list_objects(Bucket=arg_bucket_name,
                                                  Prefix=cur_key,
                                                  Delimiter='/')
                for r in list_result['Contents']:
                    if r['Key'].endswith('/'):
                        continue
                    old_source = {'Bucket': arg_bucket_name, 'Key': r['Key']}
                    if not archive_checked:
                        archive_checked = check_if_in_archive(client, arg_bucket_id, archival_bucket_name, r['Key'], plaintext)
                    response = client.copy(old_source, archival_bucket_name, r['Key'], ExtraArgs=extra_args)
            else:
                old_source = {'Bucket': arg_bucket_name, 'Key': cur_key}
                new_key = new_prefix + cur_file
                if not archive_checked:
                    archive_checked = check_if_in_archive(client, arg_bucket_id, archival_bucket_name, new_key, plaintext)
                response = client.copy(old_source, archival_bucket_name, new_key, ExtraArgs=extra_args)
    except ClientError as err:
        sys.exit('failed to copy bucket to archival bucket: BucketId=' + arg_bucket_id + '; error=' + err.response['Error']['Message'])
    except Exception as exc:
        sys.exit('failed to copy bucket to archival bucket: BucketId=' + arg_bucket_id + '; exception =' + str(exc))
    else:
        sys.stdout.write('successfully copied bucket to archival bucket; ')
    # upload receipt.json with restore flag
    try:
        receipt_key = new_prefix + '/receipt.json'
        with open(arg_receipt_path, 'rb') as receipt_data:
            client.put_object(Key=receipt_key, Bucket=archival_bucket_name, Body=receipt_data)
    except ClientError as err:
        sys.exit('failed to copy updated receipt.json to archival bucket: BucketId=' + arg_bucket_id + '; error=' + err.response['Error']['Message'])
    except Exception as exc:
        sys.exit('failed to copy updated receipt.json to archival bucket: BucketId=' + arg_bucket_id + '; exception =' + str(exc))
    else:
        sys.stdout.write('successfully uploaded receipt.json to archival bucket; ')
    # write bucket info to dynamodb
    dynamodb = boto3.resource('dynamodb',
                              region_name=arg_region,
                              #aws_access_key_id= arg_access_key,
                              #aws_secret_access_key= arg_secret_key,
                              )
    try:
        table = dynamodb.Table(arg_table_name)
    except Exception as exc:
        sys.exit('failed to get DynamoDB table: ' + arg_table_name + '; exception =' + str(exc))

    cur_time = str(int(time.time())).zfill(10)
    start_time = arg_start_time.zfill(10)
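    # Both timestamps are zero-padded to a fixed width, presumably so that the string sort
    # keys below (ArchiveTimeWithBucketID / StartTimeWithBucketID) order lexicographically
    # by time as well as by bucket id.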
    try:
        response = table.put_item(Item={
            'IndexName' : arg_index_name,
            'BucketPath': arg_bucket_path,
            'RemoteBucketPath': arg_remote_path,
            'BucketId' : arg_bucket_id,
            'StartTime' : int(arg_start_time),
            'EndTime' : int(arg_end_time),
            'BucketSize': int(arg_bucket_size),
            'FileList' : json.dumps(fileList),
            'RawSize' : int(rawSize),
            'ArchiveTimeWithBucketID': cur_time + "_" + arg_bucket_id,
            'StartTimeWithBucketID': start_time + "_" + arg_bucket_id,
            'BucketTimeSpan': int(arg_end_time) - int(arg_start_time)
        })
    except ClientError as err:
        sys.exit('failed to write bucket info to bucket history table: BucketId=' + arg_bucket_id + '; error=' + err.response['Error']['Message'])
    except Exception as exc:
        sys.exit('failed to write bucket info to bucket history table: BucketId=' + arg_bucket_id + '; exception =' + str(exc))
    else:
        sys.stdout.write('successfully wrote bucket info to bucket history table')
