You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

389 lines
14 KiB

6 months ago
# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
import copy
import math
from s3transfer.tasks import (
CompleteMultipartUploadTask,
CreateMultipartUploadTask,
SubmissionTask,
Task,
)
from s3transfer.utils import (
ChunksizeAdjuster,
calculate_range_parameter,
get_callbacks,
get_filtered_dict,
)
class CopySubmissionTask(SubmissionTask):
    """Task for submitting tasks to execute a copy"""

    # Copy extra args that are forwarded to the HeadObject request used to
    # look up the source size, keyed by copy arg name and mapped to the
    # parameter name used in the HeadObject request.
    EXTRA_ARGS_TO_HEAD_ARGS_MAPPING = {
        'CopySourceIfMatch': 'IfMatch',
        'CopySourceIfModifiedSince': 'IfModifiedSince',
        'CopySourceIfNoneMatch': 'IfNoneMatch',
        'CopySourceIfUnmodifiedSince': 'IfUnmodifiedSince',
        'CopySourceSSECustomerKey': 'SSECustomerKey',
        'CopySourceSSECustomerAlgorithm': 'SSECustomerAlgorithm',
        'CopySourceSSECustomerKeyMD5': 'SSECustomerKeyMD5',
        'RequestPayer': 'RequestPayer',
        'ExpectedBucketOwner': 'ExpectedBucketOwner',
    }

    # Extra args that are passed through to each upload_part_copy call.
    UPLOAD_PART_COPY_ARGS = [
        'CopySourceIfMatch',
        'CopySourceIfModifiedSince',
        'CopySourceIfNoneMatch',
        'CopySourceIfUnmodifiedSince',
        'CopySourceSSECustomerKey',
        'CopySourceSSECustomerAlgorithm',
        'CopySourceSSECustomerKeyMD5',
        'SSECustomerKey',
        'SSECustomerAlgorithm',
        'SSECustomerKeyMD5',
        'RequestPayer',
        'ExpectedBucketOwner',
    ]

    # Extra args that are stripped before creating the multipart upload.
    CREATE_MULTIPART_ARGS_BLACKLIST = [
        'CopySourceIfMatch',
        'CopySourceIfModifiedSince',
        'CopySourceIfNoneMatch',
        'CopySourceIfUnmodifiedSince',
        'CopySourceSSECustomerKey',
        'CopySourceSSECustomerAlgorithm',
        'CopySourceSSECustomerKeyMD5',
        'MetadataDirective',
        'TaggingDirective',
    ]

    # Extra args that are passed through when completing the multipart
    # upload.
    COMPLETE_MULTIPART_ARGS = [
        'SSECustomerKey',
        'SSECustomerAlgorithm',
        'SSECustomerKeyMD5',
        'RequestPayer',
        'ExpectedBucketOwner',
    ]

    def _submit(
        self, client, config, osutil, request_executor, transfer_future
    ):
        """
        :param client: The client associated with the transfer manager

        :type config: s3transfer.manager.TransferConfig
        :param config: The transfer config associated with the transfer
            manager

        :type osutil: s3transfer.utils.OSUtil
        :param osutil: The os utility associated to the transfer manager

        :type request_executor: s3transfer.futures.BoundedExecutor
        :param request_executor: The request executor associated with the
            transfer manager

        :type transfer_future: s3transfer.futures.TransferFuture
        :param transfer_future: The transfer future associated with the
            transfer request that tasks are being submitted for
        """
        # Determine the size if it was not provided
        if transfer_future.meta.size is None:
            # If a size was not provided figure out the size for the
            # user. Note that we will only use the client provided to
            # the TransferManager. If the object is outside of the region
            # of the client, they may have to provide the file size themselves
            # with a completely new client.
            call_args = transfer_future.meta.call_args
            head_object_request = (
                self._get_head_object_request_from_copy_source(
                    call_args.copy_source
                )
            )
            extra_args = call_args.extra_args

            # Map any values that may be used in the head object that is
            # used in the copy object
            for param, value in extra_args.items():
                if param in self.EXTRA_ARGS_TO_HEAD_ARGS_MAPPING:
                    head_object_request[
                        self.EXTRA_ARGS_TO_HEAD_ARGS_MAPPING[param]
                    ] = value

            response = call_args.source_client.head_object(
                **head_object_request
            )
            transfer_future.meta.provide_transfer_size(
                response['ContentLength']
            )

        # If it is greater than threshold do a multipart copy, otherwise
        # do a regular copy object.
        if transfer_future.meta.size < config.multipart_threshold:
            self._submit_copy_request(
                client, config, osutil, request_executor, transfer_future
            )
        else:
            self._submit_multipart_request(
                client, config, osutil, request_executor, transfer_future
            )

    def _submit_copy_request(
        self, client, config, osutil, request_executor, transfer_future
    ):
        """Submit a single CopyObjectTask as the final task of the transfer.

        Takes the same arguments as ``_submit``; ``config`` and ``osutil``
        are accepted but not used here.
        """
        call_args = transfer_future.meta.call_args

        # Get the needed progress callbacks for the task
        progress_callbacks = get_callbacks(transfer_future, 'progress')

        # Submit the request of a single copy.
        self._transfer_coordinator.submit(
            request_executor,
            CopyObjectTask(
                transfer_coordinator=self._transfer_coordinator,
                main_kwargs={
                    'client': client,
                    'copy_source': call_args.copy_source,
                    'bucket': call_args.bucket,
                    'key': call_args.key,
                    'extra_args': call_args.extra_args,
                    'callbacks': progress_callbacks,
                    'size': transfer_future.meta.size,
                },
                is_final=True,
            ),
        )

    def _submit_multipart_request(
        self, client, config, osutil, request_executor, transfer_future
    ):
        """Submit the create, copy-part and complete tasks of a multipart copy.

        Takes the same arguments as ``_submit``; ``osutil`` is accepted but
        not used here. One CopyPartTask is submitted per part, and the
        completing task is submitted as the final task of the transfer.
        """
        call_args = transfer_future.meta.call_args

        # Submit the request to create a multipart upload and make sure it
        # does not include any of the arguments used for copy part.
        create_multipart_extra_args = {
            param: val
            for param, val in call_args.extra_args.items()
            if param not in self.CREATE_MULTIPART_ARGS_BLACKLIST
        }

        create_multipart_future = self._transfer_coordinator.submit(
            request_executor,
            CreateMultipartUploadTask(
                transfer_coordinator=self._transfer_coordinator,
                main_kwargs={
                    'client': client,
                    'bucket': call_args.bucket,
                    'key': call_args.key,
                    'extra_args': create_multipart_extra_args,
                },
            ),
        )

        # Determine how many parts are needed based on filesize and
        # desired chunksize.
        part_size = config.multipart_chunksize
        adjuster = ChunksizeAdjuster()
        part_size = adjuster.adjust_chunksize(
            part_size, transfer_future.meta.size
        )
        num_parts = int(
            math.ceil(transfer_future.meta.size / float(part_size))
        )

        # Submit requests to upload the parts of the file.
        part_futures = []
        progress_callbacks = get_callbacks(transfer_future, 'progress')

        # The checksum algorithm of the multipart request is the same for
        # every part, so look it up once rather than on each iteration.
        checksum_algorithm = call_args.extra_args.get("ChecksumAlgorithm")

        for part_number in range(1, num_parts + 1):
            # A fresh dict per part, since CopySourceRange differs per part.
            extra_part_args = self._extra_upload_part_args(
                call_args.extra_args
            )
            # The part number for upload part starts at 1 while the
            # range parameter starts at zero, so just subtract 1 off of
            # the part number
            extra_part_args['CopySourceRange'] = calculate_range_parameter(
                part_size,
                part_number - 1,
                num_parts,
                transfer_future.meta.size,
            )
            # Get the size of the part copy as well for the progress
            # callbacks.
            size = self._get_transfer_size(
                part_size,
                part_number - 1,
                num_parts,
                transfer_future.meta.size,
            )
            part_futures.append(
                self._transfer_coordinator.submit(
                    request_executor,
                    CopyPartTask(
                        transfer_coordinator=self._transfer_coordinator,
                        main_kwargs={
                            'client': client,
                            'copy_source': call_args.copy_source,
                            'bucket': call_args.bucket,
                            'key': call_args.key,
                            'part_number': part_number,
                            'extra_args': extra_part_args,
                            'callbacks': progress_callbacks,
                            'size': size,
                            'checksum_algorithm': checksum_algorithm,
                        },
                        pending_main_kwargs={
                            'upload_id': create_multipart_future
                        },
                    ),
                )
            )

        complete_multipart_extra_args = self._extra_complete_multipart_args(
            call_args.extra_args
        )
        # Submit the request to complete the multipart upload.
        self._transfer_coordinator.submit(
            request_executor,
            CompleteMultipartUploadTask(
                transfer_coordinator=self._transfer_coordinator,
                main_kwargs={
                    'client': client,
                    'bucket': call_args.bucket,
                    'key': call_args.key,
                    'extra_args': complete_multipart_extra_args,
                },
                pending_main_kwargs={
                    'upload_id': create_multipart_future,
                    'parts': part_futures,
                },
                is_final=True,
            ),
        )

    def _get_head_object_request_from_copy_source(self, copy_source):
        """Return a shallow copy of ``copy_source`` to build a request from.

        :param copy_source: The copy source; must be a dictionary
        :raises TypeError: If ``copy_source`` is not a dictionary
        """
        if isinstance(copy_source, dict):
            return copy.copy(copy_source)
        else:
            raise TypeError(
                'Expecting dictionary formatted: '
                '{"Bucket": bucket_name, "Key": key} '
                'but got %s or type %s.' % (copy_source, type(copy_source))
            )

    def _extra_upload_part_args(self, extra_args):
        """Filter ``extra_args`` down to the UPLOAD_PART_COPY_ARGS entries."""
        # Only the args in UPLOAD_PART_COPY_ARGS actually need to be passed
        # onto the upload_part_copy calls.
        return get_filtered_dict(extra_args, self.UPLOAD_PART_COPY_ARGS)

    def _extra_complete_multipart_args(self, extra_args):
        """Filter ``extra_args`` down to the COMPLETE_MULTIPART_ARGS entries."""
        return get_filtered_dict(extra_args, self.COMPLETE_MULTIPART_ARGS)

    def _get_transfer_size(
        self, part_size, part_index, num_parts, total_transfer_size
    ):
        """Return the number of bytes covered by the part at ``part_index``.

        :param part_size: The size used for every part except the last
        :param part_index: The zero-based index of the part
        :param num_parts: The total number of parts
        :param total_transfer_size: The total size of the transfer
        """
        if part_index == num_parts - 1:
            # The last part may be different in size then the rest of the
            # parts.
            return total_transfer_size - (part_index * part_size)
        return part_size
class CopyObjectTask(Task):
    """Task that performs a copy with a single copy_object call"""

    def _main(
        self, client, copy_source, bucket, key, extra_args, callbacks, size
    ):
        """
        :param client: The client whose copy_object method is called
        :param copy_source: The value passed as the CopySource parameter
        :param bucket: The name of the destination bucket
        :param key: The name of the destination key
        :param extra_args: A dictionary of additional keyword arguments
            expanded into the copy_object call
        :param callbacks: Callables invoked, in order, once the copy call
            has returned
        :param size: The size of the transfer; handed to every callback as
            ``bytes_transferred``
        """
        client.copy_object(
            CopySource=copy_source,
            Bucket=bucket,
            Key=key,
            **extra_args,
        )
        # Progress is reported in one step, after the whole copy is done.
        for notify in callbacks:
            notify(bytes_transferred=size)
class CopyPartTask(Task):
    """Task that copies one part of a multipart copy"""

    def _main(
        self,
        client,
        copy_source,
        bucket,
        key,
        upload_id,
        part_number,
        extra_args,
        callbacks,
        size,
        checksum_algorithm=None,
    ):
        """
        :param client: The client whose upload_part_copy method is called
        :param copy_source: The value passed as the CopySource parameter
        :param bucket: The name of the destination bucket
        :param key: The name of the destination key
        :param upload_id: The id of the multipart upload
        :param part_number: The number identifying this part within the
            multipart upload
        :param extra_args: A dictionary of additional keyword arguments
            expanded into the upload_part_copy call
        :param callbacks: Callables invoked, in order, once the part copy
            call has returned
        :param size: The size of this part; handed to every callback as
            ``bytes_transferred``
        :param checksum_algorithm: The checksum algorithm of the multipart
            upload, if any

        :rtype: dict
        :returns: A dictionary representing a part::

            {'ETag': etag_value, 'PartNumber': part_number}

            This value can be appended to a list to be used to complete
            the multipart upload. If ``checksum_algorithm`` is given and
            the matching ``Checksum<ALGORITHM>`` member is in the response,
            that member is included as well.
        """
        response = client.upload_part_copy(
            CopySource=copy_source,
            Bucket=bucket,
            Key=key,
            UploadId=upload_id,
            PartNumber=part_number,
            **extra_args,
        )
        for notify in callbacks:
            notify(bytes_transferred=size)

        # Read the result only after the callbacks have run.
        copy_result = response['CopyPartResult']
        part_metadata = {
            'ETag': copy_result['ETag'],
            'PartNumber': part_number,
        }
        if not checksum_algorithm:
            return part_metadata

        # Carry the part checksum along when the response provides one.
        checksum_member = f'Checksum{checksum_algorithm.upper()}'
        if checksum_member in copy_result:
            part_metadata[checksum_member] = copy_result[checksum_member]
        return part_metadata

Powered by BW's shoe-string budget.