You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

168 lines
6.2 KiB

6 months ago
# Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# https://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
import logging
logger = logging.getLogger(__name__)
def register_table_methods(base_classes, **kwargs):
    """Put ``TableResource`` at the front of ``base_classes``.

    :param base_classes: Sequence of base classes, modified in place;
        ``TableResource`` is inserted at index 0.
    :param kwargs: Accepted but not used by this function.
    """
    front = 0
    base_classes.insert(front, TableResource)
# This class can be used to add any additional methods we want
# onto a table resource. Ideally to avoid creating a new
# base class for every method we can just update this
# class instead. Just be sure to move the bulk of the
# actual method implementation to another class.
class TableResource:
    """Holder for extra methods attached to a table resource."""

    def __init__(self, *args, **kwargs):
        # Pure pass-through to the next initializer in the MRO.
        super().__init__(*args, **kwargs)

    def batch_writer(self, overwrite_by_pkeys=None):
        """Create a batch writer object.

        This method creates a context manager for writing
        objects to Amazon DynamoDB in batch.

        The batch writer buffers the requests it is given and sends them
        in batches. Requests that come back unprocessed are resent as
        needed. Call ``put_item`` for every item you want to add and
        ``delete_item`` for every item you want to delete.

        Example usage::

            with table.batch_writer() as batch:
                for _ in range(1000000):
                    batch.put_item(Item={'HashKey': '...',
                                         'Otherstuff': '...'})
                # You can also delete_items in a batch.
                batch.delete_item(Key={'HashKey': 'SomeHashKey'})

        :type overwrite_by_pkeys: list(string)
        :param overwrite_by_pkeys: De-duplicate request items in buffer
            if match new request item on specified primary keys. i.e
            ``["partition_key1", "sort_key2", "sort_key3"]``
        """
        table_name = self.name
        client = self.meta.client
        return BatchWriter(
            table_name, client, overwrite_by_pkeys=overwrite_by_pkeys
        )
class BatchWriter:
    """Automatically handle batch writes to DynamoDB for a single table.

    Requests are collected in a local buffer and sent with
    ``batch_write_item`` once ``flush_amount`` of them are pending.
    Requests the service reports as unprocessed are put back into the
    buffer and go out with a later batch. When used as a context manager,
    everything still buffered is flushed on exit.
    """

    def __init__(
        self, table_name, client, flush_amount=25, overwrite_by_pkeys=None
    ):
        """

        :type table_name: str
        :param table_name: The name of the table.  The class handles
            batch writes to a single table.

        :type client: ``botocore.client.Client``
        :param client: A botocore client.  Note this client
            **must** have the dynamodb customizations applied
            to it for transforming AttributeValues into the
            wire protocol.  What this means in practice is that
            you need to use a client that comes from a DynamoDB
            resource if you're going to instantiate this class
            directly, i.e
            ``boto3.resource('dynamodb').Table('foo').meta.client``.

        :type flush_amount: int
        :param flush_amount: The number of items to keep in
            a local buffer before sending a batch_write_item
            request to DynamoDB.

        :type overwrite_by_pkeys: list(string)
        :param overwrite_by_pkeys: De-duplicate request items in buffer
            if match new request item on specified primary keys. i.e
            ``["partition_key1", "sort_key2", "sort_key3"]``

        """
        self._table_name = table_name
        self._client = client
        self._items_buffer = []
        self._flush_amount = flush_amount
        self._overwrite_by_pkeys = overwrite_by_pkeys

    def put_item(self, Item):
        """Buffer a ``PutRequest`` for ``Item``; flush if the buffer is full."""
        self._add_request_and_process({'PutRequest': {'Item': Item}})

    def delete_item(self, Key):
        """Buffer a ``DeleteRequest`` for ``Key``; flush if the buffer is full."""
        self._add_request_and_process({'DeleteRequest': {'Key': Key}})

    def _add_request_and_process(self, request):
        """Append ``request`` to the buffer, de-duplicating first if enabled."""
        if self._overwrite_by_pkeys:
            self._remove_dup_pkeys_request_if_any(request)
        self._items_buffer.append(request)
        self._flush_if_needed()

    def _remove_dup_pkeys_request_if_any(self, request):
        """Drop every buffered request whose key values match ``request``.

        The surviving requests are collected in a single pass and written
        back afterwards. Removing entries from the list while iterating
        over it would skip the element following each removal and could
        leave a duplicate behind.
        """
        pkey_values_new = self._extract_pkey_values(request)
        kept = []
        for item in self._items_buffer:
            if self._extract_pkey_values(item) == pkey_values_new:
                logger.debug(
                    "With overwrite_by_pkeys enabled, skipping request:%s",
                    item,
                )
            else:
                kept.append(item)
        # Slice-assign so the buffer remains the same list object.
        self._items_buffer[:] = kept

    def _extract_pkey_values(self, request):
        """Return the values of the ``overwrite_by_pkeys`` keys in ``request``.

        :return: A list of values read from the ``PutRequest`` item or the
            ``DeleteRequest`` key, or ``None`` when ``request`` holds
            neither kind of request.
        """
        if request.get('PutRequest'):
            return [
                request['PutRequest']['Item'][key]
                for key in self._overwrite_by_pkeys
            ]
        elif request.get('DeleteRequest'):
            return [
                request['DeleteRequest']['Key'][key]
                for key in self._overwrite_by_pkeys
            ]
        return None

    def _flush_if_needed(self):
        """Send a batch once at least ``flush_amount`` requests are buffered."""
        if len(self._items_buffer) >= self._flush_amount:
            self._flush()

    def _flush(self):
        """Send up to ``flush_amount`` buffered requests in one call.

        Requests returned under ``UnprocessedItems`` for this table are
        appended back onto the buffer.
        """
        items_to_send = self._items_buffer[: self._flush_amount]
        self._items_buffer = self._items_buffer[self._flush_amount :]
        response = self._client.batch_write_item(
            RequestItems={self._table_name: items_to_send}
        )
        unprocessed_items = response['UnprocessedItems']
        if not unprocessed_items:
            unprocessed_items = {}
        item_list = unprocessed_items.get(self._table_name, [])
        # Any unprocessed_items are immediately added to the
        # next batch we send.
        self._items_buffer.extend(item_list)
        # The second value logged is the buffer length after re-queueing,
        # i.e. everything still waiting to be sent.
        logger.debug(
            "Batch write sent %s, unprocessed: %s",
            len(items_to_send),
            len(self._items_buffer),
        )

    def __enter__(self):
        """Return the writer itself for use in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, tb):
        """Flush all remaining requests; exception info is not inspected."""
        # When we exit, we need to keep flushing whatever's left
        # until there's nothing left in our items buffer.
        while self._items_buffer:
            self._flush()

Powered by BW's shoe-string budget.