DynamoDB in examples, Example 4.1: Search
import datetime from uuid import uuid4 from ddb_table import DDBTable, DDBUUIDField, DDBStrField DDB_LOCAL_URL = 'http://localhost:8010' class DDBTweet(DDBTable): TABLE_NAME = 'tweet' KEY_SCHEMA = [{ 'AttributeName': 'tweet_id', 'KeyType': 'HASH', }] PROVISIONED_THROUGHPUT = { 'ReadCapacityUnits': 1, 'WriteCapacityUnits': 1 } GLOBAL_SECONDARY_INDEXES = [{ 'IndexName': 'by_text', 'KeySchema': [{ 'AttributeName': 'upper_first', 'KeyType': 'HASH' }, { 'AttributeName': 'upper_rest', 'KeyType': 'RANGE' } ], 'Projection': { 'ProjectionType': 'ALL', }, 'ProvisionedThroughput': { 'ReadCapacityUnits': 1, 'WriteCapacityUnits': 1, } }] FIELDS = { 'tweet_id': DDBUUIDField, 'text': DDBStrField, 'created': DDBStrField, 'upper_first': DDBStrField, 'upper_rest': DDBStrField, } def _get_endpoint_url(self): return DDB_LOCAL_URL def create(self, text): tweet_id = uuid4() created = str(datetime.datetime.utcnow()) upper = text.upper() data = { 'tweet_id': tweet_id, 'text': text, 'created': created, 'upper_first': upper[:2], 'upper_rest': upper[2:], } response = self._dynamodb(operation='PutItem').call( TableName=self._get_table_name(), Item=self.encode_item(data=data)) return data def search(self, text, last=None, limit=10): if not text: return ([], None) upper = text.upper() ddb_query = self._dynamodb(operation='Query') kwargs = { 'TableName': self._get_table_name(), 'IndexName': 'by_text', } if len(text) > 1: kwargs.update({ 'KeyConditions': { 'upper_first': { 'AttributeValueList': [{ 'S': upper[:2], }], 'ComparisonOperator': 'EQ' }, 'upper_rest': { 'AttributeValueList': [{ 'S': upper[2:], }], 'ComparisonOperator': 'BEGINS_WITH' } } }) else: kwargs.update({ 'KeyConditions': { 'upper_first': { 'AttributeValueList': [{ 'S': upper[:2], }], 'ComparisonOperator': 'EQ' }, } }) if last: kwargs['ExclusiveStartKey'] = last result = ddb_query.call(**kwargs) return ( [self.decode_item(item) for item in result.get('Items')], result.get('LastEvaluatedKey')) if __name__ == '__main__': ddb_tweet = DDBTweet() ddb_tweet.create_table() for text in ['text1', 'text2', 'text3', 'Text4', 'Another text']: ddb_tweet.create(text=text) print(ddb_tweet.search(text='t')) print(ddb_tweet.search(text='text4')) print(ddb_tweet.search(text='not found')) # ([{'upper_first': 'TE', 'upper_rest': 'XT1', 'tweet_id': '5aac3887-3da4-41c3-b158-4d9624248e46', 'text': 'text1', 'created': '2015-05-30 13:43:01.174688'}, {'upper_first': 'TE', 'upper_rest': 'XT2', 'tweet_id': '95b8330a-0d56-41b8-9389-a8ae4fd27d70', 'text': 'text2', 'created': '2015-05-30 13:43:01.265926'}, {'upper_first': 'TE', 'upper_rest': 'XT3', 'tweet_id': 'd079d36b-e902-4f0e-91de-03b285756d27', 'text': 'text3', 'created': '2015-05-30 13:43:01.290698'}, {'upper_first': 'TE', 'upper_rest': 'XT4', 'tweet_id': '67e6c96b-f828-42dc-89b9-770f309e920e', 'text': 'Text4', 'created': '2015-05-30 13:43:01.314380'}], None) # ([{'upper_first': 'TE', 'upper_rest': 'XT4', 'tweet_id': '67e6c96b-f828-42dc-89b9-770f309e920e', 'text': 'Text4', 'created': '2015-05-30 13:43:01.314380'}], None) # ([], None)
Here I want to turn your attention to two points:
- use additional text field with uppercase or lowercase content, it allows to search case insensitive
- hash key == first two letters in text, it allows to spread data and load between nodes in DynamoDB cluster
This example is pretty useless for implemetation search feature in real projects, use search engines like Amazon CloudSearch, Elasticsearch, Apache Solr etc. instead.
But in some cases this functionality may be enough (suggest hash tags for example). And it is pretty fast and scalable.
Licensed under CC BY-SA 3.0