Project

General

Profile

Bug #63017 » test_multipart.py

Pritha Srivastava, 09/28/2023 10:08 AM

 
import boto3
import botocore
import json
import string
import random
from botocore.client import Config
import logging

# Configure the root logger to write records of level DEBUG and above to the
# file "boto.log" (this is where the debug output of this run ends up).
logging.basicConfig(filename="boto.log", level=logging.DEBUG)

from botocore.handlers import validate_bucket_name

# Base URL of the S3-compatible endpoint the client connects to (plain HTTP on localhost).
endpoint = 'http://localhost:8000'

# Static access/secret key pair handed to boto3.client() further down.
# NOTE(review): presumably throwaway credentials for a local test cluster --
# confirm, and never replace them with real keys in this file.
access = '0555b35654ad1656d804'
secret = 'h7GhxuBLTrlhVUyxSPUKUV8r/2EI4ngqJxD7iBdBYLhwluN30JaT3Q=='

def generate_random(size, part_size=5*1024*1024):
    """
    Yield random ASCII-letter strings that together total ``size`` characters.

    The data is produced one part at a time: every part is ``part_size``
    characters long except possibly the last, which holds the remainder.
    Each part is one random 1 KiB pattern repeated (and truncated) to the
    part length, so the payload is cheap to build but is not random
    throughout.

    :param size: total number of characters to generate; nothing is yielded
        when it is zero or negative.
    :param part_size: maximum number of characters per yielded part.
    """
    chunk = 1024
    allowed = string.ascii_letters
    for offset in range(0, size, part_size):
        # A fresh random pattern is drawn for every part.
        pattern = ''.join(random.choice(allowed) for _ in range(chunk))
        this_part_size = min(size - offset, part_size)
        repeats, remainder = divmod(this_part_size, chunk)
        # Whole repetitions of the pattern, topped up with a prefix of it so
        # the part is exactly this_part_size characters long.
        yield pattern * repeats + pattern[:remainder]

def _multipart_upload(bucket_name, key, size, part_size=5*1024*1024, client=None, content_type=None, metadata=None, resend_parts=None):
    """
    Start a multipart upload of random data and upload all of its parts.

    The upload is created and every part is sent, but the upload is NOT
    completed here; the caller finishes it with the returned values.

    :param bucket_name: bucket to upload into.
    :param key: object key of the upload.
    :param size: total payload size, split into parts by ``generate_random``.
    :param part_size: maximum size of each part.
    :param client: S3 client providing ``create_multipart_upload`` and
        ``upload_part``; required even though the default is ``None``.
    :param content_type: optional ``ContentType`` for the upload; omitted
        from the request when ``None``.
    :param metadata: optional ``Metadata`` dict for the upload; omitted from
        the request when ``None``.
    :param resend_parts: optional collection of zero-based part indexes that
        are uploaded a second time with the same part number and body.
    :returns: tuple ``(upload_id, data, parts)`` where ``data`` is the whole
        generated payload and ``parts`` is a list of
        ``{'ETag': ..., 'PartNumber': ...}`` dicts in upload order.
    :raises ValueError: if no ``client`` is supplied.
    """
    if client is None:
        raise ValueError('_multipart_upload requires an S3 client')
    if resend_parts is None:
        resend_parts = ()

    # Only forward the optional fields that were actually supplied, so that
    # giving just one of them does not send an explicit None for the other.
    create_args = {'Bucket': bucket_name, 'Key': key}
    if metadata is not None:
        create_args['Metadata'] = metadata
    if content_type is not None:
        create_args['ContentType'] = content_type
    response = client.create_multipart_upload(**create_args)

    upload_id = response['UploadId']
    payload = []
    parts = []
    # Part numbers for upload_part and for the parts list must start at 1.
    for part_num, part in enumerate(generate_random(size, part_size), start=1):
        payload.append(part)
        response = client.upload_part(UploadId=upload_id, Bucket=bucket_name, Key=key, PartNumber=part_num, Body=part)
        parts.append({'ETag': response['ETag'].strip('"'), 'PartNumber': part_num})
        # resend_parts holds zero-based indexes, hence the offset by one.
        if part_num - 1 in resend_parts:
            client.upload_part(UploadId=upload_id, Bucket=bucket_name, Key=key, PartNumber=part_num, Body=part)

    return (upload_id, ''.join(payload), parts)

def _check_content_using_range(client, key, bucket_name, data, step):
    """
    Verify an object's content by reading it back with ranged GET requests.

    The object length is taken from a plain ``get_object`` call; the object
    is then fetched in consecutive ``bytes=start-end`` ranges of at most
    ``step`` bytes, and each range is compared with the matching slice of
    ``data``.

    :param client: S3 client providing ``get_object``.
    :param key: key of the object to check.
    :param bucket_name: bucket holding the object.
    :param data: expected object content.
        NOTE(review): slicing ``data`` by byte offsets assumes one character
        per byte (e.g. ASCII text) -- confirm for other payloads.
    :param step: maximum number of bytes requested per range.
    :raises AssertionError: if a range has an unexpected length or content.
    """
    response = client.get_object(Bucket=bucket_name, Key=key)
    size = response['ContentLength']
    # Only the length is needed from this request; close the unread body
    # stream instead of leaving it open.
    response['Body'].close()

    for ofs in range(0, size, step):
        toread = min(step, size - ofs)
        end = ofs + toread - 1
        byte_range = 'bytes={s}-{e}'.format(s=ofs, e=end)
        response = client.get_object(Bucket=bucket_name, Key=key, Range=byte_range)
        assert response['ContentLength'] == toread
        body = _get_body(response)
        assert body == data[ofs:end+1]

def _get_body(response):
    """
    Read the whole ``Body`` stream of a response and return it as text.

    :param response: mapping whose ``'Body'`` entry provides ``read()``.
    :returns: the content as ``str``; a ``bytes`` payload is decoded with
        ``bytes.decode()`` defaults (UTF-8), anything else is returned as
        read.
    """
    got = response['Body'].read()
    if isinstance(got, bytes):
        got = got.decode()
    return got

# Bucket used by every request in this script.
bucket_name = 'my-bucket'

# S3 client for the endpoint above: no SSL, path-style addressing, and at
# most 3 retry attempts per request.
client = boto3.client(
    's3',
    region_name='us-east-1',
    endpoint_url=endpoint,
    use_ssl=False,
    aws_access_key_id=access,
    aws_secret_access_key=secret,
    config=Config(
        s3={'addressing_style': 'path'},
        retries={'max_attempts': 3},
    ),
)

# Detach botocore's validate_bucket_name handler from the S3
# parameter-building event, so it no longer runs for requests on this client.
client.meta.events.unregister(
    'before-parameter-build.s3',
    validate_bucket_name,
)

try:
    # Create the bucket the upload below writes into.
    client.create_bucket(Bucket=bucket_name)
    print("\r\ncreated_bucket: " + bucket_name)
except botocore.exceptions.ClientError as e:
    # Best effort: an error response from the service is only reported and
    # the script carries on (presumably so reruns against an existing
    # bucket still work -- confirm).
    print(e)

# Description of the 30 MiB object uploaded by this script.
key = "mymultipart"
content_type = 'text/bla'
objlen = 30 * 1024 * 1024
metadata = {'foo': 'bar'}

# Upload all parts, then complete the upload.
upload_id, data, parts = _multipart_upload(
    bucket_name=bucket_name,
    key=key,
    size=objlen,
    client=client,
    content_type=content_type,
    metadata=metadata,
)
client.complete_multipart_upload(
    Bucket=bucket_name,
    Key=key,
    UploadId=upload_id,
    MultipartUpload={'Parts': parts},
)
# Send the very same completion request a second time, to check how an extra
# complete_multipart_upload for the already completed upload is handled.
client.complete_multipart_upload(
    Bucket=bucket_name,
    Key=key,
    UploadId=upload_id,
    MultipartUpload={'Parts': parts},
)

# Exactly one object with the full size must be listed under the key.
response = client.list_objects_v2(Bucket=bucket_name, Prefix=key)
assert len(response['Contents']) == 1
assert response['Contents'][0]['Size'] == objlen

# The object is fetched, but the content checks below are disabled.
response = client.get_object(Bucket=bucket_name, Key=key)
#assert response['ContentType'] == content_type
#assert response['Metadata'] == metadata
#body = _get_body(response)
#assert len(body) == response['ContentLength']
#assert body == data

#_check_content_using_range(client, key, bucket_name, data, 1000000)
#_check_content_using_range(client, key, bucket_name, data, 10000000)
    (1-1/1)