Source code for exporters.bypasses.s3_to_azure_blob_bypass

import re

from exporters.default_retries import retry_long

from .base_s3_bypass import BaseS3Bypass

S3_URL_EXPIRES_IN = 1800  # half an hour should be enough


class S3AzureBlobBypass(BaseS3Bypass):
    """
    Bypass executed by default when the data source is an S3 bucket and the
    data destination is an Azure blob container. It should be transparent to
    the user. Conditions are:

        - S3Reader and AzureBlobWriter are used in the configuration.
        - No filter modules are set up.
        - No transform module is set up.
        - No grouper module is set up.
        - AzureBlobWriter has no items_limit set in the configuration.
        - AzureBlobWriter has the default items_per_buffer_write and
          size_per_buffer_write.
        - AzureBlobWriter has the default write_buffer.
    """

    def __init__(self, config, metadata):
        super(S3AzureBlobBypass, self).__init__(config, metadata)
        self.container = self.read_option('writer', 'container')
        from azure.storage.blob import BlockBlobService
        self.azure_service = BlockBlobService(
            self.read_option('writer', 'account_name'),
            self.read_option('writer', 'account_key'))

    @classmethod
    def meets_conditions(cls, config):
        if not config.writer_options['name'].endswith('AzureBlobWriter'):
            cls._log_skip_reason('Wrong writer configured')
            return False
        return super(S3AzureBlobBypass, cls).meets_conditions(config)
    @retry_long
    def _copy_s3_key(self, key):
        blob_name = key.name.split('/')[-1]
        url = key.generate_url(S3_URL_EXPIRES_IN)
        # Convert the https://<bucket>.s3.amazonaws.com/<path> URL format to
        # https://s3.amazonaws.com/<bucket>/<path>, since the former gives
        # certificate errors when the bucket name contains dots.
        url = re.sub(r'^https://([^/]+)\.s3\.amazonaws\.com/',
                     r'https://s3.amazonaws.com/\1/',
                     url)
        self.azure_service.copy_blob(
            self.container,
            blob_name,
            url,
            timeout=S3_URL_EXPIRES_IN,
        )
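
For reference, the re.sub call in _copy_s3_key rewrites a virtual-hosted-style
S3 URL into the path-style form before handing it to Azure. The following
standalone sketch shows that rewrite; the bucket name, key, and query string
are made up purely for illustration:

    import re

    # Illustrative signed URL in the virtual-hosted style (made-up values).
    signed_url = ('https://my.data.bucket.s3.amazonaws.com/dump/part-000.jl.gz'
                  '?AWSAccessKeyId=XXX&Expires=1800&Signature=YYY')

    # Same pattern as in _copy_s3_key: move the bucket out of the host and
    # into the path, which avoids TLS certificate errors for dotted buckets.
    path_style_url = re.sub(r'^https://([^/]+)\.s3\.amazonaws\.com/',
                            r'https://s3.amazonaws.com/\1/',
                            signed_url)

    # path_style_url is now:
    # https://s3.amazonaws.com/my.data.bucket/dump/part-000.jl.gz?AWSAccessKeyId=...
    print(path_style_url)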
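
For context, the conditions listed in the class docstring describe a pipeline
whose configuration looks roughly like the sketch below. This is illustrative
only: the writer option names (container, account_name, account_key) match
those read in __init__ above, but the reader option names are assumptions
about S3Reader and may differ in your exporters version.

    # Illustrative configuration sketch; option names under "reader" are
    # assumed and not taken from this module.
    example_config = {
        "reader": {
            "name": "exporters.readers.s3_reader.S3Reader",
            "options": {
                "bucket": "source-bucket",           # assumed option name
                "aws_access_key_id": "AKIA...",      # assumed option name
                "aws_secret_access_key": "...",      # assumed option name
                "prefix": "dump/",                   # assumed option name
            },
        },
        "writer": {
            "name": "exporters.writers.azure_blob_writer.AzureBlobWriter",
            "options": {
                "container": "destination-container",
                "account_name": "myazureaccount",
                "account_key": "...",
            },
        },
        # No filter, transform, or grouper sections, and no items_limit or
        # non-default buffer options on the writer, so the bypass can apply.
    }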