Source code for exporters.writers.gdrive_writer

import json
import os
import shutil
import tempfile
from collections import Counter
from exporters.default_retries import retry_long
from exporters.writers.filebase_base_writer import FilebaseBaseWriter
from exporters.writers.base_writer import InconsistentWriteState


class GDriveWriter(FilebaseBaseWriter):
    """
    Writes items to a Google Drive account. It is a file-based writer, so it
    also takes a filebase option.

        - client_secret (object)
            JSON object containing the client secrets (client-secret.json)
            file obtained when creating the Google Drive API key.

        - credentials (object)
            JSON object containing credentials, obtained by authenticating
            the application using the bin/get_gdrive_credentials.py script.

        - filebase (str)
            Path to store the exported files
    """

    supported_options = {
        'credentials': {'type': object},
        'client_secret': {'type': object},
    }

    def __init__(self, *args, **kwargs):
        """Authenticate against Google Drive and initialise writer metadata.

        The ``client_secret`` and ``credentials`` options are dumped as JSON
        into a temporary directory because pydrive is given file paths to
        load them from; the directory is removed once they are loaded.
        """
        super(GDriveWriter, self).__init__(*args, **kwargs)
        # Imported here, so pydrive is only imported when a GDriveWriter is
        # actually instantiated.
        from pydrive.auth import GoogleAuth
        from pydrive.drive import GoogleDrive
        gauth = GoogleAuth()
        files_tmp_path = tempfile.mkdtemp()
        try:
            client_secret_file = os.path.join(files_tmp_path, 'secret.json')
            with open(client_secret_file, 'w') as f:
                json.dump(self.read_option('client_secret'), f)
            gauth.LoadClientConfigFile(client_secret_file)
            credentials_file = os.path.join(files_tmp_path, 'credentials.json')
            with open(credentials_file, 'w') as f:
                json.dump(self.read_option('credentials'), f)
            gauth.LoadCredentialsFile(credentials_file)
        finally:
            # Always remove the directory, even on failure: it holds secrets
            # in plain text.
            shutil.rmtree(files_tmp_path, ignore_errors=True)
        self.drive = GoogleDrive(gauth)
        self.set_metadata('files_counter', Counter())
        self.set_metadata('files_written', [])

    @staticmethod
    def _escape_query_value(value):
        """Escape a value for use inside a single-quoted Drive query string.

        Backslashes are escaped first so the backslashes added for the
        quotes are not themselves doubled.
        """
        return value.replace('\\', '\\\\').replace("'", "\\'")

    def get_file_suffix(self, path, prefix):
        """
        Gets a valid filename suffix.

        Returns the number of non-trashed files under ``path`` whose title
        contains ``prefix``, zero-padded to four digits. The folder path is
        created on Drive if it does not exist yet.
        """
        parent = self._ensure_folder_path(path)
        query = "'{}' in parents and trashed=false and title contains '{}'".format(
            parent['id'], self._escape_query_value(prefix))
        file_list = self.drive.ListFile({'q': query}).GetList()
        try:
            number_of_files = len(file_list)
        except TypeError:
            # Best effort: treat a non-sized listing result as "no files".
            number_of_files = 0
        return '{0:04}'.format(number_of_files)

    def _ensure_folder_path(self, filebase_path):
        """Creates the directory for the path given, recursively creating
        parent directories when needed.

        Returns a ``{"id": ...}`` dict referencing the deepest folder of the
        path, or the Drive root when the path has no components.
        """
        parent = {"id": "root"}
        for folder in filebase_path.split('/'):
            if not folder:
                # Leading, trailing or doubled slashes (or an empty path)
                # produce empty components; they do not name a folder.
                continue
            query = "'{}' in parents and trashed=false and title = '{}'".format(
                parent['id'], self._escape_query_value(folder))
            matches = self.drive.ListFile({'q': query}).GetList()
            if matches:
                parent = {"id": matches[-1]["id"]}
            else:
                new_folder = self.drive.CreateFile({
                    'title': folder,
                    'parents': [parent],
                    'mimeType': 'application/vnd.google-apps.folder',
                })
                new_folder.Upload()
                # Descend into the folder just created; otherwise the next
                # component (and the final upload) would land in the wrong
                # parent.
                parent = {"id": new_folder["id"]}
        return parent

    @retry_long
    def write(self, dump_path, group_key=None, file_name=None):
        """Upload the file at ``dump_path`` to Drive and record its metadata.

        The destination folder and file title come from
        ``create_filebase_name(group_key, file_name=file_name)``; missing
        folders are created first.
        """
        if group_key is None:
            group_key = []
        filebase_path, filename = self.create_filebase_name(group_key, file_name=file_name)
        parent = self._ensure_folder_path(filebase_path)
        drive_file = self.drive.CreateFile({'title': filename, 'parents': [parent]})
        drive_file.SetContentFile(dump_path)
        drive_file.Upload()
        self._update_metadata(dump_path, drive_file)
        self.logger.info('Uploaded file {}'.format(drive_file['title']))

    def _update_metadata(self, dump_path, file):
        """Append local and remote size/hash of an uploaded file to the
        ``files_written`` metadata list.

        ``file`` is the uploaded Drive file object; its ``fileSize``,
        ``md5Checksum`` and ``title`` entries are read.
        """
        buffer_info = self.write_buffer.get_metadata(dump_path)
        key_info = {
            'size': buffer_info['size'],
            'remote_size': file['fileSize'],
            'hash': buffer_info['file_hash'],
            'remote_hash': file['md5Checksum'],
            'title': file['title'],
        }
        self.get_metadata('files_written').append(key_info)

    def _check_write_consistency(self):
        """Verify every recorded upload against its local size and hash.

        Raises:
            InconsistentWriteState: if the remote size or hash of any
                written file differs from the local one.
        """
        for file_info in self.get_metadata('files_written'):
            # Compared as strings: the local and remote sizes may not share
            # a type.
            if str(file_info['size']) != str(file_info['remote_size']):
                msg = 'Unexpected size of file {title}. Expected {size} - got {remote_size}'
                raise InconsistentWriteState(msg.format(**file_info))
            if file_info['hash'] != file_info['remote_hash']:
                msg = 'Unexpected hash of file {title}. Expected {hash} - got {remote_hash}'
                raise InconsistentWriteState(msg.format(**file_info))