Raise exception on missing gdrive settings file path in config
maxhora committed Oct 26, 2019
1 parent 1abb735 commit e1ed0b1
Showing 6 changed files with 17 additions and 108 deletions.
1 change: 0 additions & 1 deletion MANIFEST.in
@@ -1,3 +1,2 @@
include fastentrypoints.py
include LICENSE
include dvc/remote/gdrive/settings.yaml
5 changes: 0 additions & 5 deletions dvc/config.py
@@ -247,11 +247,7 @@ class Config(object): # pylint: disable=too-many-instance-attributes
Optional(SECTION_GCP_PROJECTNAME): str,
}

SECTION_GDRIVE = "gdrive"
SECTION_GDRIVE_CREDENTIALPATH = CREDENTIALPATH
SECTION_GDRIVE_SCHEMA = {
Optional(SECTION_GDRIVE_CREDENTIALPATH): str,
}

# backward compatibility
SECTION_LOCAL = "local"
@@ -320,7 +316,6 @@ class Config(object): # pylint: disable=too-many-instance-attributes
Optional(SECTION_AWS, default={}): SECTION_AWS_SCHEMA,
Optional(SECTION_GCP, default={}): SECTION_GCP_SCHEMA,
Optional(SECTION_LOCAL, default={}): SECTION_LOCAL_SCHEMA,
Optional(SECTION_GDRIVE, default={}): SECTION_GDRIVE_SCHEMA,
}

def __init__(self, dvc_dir=None, validate=True):
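For context, this is roughly how the removed section schema was used; a minimal sketch, assuming DVC validates config sections with the `schema` package (as the Optional(...) entries suggest) and that CREDENTIALPATH is the literal string "credentialpath":

from schema import Optional, Schema

# What the dropped gdrive section schema accepted before this commit.
SECTION_GDRIVE_SCHEMA = {Optional("credentialpath"): str}

Schema(SECTION_GDRIVE_SCHEMA).validate({"credentialpath": "gdrive-settings.yaml"})

With the entry gone from Config.SCHEMA, the top-level config no longer recognises a dedicated gdrive block; the remote now reads credentialpath from its own config section (see the __init__ change below).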
63 changes: 14 additions & 49 deletions dvc/remote/gdrive/__init__.py
@@ -11,14 +11,14 @@
from dvc.path_info import CloudURLInfo
from dvc.remote.base import RemoteBASE
from dvc.config import Config
from dvc.remote.gdrive.utils import shared_token_warning
from dvc.exceptions import DvcException
from dvc.remote.gdrive.pydrive import (
RequestListFile,
RequestListFilePaginated,
RequestUploadFile,
RequestDownloadFile,
)
from dvc.remote.gdrive.utils import FOLDER_MIME_TYPE


class GDriveURLInfo(CloudURLInfo):
@@ -32,25 +32,21 @@ class RemoteGDrive(RemoteBASE):
path_cls = GDriveURLInfo
REGEX = r"^gdrive://.*$"
REQUIRES = {"pydrive": "pydrive"}
PARAM_CHECKSUM = "md5Checksum"
DEFAULT_GOOGLE_AUTH_SETTINGS_PATH = os.path.join(
os.path.dirname(__file__), "settings.yaml"
)
FOLDER_MIME_TYPE = "application/vnd.google-apps.folder"

def __init__(self, repo, config):
super(RemoteGDrive, self).__init__(repo, config)
self.no_traverse = False
self.cached_dirs = {}
self.cached_ids = {}
if Config.SECTION_GDRIVE_CREDENTIALPATH not in config:
shared_token_warning()
raise DvcException(
"Google Drive settings file path is missed from config. "
"Learn more at https://dvc.org/doc."
)
self.gdrive_credentials_path = config.get(
Config.SECTION_GDRIVE_CREDENTIALPATH,
self.DEFAULT_GOOGLE_AUTH_SETTINGS_PATH,
Config.SECTION_GDRIVE_CREDENTIALPATH
)
core = config.get(Config.SECTION_GDRIVE, {})
print("Credentials path: {} , {}".format(self.gdrive_credentials_path, core))
self.path_info = self.path_cls(config[Config.SECTION_REMOTE_URL])
print("!!!!!!!!!!!!!!!!! Init")
self.init_drive()

def init_drive(self):
@@ -78,34 +74,23 @@ def list_drive_item(self, query):
page_list = self.execute_request(list_request)

def cache_root_dirs(self):
print("Gather cache...........................................")
self.cached_dirs = {}
self.cached_ids = {}
for dir1 in self.list_drive_item(
"'{}' in parents and trashed=false".format(self.root_id)
):
self.cached_dirs.setdefault(dir1["title"], []).append(dir1["id"])
print("Cashing {} with id {}".format(dir1["title"], dir1["id"]))
self.cached_ids[dir1["id"]] = dir1["title"]
print("Cached root dir content: {}".format(self.cached_dirs))

@cached_property
def drive(self):
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
import logging

if os.getenv("PYDRIVE_USER_CREDENTIALS_DATA"):
with open("credentials.json", "w") as credentials_file:
credentials_file.write(
os.getenv("PYDRIVE_USER_CREDENTIALS_DATA")
)

# Suppress the googleapiclient.discovery_cache import warning raised during GoogleAuth setup
logging.getLogger("googleapiclient.discovery_cache").setLevel(
logging.ERROR
)

GoogleAuth.DEFAULT_SETTINGS["client_config_backend"] = "settings"
gauth = GoogleAuth(settings_file=self.gdrive_credentials_path)
gauth.CommandLineAuth()
@@ -118,32 +103,25 @@ def create_drive_item(self, parent_id, title):
"drive": self.drive,
"title": title,
"parent_id": parent_id,
"mime_type": self.FOLDER_MIME_TYPE,
"mime_type": FOLDER_MIME_TYPE,
}
)
result = self.execute_request(upload_request)
return result

def get_drive_item(self, name, parents_ids):
print('get_drive_item for parents_ids {}'.format(parents_ids))
query = " or ".join(
"'{}' in parents".format(parent_id)
for parent_id in parents_ids
"'{}' in parents".format(parent_id) for parent_id in parents_ids
)
if not query:
return
query += " and trashed=false and title='{}'".format(name)
print("get_drive_item query: {}".format(query))

list_request = RequestListFile(
self.drive,
query,
)
list_request = RequestListFile(self.drive, query)
item_list = self.execute_request(list_request)
return next(iter(item_list), None)

def resolve_remote_file(self, parents_ids, path_parts, create):
print("resolve remote file for {}".format(path_parts))
for path_part in path_parts:
item = self.get_drive_item(path_part, parents_ids)
if not item and create:
@@ -157,9 +135,8 @@ def subtract_root_path(self, parts):
parents_ids = [self.path_info.netloc]
if not hasattr(self, "root_id"):
return parts, parents_ids

for part in self.path_info.path.split("/"):
print("subtract_root_path compare {} with {}".format(part, parts[0]))
if parts and parts[0] == part:
parts.pop(0)
parents_ids = [self.root_id]
@@ -170,21 +147,18 @@ def subtract_root_path(self, parts):
def get_path_id_from_cache(self, path_info):
files_ids = []
parts, parents_ids = self.subtract_root_path(path_info.path.split("/"))
print("Resolved parts: {}".format(parts))
if (
path_info != self.path_info
and parts
and (parts[0] in self.cached_dirs)
):
parents_ids = self.cached_dirs[parts[0]]
print('Parents_ids resolved from cash for {} as {}'.format(parts[0], self.cached_dirs[parts[0]]))
files_ids = self.cached_dirs[parts[0]]
parts.pop(0)

return files_ids, parents_ids, parts

def get_path_id(self, path_info, create=False):
print("get_path_id for path {}".format(path_info))
files_ids, parents_ids, parts = self.get_path_id_from_cache(path_info)

if not parts and files_ids:
Expand All @@ -200,7 +174,6 @@ def _upload(self, from_file, to_info, name, no_progress_bar):
dirname = to_info.parent
if dirname:
parent_id = self.get_path_id(dirname, True)
print("parent_id on upload resolved as: {}".format(parent_id))
else:
parent_id = to_info.netloc

@@ -237,7 +210,7 @@ def list_cache_paths(self):
yield posixpath.join(prefix, path)

def list_file_path(self, drive_file):
if drive_file["mimeType"] == self.FOLDER_MIME_TYPE:
if drive_file["mimeType"] == FOLDER_MIME_TYPE:
for i in self.list_path(drive_file["id"]):
yield posixpath.join(drive_file["title"], i)
else:
@@ -251,22 +224,14 @@ def list_path(self, parent_id):
yield path

def all(self):
print('All')
query = " or ".join(
"'{}' in parents".format(dir_id)
for dir_id in self.cached_ids
"'{}' in parents".format(dir_id) for dir_id in self.cached_ids
)
if not query:
return
query += " and trashed=false"
print("All query: {}".format(query))
counter = 0
for file1 in self.list_drive_item(query):
parent_id = file1["parents"][0]["id"]
print(self.cached_ids[parent_id])
print(file1["title"])
counter += 1
print("{}".format(counter))
path = posixpath.join(self.cached_ids[parent_id], file1["title"])
try:
yield self.path_to_checksum(path)
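The net effect of the __init__ change above, condensed into a standalone sketch: previously a missing credentialpath produced a shared-token warning and fell back to the bundled settings.yaml; now initialisation fails fast. The helper function and config dict below are illustrative only, not DVC's actual wiring.

class DvcException(Exception):  # stand-in for dvc.exceptions.DvcException
    pass

SECTION_GDRIVE_CREDENTIALPATH = "credentialpath"  # assumed value of Config.SECTION_GDRIVE_CREDENTIALPATH

def resolve_credentials_path(remote_config):
    # Mirrors the new guard in RemoteGDrive.__init__: no key, no remote.
    if SECTION_GDRIVE_CREDENTIALPATH not in remote_config:
        raise DvcException(
            "Google Drive settings file path is missing from config. "
            "Learn more at https://dvc.org/doc."
        )
    return remote_config[SECTION_GDRIVE_CREDENTIALPATH]

try:
    resolve_credentials_path({"url": "gdrive://root/dvc-storage"})
except DvcException as exc:
    print(exc)  # fails fast instead of silently using a shared token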
4 changes: 2 additions & 2 deletions dvc/remote/gdrive/pydrive.py
@@ -1,6 +1,6 @@
import os

from dvc.remote.gdrive.utils import TrackFileReadProgress
from dvc.remote.gdrive.utils import TrackFileReadProgress, FOLDER_MIME_TYPE


class RequestBASE:
@@ -64,7 +64,7 @@ def execute(self):
"mimeType": self.mime_type,
}
)
if self.mime_type == "application/vnd.google-apps.folder":
if self.mime_type == FOLDER_MIME_TYPE:
item.Upload()
else:
self.upload(item)
14 changes: 0 additions & 14 deletions dvc/remote/gdrive/settings.yaml

This file was deleted.
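With the bundled file gone, users now have to point credentialpath at their own PyDrive settings file. A minimal sketch of such a file, following PyDrive's documented settings format; the client_id/client_secret values are placeholders and the OAuth scope is an assumption:

client_config_backend: settings
client_config:
  client_id: <your-oauth-client-id>.apps.googleusercontent.com
  client_secret: <your-oauth-client-secret>

save_credentials: True
save_credentials_backend: file
save_credentials_file: credentials.json

get_refresh_token: True

oauth_scope:
  - https://www.googleapis.com/auth/drive

The save_credentials_file name matches the credentials.json that the drive property above writes when PYDRIVE_USER_CREDENTIALS_DATA is set.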

38 changes: 1 addition & 37 deletions dvc/remote/gdrive/utils.py
@@ -1,15 +1,9 @@
import functools
import os
import threading
import logging

from dvc.progress import Tqdm


LOGGER = logging.getLogger(__name__)


MIME_GOOGLE_APPS_FOLDER = "application/vnd.google-apps.folder"
FOLDER_MIME_TYPE = "application/vnd.google-apps.folder"


class TrackFileReadProgress(object):
@@ -36,33 +30,3 @@ def close(self):

def __getattr__(self, attr):
return getattr(self.fobj, attr)


def only_once(func):
lock = threading.Lock()
locks = {}
results = {}

@functools.wraps(func)
def wrapped(*args, **kwargs):
key = (args, tuple(kwargs.items()))
# could do with just setdefault, but it would require
# create/delete a "default" Lock() object for each call, so it
# is better to lock a single one for a short time
with lock:
if key not in locks:
locks[key] = threading.Lock()
with locks[key]:
if key not in results:
results[key] = func(*args, **kwargs)
return results[key]

return wrapped


@only_once
def shared_token_warning():
LOGGER.warning(
"Warning: a shared GoogleAPI token is in use. "
"Please create your own token."
)
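For the record, this is how the removed only_once decorator behaved; a usage sketch assuming the definition shown above, with fetch_value as a throwaway example function:

@only_once
def fetch_value(key):
    print("computing", key)  # side effect runs once per distinct argument
    return key.upper()

fetch_value("token")  # prints "computing token" and returns "TOKEN"
fetch_value("token")  # returns the cached "TOKEN" without printing again

Its only use shown here was shared_token_warning, which needed to log at most once per process; with the warning replaced by the hard DvcException in __init__.py, both helpers are dead code.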
