From e36608303f7b3c565e08f5e38a65f68c765cd0cf Mon Sep 17 00:00:00 2001 From: Emmanuel Odeke Date: Thu, 13 Nov 2014 10:49:53 -0700 Subject: [PATCH] Fix bug with missing downloadURL for docs/sheets Google Docs + Sheets do not populate 'DownloadUrl', but instead provide a map of exportLinks, e.g. csv, docx, pptx, txt etc. Use a simple heuristic to determine the url to export as well as create the target extension to avoid clobbering the original file on a re-sync. --- README.md | 3 ++ changes.go | 6 ++++ cmd/drive/main.go | 7 ++-- commands.go | 3 ++ pull.go | 91 +++++++++++++++++++++++++++++++++++++++-------- remote.go | 10 ++++-- types.go | 4 +++ 7 files changed, 106 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 94698312..6538068d 100644 --- a/README.md +++ b/README.md @@ -49,6 +49,9 @@ Background sync is not just hard, it's stupid. My technical and philosophical ra * Probably, it doesn't work on Windows. * Google Drive allows a directory to contain files/directories with the same name. Client doesn't handle these cases yet. We don't recommend you to use `drive` if you have such files/directories to avoid data loss. * Racing conditions occur if remote is being modified while we're trying to update the file. Google Drive provides resource versioning with ETags, use Etags to avoid racy cases. +* Google Docs + Sheets + Presentations data cannot be downloaded raw but only +as exported to different formats, e.g. docx, xlsx, csv etc., hence doing a pull of +these types will result in an exported document. ## License Copyright 2013 Google Inc. All Rights Reserved. diff --git a/changes.go b/changes.go index 11cc4a79..c53755ce 100644 --- a/changes.go +++ b/changes.go @@ -37,6 +37,12 @@ func (g *Commands) resolveChangeListRecv( isPush bool, p string, r *File, l *File) (cl []*Change, err error) { var change *Change if isPush { + // Handle the case of doc files for which we don't have a direct download + // url but have exportable links.
These files should not be clobbered on the cloud + if r != nil && !r.IsDir && r.BlobAt == "" { + return cl, nil + } + change = &Change{Path: p, Src: l, Dest: r} } else { change = &Change{Path: p, Src: r, Dest: l} diff --git a/cmd/drive/main.go b/cmd/drive/main.go index 81f6e1b2..2f577f9c 100644 --- a/cmd/drive/main.go +++ b/cmd/drive/main.go @@ -56,13 +56,15 @@ func (cmd *initCmd) Run(args []string) { } type pullCmd struct { - isRecursive *bool - isNoPrompt *bool + isRecursive *bool + isNoPrompt *bool + exportOnBackup *bool } func (cmd *pullCmd) Flags(fs *flag.FlagSet) *flag.FlagSet { cmd.isRecursive = fs.Bool("r", true, "performs the pull action recursively") cmd.isNoPrompt = fs.Bool("no-prompt", false, "shows no prompt before applying the pull action") + cmd.exportOnBackup = fs.Bool("export", false, "export your docs + sheets files") return fs } @@ -72,6 +74,7 @@ func (cmd *pullCmd) Run(args []string) { Path: path, IsRecursive: *cmd.isRecursive, IsNoPrompt: *cmd.isNoPrompt, + ExportOnBackup: *cmd.exportOnBackup, }).Pull()) } diff --git a/commands.go b/commands.go index dbcf73e8..ad74cc7e 100644 --- a/commands.go +++ b/commands.go @@ -33,6 +33,9 @@ type Options struct { IsForce bool // Hidden discovers hidden paths if set Hidden bool + // ExportOnBackup when set allows the exporting of Google Docs + Sheets to a + // downloadable format e.g *.presentation to pptx. + ExportOnBackup bool } type Commands struct { diff --git a/pull.go b/pull.go index 2785d2b1..1fc6fea4 100644 --- a/pull.go +++ b/pull.go @@ -18,6 +18,7 @@ import ( "fmt" "io" "os" + "strings" "path/filepath" "sync" ) @@ -26,7 +27,23 @@ const ( maxNumOfConcPullTasks = 4 ) -// Pull from remote if remote path exists and in a god context. 
If path is a +func docExportsMap() *map[string][]string { + return &map[string][]string { + "text/plain": []string{"text/plain", "txt",}, + "application/vnd.google-apps.drawing": []string{"image/svg+xml", "svg+xml",}, + "application/vnd.google-apps.spreadsheet": []string{ + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "xlsx", + }, + "application/vnd.google-apps.document": []string{ + "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "docx", + }, + "application/vnd.google-apps.presentation": []string{ + "application/vnd.openxmlformats-officedocument.presentationml.presentation", "pptx", + }, + } +} + +// Pull from remote if remote path exists and in a gd context. If path is a // directory, it recursively pulls from the remote if there are remote changes. // It doesn't check if there are remote changes if isForce is set. func (g *Commands) Pull() (err error) { @@ -47,12 +64,12 @@ func (g *Commands) Pull() (err error) { } if ok := printChangeList(cl, g.opts.IsNoPrompt); ok { - return g.playPullChangeList(cl) + return g.playPullChangeList(cl, g.opts.ExportOnBackup) } return } -func (g *Commands) playPullChangeList(cl []*Change) (err error) { +func (g *Commands) playPullChangeList(cl []*Change, exportOnBackup bool) (err error) { var next []*Change g.taskStart(len(cl)) @@ -72,9 +89,9 @@ func (g *Commands) playPullChangeList(cl []*Change) (err error) { for _, c := range next { switch c.Op() { case OpMod: - go g.localMod(&wg, c) + go g.localMod(&wg, c, exportOnBackup) case OpAdd: - go g.localAdd(&wg, c) + go g.localAdd(&wg, c, exportOnBackup) case OpDelete: go g.localDelete(&wg, c) } @@ -86,20 +103,22 @@ func (g *Commands) playPullChangeList(cl []*Change) (err error) { return err } -func (g *Commands) localMod(wg *sync.WaitGroup, change *Change) (err error) { +func (g *Commands) localMod(wg *sync.WaitGroup, change *Change, exportOnBackup bool) (err error) { defer g.taskDone() defer wg.Done() destAbsPath := 
g.context.AbsPathOf(change.Path) - if change.Src.BlobAt != "" { + + if change.Src.BlobAt != "" || change.Src.ExportLinks != nil { // download and replace - if err = g.download(change); err != nil { + if err = g.download(change, exportOnBackup); err != nil { return } } return os.Chtimes(destAbsPath, change.Src.ModTime, change.Src.ModTime) } -func (g *Commands) localAdd(wg *sync.WaitGroup, change *Change) (err error) { +func (g *Commands) localAdd(wg *sync.WaitGroup, change *Change, exportOnBackup bool) (err error) { + defer g.taskDone() defer wg.Done() destAbsPath := g.context.AbsPathOf(change.Path) @@ -108,9 +127,9 @@ func (g *Commands) localAdd(wg *sync.WaitGroup, change *Change) (err error) { if change.Src.IsDir { return os.Mkdir(destAbsPath, os.ModeDir|0755) } - if change.Src.BlobAt != "" { + if change.Src.BlobAt != "" || change.Src.ExportLinks != nil { // download and create - if err = g.download(change); err != nil { + if err = g.download(change, exportOnBackup); err != nil { return } } @@ -123,8 +142,52 @@ func (g *Commands) localDelete(wg *sync.WaitGroup, change *Change) (err error) { return os.RemoveAll(change.Dest.BlobAt) } -func (g *Commands) download(change *Change) (err error) { - destAbsPath := g.context.AbsPathOf(change.Path) +func touchFile(path string) (err error) { + var ef *os.File + defer func() { + if err != nil && ef != nil { + ef.Close() + } + }() + ef, err = os.Create(path) + return +} + +func (g *Commands) download(change *Change, exportOnBackup bool) (err error) { + exportUrl := "" + baseName := change.Path + + // If BlobAt is not set, we are most likely dealing with + // Document/SpreadSheet/Image. In this case we'll use the target + // exportable type since we cannot directly download the raw data. + // We also need to pay attention and add the exported extension + // to avoid overriding the original file on re-syncing. 
+ if len(change.Src.BlobAt) < 1 && exportOnBackup { + var ok bool + var mimeKeyExtList[]string + + exportsMap := *docExportsMap() + mimeKeyExtList, ok = exportsMap[change.Src.MimeType] + if !ok { + mimeKeyExtList = []string{"text/plain", "txt"} + } + + // We need to touch an empty file for the + // non-downloadable version to avoid an erasure + // on later push. If there is a name conflict / data race, + // the original file won't be touched. + emptyFilepath := g.context.AbsPathOf(baseName) + err = touchFile(emptyFilepath) + + // TODO: @odeke-em / @rakyll, if user selects all desired formats, + // should we be downloading every single one of them? + exportUrl = change.Src.ExportLinks[mimeKeyExtList[0]] + fmt.Print("Exported ", baseName) + baseName = strings.Join([]string{baseName, mimeKeyExtList[1]}, ".") + fmt.Println(" to: ", baseName) + } + + destAbsPath := g.context.AbsPathOf(baseName) var fo *os.File fo, err = os.Create(destAbsPath) if err != nil { @@ -144,7 +207,7 @@ func (g *Commands) download(change *Change) (err error) { blob.Close() } }() - blob, err = g.rem.Download(change.Src.Id) + blob, err = g.rem.Download(change.Src.Id, exportUrl) if err != nil { return err } diff --git a/remote.go b/remote.go index 8b4d6ce9..4cdfec0e 100644 --- a/remote.go +++ b/remote.go @@ -120,8 +120,14 @@ func (r *Remote) Publish(id string) (string, error) { return "https://googledrive.com/host/" + id, nil } -func (r *Remote) Download(id string) (io.ReadCloser, error) { - resp, err := r.transport.Client().Get("https://googledrive.com/host/" + id) +func (r *Remote) Download(id string, exportUrl string) (io.ReadCloser, error) { + var url string + if len(exportUrl) < 1 { + url = "https://googledrive.com/host/" + id + } else { + url = exportUrl + } + resp, err := r.transport.Client().Get(url) if err != nil || resp.StatusCode < 200 || resp.StatusCode > 299 { return resp.Body, err } diff --git a/types.go b/types.go index 2acad24c..d0e25722 100644 --- a/types.go +++ b/types.go @@
-38,7 +38,9 @@ type File struct { ModTime time.Time Size int64 BlobAt string + MimeType string Md5Checksum string + ExportLinks map[string]string } func NewRemoteFile(f *drive.File) *File { @@ -50,8 +52,10 @@ func NewRemoteFile(f *drive.File) *File { IsDir: f.MimeType == "application/vnd.google-apps.folder", ModTime: mtime, Size: f.FileSize, + MimeType: f.MimeType, BlobAt: f.DownloadUrl, Md5Checksum: f.Md5Checksum, + ExportLinks: f.ExportLinks, } }