From 0c4fac7fc2b68f436df21a47e4c8ac1e52e822ae Mon Sep 17 00:00:00 2001 From: Emmanuel Odeke Date: Fri, 5 Dec 2014 17:28:39 -0700 Subject: [PATCH] doc export via csv on CLI --- README.md | 28 ++++++++-- changes.go | 2 +- cmd/drive/main.go | 15 +++--- commands.go | 6 +-- pull.go | 128 ++++++++++++++++++++++++++++++++-------------- remote.go | 54 ++++++++++--------- 6 files changed, 153 insertions(+), 80 deletions(-) diff --git a/README.md b/README.md index d74a4df3..a905193f 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![Build Status](https://travis-ci.org/rakyll/drive.png?branch=master)](https://travis-ci.org/rakyll/drive) -`drive` is a tiny program to pull or push [Google Drive](https://drive.google.com) files. You need go1.2 installed in order to build the program. +`drive` is a tiny program to pull or push [Google Drive](https://drive.google.com) files. You need at least go1.2 installed in order to build the program. ## Installation @@ -12,7 +12,11 @@ Use `drive help` for further reference. $ drive init [path] $ drive pull [-r -no-prompt path] # pulls from remote - $ drive pull [-r -no-prompt -export path] # pulls from remote and exports Docs + Sheets to one of its export formats. + $ drive pull [-r -no-prompt -export ext1,ext2,ext3 path] # pulls from remote and exports Docs + Sheets to each of the listed export formats. + e.g: + $ drive pull [-r -no-prompt -export pdf,docx,rtf,html ReportII.txt] # pull ReportII.txt from remote and + export it to pdf, docx, rtf and html + $ drive push [-r -no-prompt path] # pushes to the remote $ drive push [-r -hidden path] # pushes also hidden directories and paths to the remote $ drive diff [path] # outputs a diff of local and remote @@ -50,13 +54,27 @@ Background sync is not just hard, it's stupid. My technical and philosophical ra * Possibility to support multiple accounts. Pull from or push to multiple Google Drive remotes. Possibility to support multiple backends. Why not to push to Dropbox or Box as well? 
+## Notes: +* Google Docs cannot be downloaded directly; they can only be +exported to other formats, e.g. docx, xlsx, csv, etc. +When doing a pull, remember to include the option `-export ext1,ext2,ext3`, +where ext1, ext2, ... can be any of: + * docx + * jpeg + * html + * odt + * rtf + * pdf + * png + * pptx + * svg + * txt + * xlsx + ## Known issues * Probably, it doesn't work on Windows. * Google Drive allows a directory to contain files/directories with the same name. Client doesn't handle these cases yet. We don't recommend you to use `drive` if you have such files/directories to avoid data loss. * Racing conditions occur if remote is being modified while we're trying to update the file. Google Drive provides resource versioning with ETags, use Etags to avoid racy cases. -* Google Docs + Sheets + Presentations data cannot be downloaded raw but only -as exported to different forms e.g docx, xlsx, csv etc hence doing a pull of -these types will result in a exported document. ## License Copyright 2013 Google Inc. All Rights Reserved. diff --git a/changes.go b/changes.go index d435b301..5bcb70c0 100644 --- a/changes.go +++ b/changes.go @@ -39,7 +39,7 @@ func (g *Commands) resolveChangeListRecv( if isPush { // Handle the case of doc files for which we don't have a direct download // url but have exportable links. 
These files should not be clobbered on the cloud - if IsGoogleDoc(r) { + if hasExportLinks(r) { return cl, nil } diff --git a/cmd/drive/main.go b/cmd/drive/main.go index 2f577f9c..34eba3b8 100644 --- a/cmd/drive/main.go +++ b/cmd/drive/main.go @@ -20,6 +20,7 @@ import ( "fmt" "os" "path/filepath" + "strings" "github.com/rakyll/command" "github.com/rakyll/drive" @@ -56,25 +57,27 @@ func (cmd *initCmd) Run(args []string) { } type pullCmd struct { - isRecursive *bool - isNoPrompt *bool - exportOnBackup *bool + export *string + isRecursive *bool + isNoPrompt *bool } func (cmd *pullCmd) Flags(fs *flag.FlagSet) *flag.FlagSet { + cmd.export = fs.String( + "export", "", "comma separated list of formats to export your docs + sheets files") cmd.isRecursive = fs.Bool("r", true, "performs the pull action recursively") cmd.isNoPrompt = fs.Bool("no-prompt", false, "shows no prompt before applying the pull action") - cmd.exportOnBackup = fs.Bool("export", false, "export your docs + sheets files") return fs } func (cmd *pullCmd) Run(args []string) { context, path := discoverContext(args) + exports := strings.Split(*cmd.export, ",") exitWithError(drive.New(context, &drive.Options{ Path: path, - IsRecursive: *cmd.isRecursive, + Exports: exports, IsNoPrompt: *cmd.isNoPrompt, - ExportOnBackup: *cmd.exportOnBackup, + IsRecursive: *cmd.isRecursive, }).Pull()) } diff --git a/commands.go b/commands.go index ad74cc7e..5d4c1337 100644 --- a/commands.go +++ b/commands.go @@ -33,9 +33,9 @@ type Options struct { IsForce bool // Hidden discovers hidden paths if set Hidden bool - // ExportOnBackup when set allows the exporting of Google Docs + Sheets to a - // downloadable format e.g *.presentation to pptx. 
- ExportOnBackup bool + // Exports contains the formats to export your Google Docs + Sheets to + // e.g ["csv" "txt"] + Exports []string } type Commands struct { diff --git a/pull.go b/pull.go index d28e62ed..cb108b5b 100644 --- a/pull.go +++ b/pull.go @@ -18,8 +18,9 @@ import ( "fmt" "io" "os" - "strings" + "path" "path/filepath" + "strings" "sync" ) @@ -27,7 +28,7 @@ const ( maxNumOfConcPullTasks = 4 ) -// Pull from remote if remote path exists and in a gd context. If path is a +// Pull from remote if remote path exists and in a god context. If path is a // directory, it recursively pulls from the remote if there are remote changes. // It doesn't check if there are remote changes if isForce is set. func (g *Commands) Pull() (err error) { @@ -48,12 +49,12 @@ func (g *Commands) Pull() (err error) { } if ok := printChangeList(cl, g.opts.IsNoPrompt); ok { - return g.playPullChangeList(cl, g.opts.ExportOnBackup) + return g.playPullChangeList(cl, g.opts.Exports) } return } -func (g *Commands) playPullChangeList(cl []*Change, exportOnBackup bool) (err error) { +func (g *Commands) playPullChangeList(cl []*Change, exports []string) (err error) { var next []*Change g.taskStart(len(cl)) @@ -73,9 +74,9 @@ func (g *Commands) playPullChangeList(cl []*Change, exportOnBackup bool) (err er for _, c := range next { switch c.Op() { case OpMod: - go g.localMod(&wg, c, exportOnBackup) + go g.localMod(&wg, c, exports) case OpAdd: - go g.localAdd(&wg, c, exportOnBackup) + go g.localAdd(&wg, c, exports) case OpDelete: go g.localDelete(&wg, c) } @@ -87,21 +88,21 @@ func (g *Commands) playPullChangeList(cl []*Change, exportOnBackup bool) (err er return err } -func (g *Commands) localMod(wg *sync.WaitGroup, change *Change, exportOnBackup bool) (err error) { +func (g *Commands) localMod(wg *sync.WaitGroup, change *Change, exports []string) (err error) { defer g.taskDone() defer wg.Done() destAbsPath := g.context.AbsPathOf(change.Path) if change.Src.BlobAt != "" || change.Src.ExportLinks != 
nil { // download and replace - if err = g.download(change, exportOnBackup); err != nil { + if err = g.download(change, exports); err != nil { return } } return os.Chtimes(destAbsPath, change.Src.ModTime, change.Src.ModTime) } -func (g *Commands) localAdd(wg *sync.WaitGroup, change *Change, exportOnBackup bool) (err error) { +func (g *Commands) localAdd(wg *sync.WaitGroup, change *Change, exports []string) (err error) { defer g.taskDone() defer wg.Done() @@ -113,7 +114,7 @@ func (g *Commands) localAdd(wg *sync.WaitGroup, change *Change, exportOnBackup b } if change.Src.BlobAt != "" || change.Src.ExportLinks != nil { // download and create - if err = g.download(change, exportOnBackup); err != nil { + if err = g.download(change, exports); err != nil { return } } @@ -137,40 +138,93 @@ func touchFile(path string) (err error) { return } -func (g *Commands) download(change *Change, exportOnBackup bool) (err error) { - exportUrl := "" - baseName := change.Path +func (g *Commands) export(f *File, destAbsPath string, exports []string) (manifest []string, err error) { + if len(exports) < 1 || f == nil { + return + } - // If BlobAt is not set, we are most likely dealing with - // Document/SpreadSheet/Image. In this case we'll use the target - // exportable type since we cannot directly download the raw data. - // We also need to pay attention and add the exported extension - // to avoid overriding the original file on re-syncing. 
- if len(change.Src.BlobAt) < 1 && exportOnBackup && IsGoogleDoc(change.Src) { - var ok bool - var mimeKeyExtList[]string + dirPath := strings.Join([]string{destAbsPath, "exports"}, "_") + if err = os.MkdirAll(dirPath, os.ModeDir|0755); err != nil { + return + } - mimeKeyExtList, ok = docExportsMap[change.Src.MimeType] + var ok bool + var mimeType, exportURL string + + waitables := map[string]string{} + for _, ext := range exports { + mimeType, ok = docExportsMap[ext] + if !ok { + continue + } + exportURL, ok = f.ExportLinks[mimeType] if !ok { - mimeKeyExtList = []string{"text/plain", "txt"} + continue } + exportPath := strings.Join([]string{filepath.Base(f.Name), ext}, ".") + pathName := path.Join(dirPath, exportPath) + waitables[pathName] = exportURL + } - // We need to touch an empty file for the - // non-downloadable version to avoid an erasal - // on later push. If there is a name conflict / data race, - // the original file won't be touched. - emptyFilepath := g.context.AbsPathOf(baseName) - err = touchFile(emptyFilepath) - - // TODO: @odeke-em / @rakyll, if user selects all desired formats, - // should we be be downloading every single one of them? 
- exportUrl = change.Src.ExportLinks[mimeKeyExtList[0]] - fmt.Print("Exported ", baseName) - baseName = strings.Join([]string{baseName, mimeKeyExtList[1]}, ".") - fmt.Println(" to: ", baseName) + var wg sync.WaitGroup + wg.Add(len(waitables)) + + for pathName, exportURL := range waitables { + go func(wg *sync.WaitGroup, dest, id, url string) error { + var fo *os.File + var blob io.ReadCloser + var fErr, dlErr error + + defer func() { + if blob != nil { + blob.Close() + } + if fo != nil { + fo.Close() + } + wg.Done() + }() + + fo, fErr = os.Create(dest) + if fErr != nil { + return fErr + } + + blob, dlErr = g.rem.Download(id, url) + if dlErr != nil { + return dlErr + } + _, err = io.Copy(fo, blob) + if err == nil { + manifest = append(manifest, dest) + } + return err + }(&wg, pathName, f.Id, exportURL) } + wg.Wait() + return +} +func (g *Commands) download(change *Change, exports []string) (err error) { + baseName := change.Path destAbsPath := g.context.AbsPathOf(baseName) + + if hasExportLinks(change.Src) { + // We need to touch the empty file to ensure + // consistency during a push. 
+ emptyFilepath := g.context.AbsPathOf(baseName) + if err = touchFile(emptyFilepath); err != nil { + return err + } + manifest, exportErr := g.export(change.Src, destAbsPath, exports) + if exportErr == nil { + for i, exportPath := range manifest { + fmt.Printf("# %d: %s\n", i+1, exportPath) + } + } + return exportErr + } + var fo *os.File fo, err = os.Create(destAbsPath) if err != nil { @@ -190,7 +244,7 @@ func (g *Commands) download(change *Change, exportOnBackup bool) (err error) { blob.Close() } }() - blob, err = g.rem.Download(change.Src.Id, exportUrl) + blob, err = g.rem.Download(change.Src.Id, "") if err != nil { return err } diff --git a/remote.go b/remote.go index fb3f2dd9..416e30bd 100644 --- a/remote.go +++ b/remote.go @@ -46,7 +46,26 @@ var ( ErrPathNotExists = errors.New("remote path doesn't exist") ) -var docExportsMap = *newDocExportsMap() +var docExportsMap = map[string]string{ + "csv": "text/csv", + "html": "text/html", + "txt": "text/plain", + + "gif": "image/gif", + "png": "image/png", + "svg": "image/svg+xml", + "jpeg": "image/jpeg", + + "odt": "application/vnd.oasis.opendocument.text", + "rtf": "application/rtf", + "pdf": "application/pdf", + + "docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "pptx": "application/vnd.openxmlformats-officedocument.wordprocessingml.presentation", + + "xls": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + "xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", +} type Remote struct { transport *oauth.Transport @@ -59,16 +78,11 @@ func NewRemoteContext(context *config.Context) *Remote { return &Remote{service: service, transport: transport} } -func IsGoogleDoc(f *File) bool { +func hasExportLinks(f *File) bool { if f == nil || f.IsDir { - return false; - } - - _, ok := docExportsMap[f.MimeType] - if !ok { - return f.BlobAt == ""; + return false } - return true; + return len(f.ExportLinks) >= 1 } func RetrieveRefreshToken(context 
*config.Context) (string, error) { @@ -134,12 +148,12 @@ func (r *Remote) Publish(id string) (string, error) { return "https://googledrive.com/host/" + id, nil } -func (r *Remote) Download(id string, exportUrl string) (io.ReadCloser, error) { +func (r *Remote) Download(id string, exportURL string) (io.ReadCloser, error) { var url string - if len(exportUrl) < 1 { + if len(exportURL) < 1 { url = "https://googledrive.com/host/" + id } else { - url = exportUrl + url = exportURL } resp, err := r.transport.Client().Get(url) if err != nil || resp.StatusCode < 200 || resp.StatusCode > 299 { @@ -217,19 +231,3 @@ func newTransport(context *config.Context) *oauth.Transport { }, } } - -func newDocExportsMap() *map[string][]string { - return &map[string][]string { - "text/plain": []string{"text/plain", "txt",}, - "application/vnd.google-apps.drawing": []string{"image/svg+xml", "svg+xml",}, - "application/vnd.google-apps.spreadsheet": []string{ - "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "xlsx", - }, - "application/vnd.google-apps.document": []string{ - "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "docx", - }, - "application/vnd.google-apps.presentation": []string{ - "application/vnd.openxmlformats-officedocument.presentationml.presentation", "pptx", - }, - } -}