Skip to content
This repository has been archived by the owner on Sep 3, 2020. It is now read-only.

Commit

Permalink
doc export via csv on CLI
Browse files Browse the repository at this point in the history
  • Loading branch information
Emmanuel Odeke committed Dec 6, 2014
1 parent 2771355 commit 2a327f9
Show file tree
Hide file tree
Showing 6 changed files with 150 additions and 74 deletions.
28 changes: 23 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

[![Build Status](https://travis-ci.org/rakyll/drive.png?branch=master)](https://travis-ci.org/rakyll/drive)

`drive` is a tiny program to pull or push [Google Drive](https://drive.google.com) files. You need go1.2 installed in order to build the program.
`drive` is a tiny program to pull or push [Google Drive](https://drive.google.com) files. You need at least go1.2 installed in order to build the program.

## Installation

Expand All @@ -12,7 +12,11 @@ Use `drive help` for further reference.

$ drive init [path]
$ drive pull [-r -no-prompt path] # pulls from remote
$ drive pull [-r -no-prompt -export path] # pulls from remote and exports Docs + Sheets to one of its export formats.
$ drive pull [-r -no-prompt -export ext1,ext2,ext3 path] # pulls from remote and exports Docs + Sheets to one of its export formats.
e.g:
$ drive pull [-r -no-prompt -export pdf,docx,rtf,html ReportII.txt] # pull ReportII.txt from remote and
export it to pdf, docx, rtf and html
$ drive push [-r -no-prompt path] # pushes to the remote
$ drive push [-r -hidden path] # pushes also hidden directories and paths to the remote
$ drive diff [path] # outputs a diff of local and remote
Expand Down Expand Up @@ -50,13 +54,27 @@ Background sync is not just hard, it's stupid. My technical and philosophical ra

* Possibility to support multiple accounts. Pull from or push to multiple Google Drive remotes. Possibility to support multiple backends. Why not push to Dropbox or Box as well?

## Notes:
* Google Docs cannot be downloaded directly; they can only be
exported to other formats, e.g. docx, xlsx, csv, etc.
When doing a pull, remember to include the option `-export ext1,ext2,ext3`,
where ext1, ext2, ... can be any of:
* docx
* jpeg
* html
* odt
* rtf
* pdf
* png
* pptx
* svg
* txt
* xlsx

## Known issues
* Probably, it doesn't work on Windows.
* Google Drive allows a directory to contain files/directories with the same name. The client doesn't handle these cases yet. To avoid data loss, we recommend against using `drive` if you have such files/directories.
* Race conditions occur if the remote is being modified while we're trying to update the file. Google Drive provides resource versioning with ETags; use ETags to avoid racy cases.
* Google Docs + Sheets + Presentations data cannot be downloaded raw, but only
exported to other formats, e.g. docx, xlsx, csv, etc. Hence, doing a pull of
these types will result in an exported document.

## License
Copyright 2013 Google Inc. All Rights Reserved.
Expand Down
2 changes: 1 addition & 1 deletion changes.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ func (g *Commands) resolveChangeListRecv(
if isPush {
// Handle the case of doc files for which we don't have a direct download
// url but have exportable links. These files should not be clobbered on the cloud
if IsGoogleDoc(r) {
if isGoogleDoc(r) {
return cl, nil
}

Expand Down
15 changes: 9 additions & 6 deletions cmd/drive/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"fmt"
"os"
"path/filepath"
"strings"

"github.com/rakyll/command"
"github.com/rakyll/drive"
Expand Down Expand Up @@ -56,25 +57,27 @@ func (cmd *initCmd) Run(args []string) {
}

type pullCmd struct {
isRecursive *bool
isNoPrompt *bool
exportOnBackup *bool
export *string
isRecursive *bool
isNoPrompt *bool
}

func (cmd *pullCmd) Flags(fs *flag.FlagSet) *flag.FlagSet {
cmd.export = fs.String(
"export", "", "comma separated list of formats to export your docs + sheets files")
cmd.isRecursive = fs.Bool("r", true, "performs the pull action recursively")
cmd.isNoPrompt = fs.Bool("no-prompt", false, "shows no prompt before applying the pull action")
cmd.exportOnBackup = fs.Bool("export", false, "export your docs + sheets files")
return fs
}

func (cmd *pullCmd) Run(args []string) {
context, path := discoverContext(args)
exports := strings.Split(*cmd.export, ",")
exitWithError(drive.New(context, &drive.Options{
Path: path,
IsRecursive: *cmd.isRecursive,
Exports: exports,
IsNoPrompt: *cmd.isNoPrompt,
ExportOnBackup: *cmd.exportOnBackup,
IsRecursive: *cmd.isRecursive,
}).Pull())
}

Expand Down
6 changes: 3 additions & 3 deletions commands.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,9 @@ type Options struct {
IsForce bool
// Hidden discovers hidden paths if set
Hidden bool
// ExportOnBackup when set allows the exporting of Google Docs + Sheets to a
// downloadable format e.g *.presentation to pptx.
ExportOnBackup bool
// Exports contains the formats to export your Google Docs + Sheets to
// e.g ["csv" "txt"]
Exports []string
}

type Commands struct {
Expand Down
129 changes: 94 additions & 35 deletions pull.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,17 @@ import (
"fmt"
"io"
"os"
"strings"
"path"
"path/filepath"
"strings"
"sync"
)

const (
maxNumOfConcPullTasks = 4
)

// Pull from remote if remote path exists and in a gd context. If path is a
// Pull from remote if remote path exists and in a gd context. If path is a
// directory, it recursively pulls from the remote if there are remote changes.
// It doesn't check if there are remote changes if isForce is set.
func (g *Commands) Pull() (err error) {
Expand All @@ -48,12 +49,12 @@ func (g *Commands) Pull() (err error) {
}

if ok := printChangeList(cl, g.opts.IsNoPrompt); ok {
return g.playPullChangeList(cl, g.opts.ExportOnBackup)
return g.playPullChangeList(cl, g.opts.Exports)
}
return
}

func (g *Commands) playPullChangeList(cl []*Change, exportOnBackup bool) (err error) {
func (g *Commands) playPullChangeList(cl []*Change, exports []string) (err error) {
var next []*Change
g.taskStart(len(cl))

Expand All @@ -73,9 +74,9 @@ func (g *Commands) playPullChangeList(cl []*Change, exportOnBackup bool) (err er
for _, c := range next {
switch c.Op() {
case OpMod:
go g.localMod(&wg, c, exportOnBackup)
go g.localMod(&wg, c, exports)
case OpAdd:
go g.localAdd(&wg, c, exportOnBackup)
go g.localAdd(&wg, c, exports)
case OpDelete:
go g.localDelete(&wg, c)
}
Expand All @@ -87,21 +88,21 @@ func (g *Commands) playPullChangeList(cl []*Change, exportOnBackup bool) (err er
return err
}

func (g *Commands) localMod(wg *sync.WaitGroup, change *Change, exportOnBackup bool) (err error) {
func (g *Commands) localMod(wg *sync.WaitGroup, change *Change, exports []string) (err error) {
defer g.taskDone()
defer wg.Done()
destAbsPath := g.context.AbsPathOf(change.Path)

if change.Src.BlobAt != "" || change.Src.ExportLinks != nil {
// download and replace
if err = g.download(change, exportOnBackup); err != nil {
if err = g.download(change, exports); err != nil {
return
}
}
return os.Chtimes(destAbsPath, change.Src.ModTime, change.Src.ModTime)
}

func (g *Commands) localAdd(wg *sync.WaitGroup, change *Change, exportOnBackup bool) (err error) {
func (g *Commands) localAdd(wg *sync.WaitGroup, change *Change, exports []string) (err error) {

defer g.taskDone()
defer wg.Done()
Expand All @@ -113,7 +114,7 @@ func (g *Commands) localAdd(wg *sync.WaitGroup, change *Change, exportOnBackup b
}
if change.Src.BlobAt != "" || change.Src.ExportLinks != nil {
// download and create
if err = g.download(change, exportOnBackup); err != nil {
if err = g.download(change, exports); err != nil {
return
}
}
Expand All @@ -137,40 +138,98 @@ func touchFile(path string) (err error) {
return
}

func (g *Commands) download(change *Change, exportOnBackup bool) (err error) {
exportUrl := ""
// export downloads f in every requested export format and writes the results
// into a sibling directory named "<destAbsPath>_exports", one file per format,
// named "<base name of f.Name>.<ext>".
//
// exports is a list of file extensions. An extension is skipped silently when
// docExportsMap has no MIME type for it, or when f.ExportLinks has no link for
// that MIME type. Duplicate extensions are exported once. If nothing remains
// to export, no directory is created and (nil, nil) is returned.
//
// Downloads run concurrently, one goroutine per format. manifest lists the
// paths that were written successfully, in the order the formats were
// requested. err is the first failure in that same order, or nil.
func (g *Commands) export(f *File, destAbsPath string, exports []string) (manifest []string, err error) {
	if len(exports) < 1 || f == nil {
		return
	}

	dirPath := strings.Join([]string{destAbsPath, "exports"}, "_")

	type target struct {
		dest string
		url  string
	}

	// Resolve the requested extensions to (destination, URL) pairs before
	// touching the filesystem, so that a request with no usable format
	// (e.g. the [""] produced by splitting an empty -export flag) does not
	// leave an empty "_exports" directory behind.
	var targets []target
	seen := map[string]bool{}
	for _, ext := range exports {
		mimeType, ok := docExportsMap[ext]
		if !ok {
			continue
		}
		exportURL, ok := f.ExportLinks[mimeType]
		if !ok {
			continue
		}
		exportPath := strings.Join([]string{filepath.Base(f.Name), ext}, ".")
		// NOTE(review): filepath.Join would be the OS-correct call here;
		// path.Join is kept because removing the last use of "path" would
		// leave an unused import — confirm before switching.
		dest := path.Join(dirPath, exportPath)
		if seen[dest] {
			continue
		}
		seen[dest] = true
		targets = append(targets, target{dest: dest, url: exportURL})
	}
	if len(targets) < 1 {
		return
	}

	if err = os.MkdirAll(dirPath, os.ModeDir|0755); err != nil {
		return
	}

	// Each goroutine writes only to its own slot of errs, so no locking is
	// needed; wg.Wait() orders those writes before the reads below.
	errs := make([]error, len(targets))
	var wg sync.WaitGroup
	wg.Add(len(targets))
	for i, t := range targets {
		go func(i int, dest, url string) {
			defer wg.Done()
			errs[i] = g.fetchExport(f.Id, url, dest)
		}(i, t.dest, t.url)
	}
	wg.Wait()

	for i, t := range targets {
		if errs[i] != nil {
			if err == nil {
				err = errs[i]
			}
			continue
		}
		manifest = append(manifest, t.dest)
	}
	return
}

// fetchExport downloads the export of the remote file id found at url and
// writes it to dest. The download is started before dest is created so that a
// failed download does not leave an empty file on disk.
func (g *Commands) fetchExport(id, url, dest string) (err error) {
	blob, err := g.rem.Download(id, url)
	if err != nil {
		return err
	}
	if blob != nil {
		defer blob.Close()
	}

	fo, err := os.Create(dest)
	if err != nil {
		return err
	}
	defer func() {
		// Surface a failed close (e.g. a deferred write error) unless an
		// earlier error is already being reported.
		if cerr := fo.Close(); err == nil {
			err = cerr
		}
	}()

	_, err = io.Copy(fo, blob)
	return err
}

func (g *Commands) download(change *Change, exports []string) (err error) {
baseName := change.Path
destAbsPath := g.context.AbsPathOf(baseName)

// If BlobAt is not set, we are most likely dealing with
// Document/SpreadSheet/Image. In this case we'll use the target
// exportable type since we cannot directly download the raw data.
// We also need to pay attention and add the exported extension
// to avoid overriding the original file on re-syncing.
if len(change.Src.BlobAt) < 1 && exportOnBackup && IsGoogleDoc(change.Src) {
var ok bool
var mimeKeyExtList[]string

mimeKeyExtList, ok = docExportsMap[change.Src.MimeType]
if !ok {
mimeKeyExtList = []string{"text/plain", "txt"}
}

// We need to touch an empty file for the
// non-downloadable version to avoid an erasure
// on a later push. If there is a name conflict / data race,
// the original file won't be touched.
if isGoogleDoc(change.Src) {
// We need to touch the empty file to ensure
// consistency during a push.
emptyFilepath := g.context.AbsPathOf(baseName)
err = touchFile(emptyFilepath)

// TODO: @odeke-em / @rakyll, if user selects all desired formats,
// should we be downloading every single one of them?
exportUrl = change.Src.ExportLinks[mimeKeyExtList[0]]
fmt.Print("Exported ", baseName)
baseName = strings.Join([]string{baseName, mimeKeyExtList[1]}, ".")
fmt.Println(" to: ", baseName)
if err = touchFile(emptyFilepath); err != nil {
return err
}
manifest, exportErr := g.export(change.Src, destAbsPath, exports)
if exportErr == nil {
for i, exportPath := range manifest {
fmt.Printf("# %d: %s\n", i+1, exportPath)
}
}
return exportErr
}

destAbsPath := g.context.AbsPathOf(baseName)
var fo *os.File
fo, err = os.Create(destAbsPath)
if err != nil {
Expand All @@ -190,7 +249,7 @@ func (g *Commands) download(change *Change, exportOnBackup bool) (err error) {
blob.Close()
}
}()
blob, err = g.rem.Download(change.Src.Id, exportUrl)
blob, err = g.rem.Download(change.Src.Id, "")
if err != nil {
return err
}
Expand Down
Loading

0 comments on commit 2a327f9

Please sign in to comment.