Skip to content
This repository has been archived by the owner on Sep 3, 2020. It is now read-only.

Fix bug with missing downloadURL for docs/sheets #25

Closed
wants to merge 21 commits into from
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ Use `drive help` for further reference.

$ drive init [path]
$ drive pull [-r -no-prompt path] # pulls from remote
$ drive pull [-r -no-prompt -export path] # pulls from remote and exports Docs + Sheets to one of its export formats.
$ drive push [-r -no-prompt path] # pushes to the remote
$ drive push [-r -hidden path] # pushes also hidden directories and paths to the remote
$ drive diff [path] # outputs a diff of local and remote
Expand Down Expand Up @@ -53,6 +54,9 @@ Background sync is not just hard, it's stupid. My technical and philosophical ra
* Probably, it doesn't work on Windows.
* Google Drive allows a directory to contain files/directories with the same name. Client doesn't handle these cases yet. We don't recommend you to use `drive` if you have such files/directories to avoid data loss.
* Racing conditions occur if remote is being modified while we're trying to update the file. Google Drive provides resource versioning with ETags, use Etags to avoid racy cases.
* Google Docs + Sheets + Presentations data cannot be downloaded raw but only
as exported to different forms e.g docx, xlsx, csv etc hence doing a pull of
these types will result in a exported document.

## License
Copyright 2013 Google Inc. All Rights Reserved.
Expand Down
6 changes: 6 additions & 0 deletions changes.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,12 @@ func (g *Commands) resolveChangeListRecv(
isPush bool, p string, r *File, l *File) (cl []*Change, err error) {
var change *Change
if isPush {
// Handle the case of doc files for which we don't have a direct download
// url but have exportable links. These files should not be clobbered on the cloud
if IsGoogleDoc(r) {
return cl, nil
}

change = &Change{Path: p, Src: l, Dest: r}
} else {
change = &Change{Path: p, Src: r, Dest: l}
Expand Down
7 changes: 5 additions & 2 deletions cmd/drive/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,15 @@ func (cmd *initCmd) Run(args []string) {
}

type pullCmd struct {
isRecursive *bool
isNoPrompt *bool
isRecursive *bool
isNoPrompt *bool
exportOnBackup *bool
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use export. Use less verbose variables in Go.

(And I should cleanup the codebase to fix the legacy bad naming issues throughout the repo.)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

:)

}

func (cmd *pullCmd) Flags(fs *flag.FlagSet) *flag.FlagSet {
cmd.isRecursive = fs.Bool("r", true, "performs the pull action recursively")
cmd.isNoPrompt = fs.Bool("no-prompt", false, "shows no prompt before applying the pull action")
cmd.exportOnBackup = fs.Bool("export", false, "export your docs + sheets files")
return fs
}

Expand All @@ -72,6 +74,7 @@ func (cmd *pullCmd) Run(args []string) {
Path: path,
IsRecursive: *cmd.isRecursive,
IsNoPrompt: *cmd.isNoPrompt,
ExportOnBackup: *cmd.exportOnBackup,
}).Pull())
}

Expand Down
3 changes: 3 additions & 0 deletions commands.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ type Options struct {
IsForce bool
// Hidden discovers hidden paths if set
Hidden bool
// ExportOnBackup when set allows the exporting of Google Docs + Sheets to a
// downloadable format e.g *.presentation to pptx.
ExportOnBackup bool
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Export bool

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Roger that.

}

type Commands struct {
Expand Down
74 changes: 60 additions & 14 deletions pull.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"fmt"
"io"
"os"
"strings"
"path/filepath"
"sync"
)
Expand All @@ -26,7 +27,7 @@ const (
maxNumOfConcPullTasks = 4
)

// Pull from remote if remote path exists and in a god context. If path is a
// Pull from remote if remote path exists and in a gd context. If path is a
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can drop gd here. This repo was once called god, and the comment is legacy now.

god was much of an ambitious name for a Drive client :)

// directory, it recursively pulls from the remote if there are remote changes.
// It doesn't check if there are remote changes if isForce is set.
func (g *Commands) Pull() (err error) {
Expand All @@ -47,12 +48,12 @@ func (g *Commands) Pull() (err error) {
}

if ok := printChangeList(cl, g.opts.IsNoPrompt); ok {
return g.playPullChangeList(cl)
return g.playPullChangeList(cl, g.opts.ExportOnBackup)
}
return
}

func (g *Commands) playPullChangeList(cl []*Change) (err error) {
func (g *Commands) playPullChangeList(cl []*Change, exportOnBackup bool) (err error) {
var next []*Change
g.taskStart(len(cl))

Expand All @@ -72,9 +73,9 @@ func (g *Commands) playPullChangeList(cl []*Change) (err error) {
for _, c := range next {
switch c.Op() {
case OpMod:
go g.localMod(&wg, c)
go g.localMod(&wg, c, exportOnBackup)
case OpAdd:
go g.localAdd(&wg, c)
go g.localAdd(&wg, c, exportOnBackup)
case OpDelete:
go g.localDelete(&wg, c)
}
Expand All @@ -86,20 +87,22 @@ func (g *Commands) playPullChangeList(cl []*Change) (err error) {
return err
}

func (g *Commands) localMod(wg *sync.WaitGroup, change *Change) (err error) {
func (g *Commands) localMod(wg *sync.WaitGroup, change *Change, exportOnBackup bool) (err error) {
defer g.taskDone()
defer wg.Done()
destAbsPath := g.context.AbsPathOf(change.Path)
if change.Src.BlobAt != "" {

if change.Src.BlobAt != "" || change.Src.ExportLinks != nil {
// download and replace
if err = g.download(change); err != nil {
if err = g.download(change, exportOnBackup); err != nil {
return
}
}
return os.Chtimes(destAbsPath, change.Src.ModTime, change.Src.ModTime)
}

func (g *Commands) localAdd(wg *sync.WaitGroup, change *Change) (err error) {
func (g *Commands) localAdd(wg *sync.WaitGroup, change *Change, exportOnBackup bool) (err error) {

defer g.taskDone()
defer wg.Done()
destAbsPath := g.context.AbsPathOf(change.Path)
Expand All @@ -108,9 +111,9 @@ func (g *Commands) localAdd(wg *sync.WaitGroup, change *Change) (err error) {
if change.Src.IsDir {
return os.Mkdir(destAbsPath, os.ModeDir|0755)
}
if change.Src.BlobAt != "" {
if change.Src.BlobAt != "" || change.Src.ExportLinks != nil {
// download and create
if err = g.download(change); err != nil {
if err = g.download(change, exportOnBackup); err != nil {
return
}
}
Expand All @@ -123,8 +126,51 @@ func (g *Commands) localDelete(wg *sync.WaitGroup, change *Change) (err error) {
return os.RemoveAll(change.Dest.BlobAt)
}

func (g *Commands) download(change *Change) (err error) {
destAbsPath := g.context.AbsPathOf(change.Path)
func touchFile(path string) (err error) {
var ef *os.File
defer func() {
if err != nil && ef != nil {
ef.Close()
}
}()
ef, err = os.Create(path)
return
}

func (g *Commands) download(change *Change, exportOnBackup bool) (err error) {
exportUrl := ""
baseName := change.Path

// If BlobAt is not set, we are most likely dealing with
// Document/SpreadSheet/Image. In this case we'll use the target
// exportable type since we cannot directly download the raw data.
// We also need to pay attention and add the exported extension
// to avoid overriding the original file on re-syncing.
if len(change.Src.BlobAt) < 1 && exportOnBackup && IsGoogleDoc(change.Src) {
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't this if be as simple as:

if hasExportLinks(change.Str) && export {

}

Why do we have to care about BlobAt?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good question, see I am thinking about the case that a file has both exportLinks as well as BlobAt. In this case we would want to download the actual file.
Actually you are right, given that isGoogleDoc involves a check for BlobAt == "".

var ok bool
var mimeKeyExtList[]string

mimeKeyExtList, ok = docExportsMap[change.Src.MimeType]
if !ok {
mimeKeyExtList = []string{"text/plain", "txt"}
}

// We need to touch an empty file for the
// non-downloadable version to avoid an erasal
// on later push. If there is a name conflict / data race,
// the original file won't be touched.
emptyFilepath := g.context.AbsPathOf(baseName)
err = touchFile(emptyFilepath)

// TODO: @odeke-em / @rakyll, if user selects all desired formats,
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe export flag should accept a list of types. -export=png,docx

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great suggestion, this solves a bunch of problems I was thinking of.

// should we be be downloading every single one of them?
exportUrl = change.Src.ExportLinks[mimeKeyExtList[0]]
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

exportURL

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Roger that.

fmt.Print("Exported ", baseName)
baseName = strings.Join([]string{baseName, mimeKeyExtList[1]}, ".")
fmt.Println(" to: ", baseName)
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fmt.Printf("Exported %s to %s", exportURL, name)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, exportURL might not be descriptive to the user.

}

destAbsPath := g.context.AbsPathOf(baseName)
var fo *os.File
fo, err = os.Create(destAbsPath)
if err != nil {
Expand All @@ -144,7 +190,7 @@ func (g *Commands) download(change *Change) (err error) {
blob.Close()
}
}()
blob, err = g.rem.Download(change.Src.Id)
blob, err = g.rem.Download(change.Src.Id, exportUrl)
if err != nil {
return err
}
Expand Down
40 changes: 38 additions & 2 deletions remote.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ var (
ErrPathNotExists = errors.New("remote path doesn't exist")
)

var docExportsMap = *newDocExportsMap()
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

var docExportTypes = map[string][]string {

  •   "text/plain": []string{"text/plain", "txt",},
    
  •   "application/vnd.google-apps.drawing": []string{"image/svg+xml", "svg+xml",},
    
  •   "application/vnd.google-apps.spreadsheet": []string{
    
  •   "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "xlsx",
    
  •   },
    
  •   "application/vnd.google-apps.document": []string{
    
  •       "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "docx",
    
  •   },
    
  •   "application/vnd.google-apps.presentation": []string{
    
  •       "application/vnd.openxmlformats-officedocument.presentationml.presentation", "pptx",
    
  •   },
    
  • }

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good plan.


type Remote struct {
transport *oauth.Transport
service *drive.Service
Expand All @@ -57,6 +59,18 @@ func NewRemoteContext(context *config.Context) *Remote {
return &Remote{service: service, transport: transport}
}

func IsGoogleDoc(f *File) bool {
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't have to be in remote.go. It should be likely in pull.go.

And it should have be imported. isGoogleDoc or better isExportable?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Aye aye, I actually initially had it in pull.go. However, remember that we don't wanna clobber Google Docs on the cloud with their empty replacements on disk.
Also this function gets invoked in changes.go so I figured why not put in a common location.
Another thing, wouldn't the name isGoogleDoc drive the point home to alert the code readers that GoogleDocs + Sheets are non downloadable by default?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Never mind, I now concur, I've encountered a case while coding up the solution :)

if f == nil || f.IsDir {
return false;
}

_, ok := docExportsMap[f.MimeType]
if !ok {
return f.BlobAt == "";
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

go fmt your code.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

:)

}
return true;
}

func RetrieveRefreshToken(context *config.Context) (string, error) {
transport := newTransport(context)
url := transport.Config.AuthCodeURL("")
Expand Down Expand Up @@ -120,8 +134,14 @@ func (r *Remote) Publish(id string) (string, error) {
return "https://googledrive.com/host/" + id, nil
}

func (r *Remote) Download(id string) (io.ReadCloser, error) {
resp, err := r.transport.Client().Get("https://googledrive.com/host/" + id)
func (r *Remote) Download(id string, exportUrl string) (io.ReadCloser, error) {
var url string
if len(exportUrl) < 1 {
url = "https://googledrive.com/host/" + id
} else {
url = exportUrl
}
resp, err := r.transport.Client().Get(url)
if err != nil || resp.StatusCode < 200 || resp.StatusCode > 299 {
return resp.Body, err
}
Expand Down Expand Up @@ -197,3 +217,19 @@ func newTransport(context *config.Context) *oauth.Transport {
},
}
}

func newDocExportsMap() *map[string][]string {
return &map[string][]string {
"text/plain": []string{"text/plain", "txt",},
"application/vnd.google-apps.drawing": []string{"image/svg+xml", "svg+xml",},
"application/vnd.google-apps.spreadsheet": []string{
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "xlsx",
},
"application/vnd.google-apps.document": []string{
"application/vnd.openxmlformats-officedocument.wordprocessingml.document", "docx",
},
"application/vnd.google-apps.presentation": []string{
"application/vnd.openxmlformats-officedocument.presentationml.presentation", "pptx",
},
}
}
4 changes: 4 additions & 0 deletions types.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,9 @@ type File struct {
ModTime time.Time
Size int64
BlobAt string
MimeType string
Md5Checksum string
ExportLinks map[string]string
}

func NewRemoteFile(f *drive.File) *File {
Expand All @@ -50,8 +52,10 @@ func NewRemoteFile(f *drive.File) *File {
IsDir: f.MimeType == "application/vnd.google-apps.folder",
ModTime: mtime,
Size: f.FileSize,
MimeType: f.MimeType,
BlobAt: f.DownloadUrl,
Md5Checksum: f.Md5Checksum,
ExportLinks: f.ExportLinks,
}
}

Expand Down