deploy: compute MD5 by reading if List didn't provide one
author Robert van Gent <rvangent@google.com>
Fri, 3 May 2019 20:28:35 +0000 (13:28 -0700)
committer Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>
Sat, 4 May 2019 07:31:34 +0000 (09:31 +0200)
deploy/deploy.go
go.mod
go.sum

diff --git a/deploy/deploy.go b/deploy/deploy.go
index 5cc529f7e9bc03f6c072d99bdaace77bc0464c66..6ba348dd86f33023dee7456057448d9c63c643e5 100644 (file)
--- a/deploy/deploy.go
+++ b/deploy/deploy.go
@@ -19,7 +19,6 @@ import (
        "context"
        "crypto/md5"
        "fmt"
-       "golang.org/x/text/unicode/norm"
        "io"
        "mime"
        "os"
@@ -33,6 +32,7 @@ import (
        "github.com/pkg/errors"
        "github.com/spf13/afero"
        jww "github.com/spf13/jwalterweatherman"
+       "golang.org/x/text/unicode/norm"
 
        "gocloud.dev/blob"
        _ "gocloud.dev/blob/azureblob" // import
@@ -418,6 +418,23 @@ func walkRemote(ctx context.Context, bucket *blob.Bucket) (map[string]*blob.List
                if err != nil {
                        return nil, err
                }
+               // If the remote didn't give us an MD5, compute one.
+               // This can happen for some providers (e.g., fileblob, which uses the
+               // local filesystem), but not for the most common Cloud providers
+               // (S3, GCS, Azure). Although, it can happen for S3 if the blob was uploaded
+               // via a multi-part upload.
+               // Although it's unfortunate to have to read the file, it's likely better
+               // than assuming a delta and re-uploading it.
+               if len(obj.MD5) == 0 {
+                       r, err := bucket.NewReader(ctx, obj.Key, nil)
+                       if err == nil {
+                               h := md5.New()
+                               if _, err := io.Copy(h, r); err == nil {
+                                       obj.MD5 = h.Sum(nil)
+                               }
+                               r.Close()
+                       }
+               }
                retval[obj.Key] = obj
        }
        return retval, nil
@@ -494,15 +511,9 @@ func findDiffs(localFiles map[string]*localFile, remoteFiles map[string]*blob.Li
                                upload = true
                                reason = reasonSize
                        } else if len(remoteFile.MD5) == 0 {
-                               // TODO: This can happen if the remote provider doesn't return an MD5
-                               // hash for the blob from their "list" command. This is common for
-                               // some providers (e.g., fileblob, which uses the local filesystem),
-                               // but not for the biggest Cloud providers (S3, GCS, Azure). Although,
-                               // it can happen for S3 if the blob was originally uploaded as a
-                               // multi-part upload (shouldn't happen when using "hugo deploy").
-                               // For now, we treat this as an MD5 mismatch and re-upload. An
-                               // alternative would be to read entire the remote blob and compute the
-                               // MD5 hash.
+                               // This shouldn't happen unless the remote didn't give us an MD5 hash
+                               // from List, AND we failed to compute one by reading the remote file.
+                               // Default to considering the files different.
                                upload = true
                                reason = reasonMD5Missing
                        } else if !bytes.Equal(lf.MD5(), remoteFile.MD5) {
diff --git a/go.mod b/go.mod
index e21c0cd6870529e04e39a7acf0973afe44f8cfad..8bf8c4c75093c5f0e6414349d633b8303c91c527 100644 (file)
--- a/go.mod
+++ b/go.mod
@@ -19,6 +19,7 @@ require (
        github.com/fortytw2/leaktest v1.2.0
        github.com/fsnotify/fsnotify v1.4.7
        github.com/gobwas/glob v0.2.3
+       github.com/google/go-cmp v0.2.0
        github.com/gorilla/websocket v1.4.0
        github.com/hashicorp/go-immutable-radix v1.0.0
        github.com/jdkato/prose v1.1.0
diff --git a/go.sum b/go.sum
index 7f7771f7ed77914819bdd342b2b9760b4495af4a..36cfdbece914a4dc90ae79ceede26c398e344d9d 100644 (file)
--- a/go.sum
+++ b/go.sum
@@ -211,6 +211,7 @@ github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5y
 github.com/golang/snappy v0.0.0-20170215233205-553a64147049/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
 github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
 github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
+github.com/google/go-cmp v0.2.0 h1:+dTQ8DZQJz0Mb/HjFlkptS1FeQ4cWSnN941F8aEG4SQ=
 github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
 github.com/google/go-github v17.0.0+incompatible/go.mod h1:zLgOLi98H3fifZn+44m+umXrS52loVEgC2AApnigrVQ=
 github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck=