From 3c29c5af8ee865ef20741f576088e031e940c3d2 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Bj=C3=B8rn=20Erik=20Pedersen?= Date: Wed, 14 Nov 2018 12:06:46 +0100 Subject: [PATCH] cache/filecache: Add a cache prune func Fixes #5439 --- cache/filecache/filecache.go | 48 +++++++++-- cache/filecache/filecache_pruner.go | 80 ++++++++++++++++++ cache/filecache/filecache_pruner_test.go | 100 +++++++++++++++++++++++ cache/filecache/filecache_test.go | 6 ++ hugolib/prune_resources.go | 88 +------------------- tpl/data/resources_test.go | 5 +- 6 files changed, 232 insertions(+), 95 deletions(-) create mode 100644 cache/filecache/filecache_pruner.go create mode 100644 cache/filecache/filecache_pruner_test.go diff --git a/cache/filecache/filecache.go b/cache/filecache/filecache.go index a934dd89..e9f72cb9 100644 --- a/cache/filecache/filecache.go +++ b/cache/filecache/filecache.go @@ -19,6 +19,7 @@ import ( "io/ioutil" "path/filepath" "strings" + "sync" "time" "github.com/gohugoio/hugo/common/hugio" @@ -44,7 +45,30 @@ type Cache struct { // 0 is effectively turning this cache off. maxAge time.Duration - nlocker *locker.Locker + nlocker *lockTracker +} + +type lockTracker struct { + seenMu sync.RWMutex + seen map[string]struct{} + + *locker.Locker +} + +// Lock tracks the ids in use. We use this information to do garbage collection +// after a Hugo build. +func (l *lockTracker) Lock(id string) { + l.seenMu.RLock() + if _, seen := l.seen[id]; !seen { + l.seenMu.RUnlock() + l.seenMu.Lock() + l.seen[id] = struct{}{} + l.seenMu.Unlock() + } else { + l.seenMu.RUnlock() + } + + l.Locker.Lock(id) } // ItemInfo contains info about a cached file. 
@@ -57,7 +81,7 @@ type ItemInfo struct { func NewCache(fs afero.Fs, maxAge time.Duration) *Cache { return &Cache{ Fs: fs, - nlocker: locker.NewLocker(), + nlocker: &lockTracker{Locker: locker.NewLocker(), seen: make(map[string]struct{})}, maxAge: maxAge, } } @@ -232,7 +256,7 @@ func (c *Cache) getOrRemove(id string) hugio.ReadSeekCloser { return nil } - if time.Now().Sub(fi.ModTime()) > c.maxAge { + if c.isExpired(fi.ModTime()) { c.Fs.Remove(id) return nil } @@ -247,6 +271,10 @@ func (c *Cache) getOrRemove(id string) hugio.ReadSeekCloser { return f } +func (c *Cache) isExpired(modTime time.Time) bool { + return c.maxAge >= 0 && time.Now().Sub(modTime) > c.maxAge +} + // For testing func (c *Cache) getString(id string) string { id = cleanID(id) @@ -254,13 +282,15 @@ func (c *Cache) getString(id string) string { c.nlocker.Lock(id) defer c.nlocker.Unlock(id) - if r := c.getOrRemove(id); r != nil { - defer r.Close() - b, _ := ioutil.ReadAll(r) - return string(b) + f, err := c.Fs.Open(id) + + if err != nil { + return "" } + defer f.Close() - return "" + b, _ := ioutil.ReadAll(f) + return string(b) } @@ -309,5 +339,5 @@ func NewCachesFromPaths(p *paths.Paths) (Caches, error) { } func cleanID(name string) string { - return filepath.Clean(name) + return strings.TrimPrefix(filepath.Clean(name), helpers.FilePathSeparator) } diff --git a/cache/filecache/filecache_pruner.go b/cache/filecache/filecache_pruner.go new file mode 100644 index 00000000..322eabf9 --- /dev/null +++ b/cache/filecache/filecache_pruner.go @@ -0,0 +1,80 @@ +// Copyright 2018 The Hugo Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package filecache + +import ( + "io" + "os" + + "github.com/pkg/errors" + "github.com/spf13/afero" +) + +// Prune removes expired and unused items from this cache. +// The last one requires a full build so the cache usage can be tracked. +// Note that we operate directly on the filesystem here, so this is not +// thread safe. +func (c Caches) Prune() (int, error) { + counter := 0 + for k, cache := range c { + err := afero.Walk(cache.Fs, "", func(name string, info os.FileInfo, err error) error { + if info == nil { + return nil + } + + name = cleanID(name) + + if info.IsDir() { + f, err := cache.Fs.Open(name) + if err != nil { + // This cache dir may not exist. + return nil + } + defer f.Close() + _, err = f.Readdirnames(1) + if err == io.EOF { + // Empty dir. + return cache.Fs.Remove(name) + } + + return nil + } + + shouldRemove := cache.isExpired(info.ModTime()) + + if !shouldRemove && len(cache.nlocker.seen) > 0 { + // Remove it if it's not been touched/used in the last build. + _, seen := cache.nlocker.seen[name] + shouldRemove = !seen + } + + if shouldRemove { + err := cache.Fs.Remove(name) + if err == nil { + counter++ + } + return err + } + + return nil + }) + + if err != nil { + return counter, errors.Wrapf(err, "failed to prune cache %q", k) + } + + } + + return counter, nil +} diff --git a/cache/filecache/filecache_pruner_test.go b/cache/filecache/filecache_pruner_test.go new file mode 100644 index 00000000..64d10149 --- /dev/null +++ b/cache/filecache/filecache_pruner_test.go @@ -0,0 +1,100 @@ +// Copyright 2018 The Hugo Authors. 
All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package filecache + +import ( + "fmt" + "testing" + "time" + + "github.com/gohugoio/hugo/config" + "github.com/gohugoio/hugo/hugofs" + "github.com/gohugoio/hugo/hugolib/paths" + + "github.com/stretchr/testify/require" +) + +func TestPrune(t *testing.T) { + t.Parallel() + + assert := require.New(t) + + configStr := ` +resourceDir = "myresources" +[caches] +[caches.getjson] +maxAge = "200ms" +dir = "/cache/c" + +` + + cfg, err := config.FromConfigString(configStr, "toml") + assert.NoError(err) + fs := hugofs.NewMem(cfg) + p, err := paths.New(fs, cfg) + assert.NoError(err) + + caches, err := NewCachesFromPaths(p) + assert.NoError(err) + + jsonCache := caches.GetJSONCache() + for i := 0; i < 10; i++ { + id := fmt.Sprintf("i%d", i) + jsonCache.GetOrCreateBytes(id, func() ([]byte, error) { + return []byte("abc"), nil + }) + if i == 4 { + // This will expire the first 5 + time.Sleep(201 * time.Millisecond) + } + } + + count, err := caches.Prune() + assert.NoError(err) + assert.Equal(5, count) + + for i := 0; i < 10; i++ { + id := fmt.Sprintf("i%d", i) + v := jsonCache.getString(id) + if i < 5 { + assert.Equal("", v, id) + } else { + assert.Equal("abc", v, id) + } + } + + caches, err = NewCachesFromPaths(p) + assert.NoError(err) + jsonCache = caches.GetJSONCache() + // Touch one and then prune. 
+ jsonCache.GetOrCreateBytes("i5", func() ([]byte, error) { + return []byte("abc"), nil + }) + + count, err = caches.Prune() + assert.NoError(err) + assert.Equal(4, count) + + // Now only the i5 should be left. + for i := 0; i < 10; i++ { + id := fmt.Sprintf("i%d", i) + v := jsonCache.getString(id) + if i != 5 { + assert.Equal("", v, id) + } else { + assert.Equal("abc", v, id) + } + } + +} diff --git a/cache/filecache/filecache_test.go b/cache/filecache/filecache_test.go index 986d41f7..9696b614 100644 --- a/cache/filecache/filecache_test.go +++ b/cache/filecache/filecache_test.go @@ -209,3 +209,9 @@ dir = "/cache/c" } wg.Wait() } + +func TestCleanID(t *testing.T) { + assert := require.New(t) + assert.Equal(filepath.FromSlash("a/b/c.txt"), cleanID(filepath.FromSlash("/a/b//c.txt"))) + assert.Equal(filepath.FromSlash("a/b/c.txt"), cleanID(filepath.FromSlash("a/b//c.txt"))) +} diff --git a/hugolib/prune_resources.go b/hugolib/prune_resources.go index 63623d75..bf5a1ef2 100644 --- a/hugolib/prune_resources.go +++ b/hugolib/prune_resources.go @@ -1,4 +1,4 @@ -// Copyright 2017-present The Hugo Authors. All rights reserved. +// Copyright 2018 The Hugo Authors. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -13,89 +13,7 @@ package hugolib -import ( - "io" - "os" - "strings" - - "github.com/gohugoio/hugo/helpers" - - "github.com/spf13/afero" -) - -// GC requires a build first. +// GC requires a build first and must run on its own. It is not thread safe. 
func (h *HugoSites) GC() (int, error) { - s := h.Sites[0] - assetsCacheFs := h.Deps.FileCaches.AssetsCache().Fs - imageCacheFs := h.Deps.FileCaches.ImageCache().Fs - - isImageInUse := func(name string) bool { - for _, site := range h.Sites { - if site.ResourceSpec.IsInImageCache(name) { - return true - } - } - - return false - } - - isAssetInUse := func(name string) bool { - // These assets are stored in tuplets with an added extension to the key. - key := strings.TrimSuffix(name, helpers.Ext(name)) - for _, site := range h.Sites { - if site.ResourceSpec.ResourceCache.Contains(key) { - return true - } - } - - return false - } - - walker := func(fs afero.Fs, dirname string, inUse func(filename string) bool) (int, error) { - counter := 0 - err := afero.Walk(fs, dirname, func(path string, info os.FileInfo, err error) error { - if info == nil { - return nil - } - - if info.IsDir() { - f, err := fs.Open(path) - if err != nil { - return nil - } - defer f.Close() - _, err = f.Readdirnames(1) - if err == io.EOF { - // Empty dir. 
- s.Fs.Source.Remove(path) - } - - return nil - } - - inUse := inUse(path) - if !inUse { - err := fs.Remove(path) - if err != nil && !os.IsNotExist(err) { - s.Log.ERROR.Printf("Failed to remove %q: %s", path, err) - } else { - counter++ - } - } - return nil - }) - - return counter, err - } - - imageCounter, err1 := walker(imageCacheFs, "", isImageInUse) - assetsCounter, err2 := walker(assetsCacheFs, "", isAssetInUse) - totalCount := imageCounter + assetsCounter - - if err1 != nil { - return totalCount, err1 - } - - return totalCount, err2 - + return h.Deps.FileCaches.Prune() } diff --git a/tpl/data/resources_test.go b/tpl/data/resources_test.go index b143bb1a..4a5482fd 100644 --- a/tpl/data/resources_test.go +++ b/tpl/data/resources_test.go @@ -192,7 +192,10 @@ func newDeps(cfg config.Provider) *deps.Deps { logger := loggers.NewErrorLogger() p, _ := paths.New(fs, cfg) - fileCaches, _ := filecache.NewCachesFromPaths(p) + fileCaches, err := filecache.NewCachesFromPaths(p) + if err != nil { + panic(err) + } return &deps.Deps{ Cfg: cfg, -- 2.30.2