From a9c5133a77c8490fad59807f44348214c7f3c954 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Bj=C3=B8rn=20Erik=20Pedersen?= Date: Tue, 21 Jul 2015 01:29:22 +0200 Subject: [PATCH] Fix data races in sorting and Reverse The custom sort functions used from the templates had some subtle data race- and related issues, especially when used in the single page template. This commit fixes this by making copies and protect the read and writes with a RWMutex. The results are cached (it will typically be invoked *number of pages* times with exactly the same data). This is, not surprisingly, also faster: ``` benchmark old ns/op new ns/op delta BenchmarkSortByWeightAndReverse 14228 491 -96.55% benchmark old allocs new allocs delta BenchmarkSortByWeightAndReverse 1 0 -100.00% benchmark old bytes new bytes delta BenchmarkSortByWeightAndReverse 32 0 -100.00% ``` Fixes #1293 --- hugolib/pageCache.go | 108 ++++++++++++++++++++++++++++++++++++++ hugolib/pageCache_test.go | 60 +++++++++++++++++++++ hugolib/pageSort.go | 93 ++++++++++++++++++++++++++------ hugolib/pageSort_test.go | 15 +++--- 4 files changed, 255 insertions(+), 21 deletions(-) create mode 100644 hugolib/pageCache.go create mode 100644 hugolib/pageCache_test.go diff --git a/hugolib/pageCache.go b/hugolib/pageCache.go new file mode 100644 index 00000000..e380a7c1 --- /dev/null +++ b/hugolib/pageCache.go @@ -0,0 +1,108 @@ +// Copyright © 2015 Steve Francia . +// +// Licensed under the Simple Public License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://opensource.org/licenses/Simple-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package hugolib + +import ( + "sync" +) + +type pageCache struct { + sync.RWMutex + m map[string][][2]Pages +} + +func newPageCache() *pageCache { + return &pageCache{m: make(map[string][][2]Pages)} +} + +// get gets a Pages slice from the cache matching the given key and Pages slice. +// If none found in cache, a copy of the supplied slice is created. +// +// If an apply func is provided, that func is applied to the newly created copy. +// +// The cache and the execution of the apply func are protected by an RWMutex. +func (c *pageCache) get(key string, p Pages, apply func(p Pages)) (Pages, bool) { + c.RLock() + if cached, ok := c.m[key]; ok { + for _, ps := range cached { + if probablyEqualPages(p, ps[0]) { + c.RUnlock() + return ps[1], true + } + } + + } + c.RUnlock() + + c.Lock() + defer c.Unlock() + + // double-check + if cached, ok := c.m[key]; ok { + for _, ps := range cached { + if probablyEqualPages(p, ps[0]) { + return ps[1], true + } + } + } + + pagesCopy := append(Pages(nil), p...) + + if v, ok := c.m[key]; ok { + c.m[key] = append(v, [2]Pages{p, pagesCopy}) + } else { + c.m[key] = [][2]Pages{[2]Pages{p, pagesCopy}} + } + + if apply != nil { + apply(pagesCopy) + } + + return pagesCopy, false + +} + +// "probably" as in: we do not compare every element for big slices, but that is +// good enough for our use case. +// TODO(bep) there is a similar method in pagination.go. DRY. 
+func probablyEqualPages(p1, p2 Pages) bool { + if p1 == nil && p2 == nil { + return true + } + + if p1 == nil || p2 == nil { + return false + } + + if p1.Len() != p2.Len() { + return false + } + + if p1.Len() == 0 { + return true + } + + step := 1 + + if len(p1) >= 50 { + step = len(p1) / 10 + } + + for i := 0; i < len(p1); i += step { + if p1[i] != p2[i] { + return false + } + } + return true +} diff --git a/hugolib/pageCache_test.go b/hugolib/pageCache_test.go new file mode 100644 index 00000000..6e48c63c --- /dev/null +++ b/hugolib/pageCache_test.go @@ -0,0 +1,60 @@ +package hugolib + +import ( + "fmt" + "github.com/stretchr/testify/assert" + "sync" + "sync/atomic" + "testing" +) + +func TestPageCache(t *testing.T) { + c1 := newPageCache() + + changeFirst := func(p Pages) { + p[0].Description = "changed" + } + + var o1 uint64 = 0 + var o2 uint64 = 0 + + var wg sync.WaitGroup + + var l1 sync.Mutex + var l2 sync.Mutex + + var testPageSets []Pages + + for j := 0; j < 50; j++ { + testPageSets = append(testPageSets, createSortTestPages(j+1)) + } + + for i := 0; i < 100; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for j, pages := range testPageSets { + msg := fmt.Sprintf("Go %d %d %d %d", i, j, o1, o2) + l1.Lock() + p, c := c1.get("k1", pages, nil) + assert.Equal(t, !atomic.CompareAndSwapUint64(&o1, uint64(j), uint64(j+1)), c, "c1: "+msg) + l1.Unlock() + p2, c2 := c1.get("k1", p, nil) + assert.True(t, c2) + assert.True(t, probablyEqualPages(p, p2)) + assert.True(t, probablyEqualPages(p, pages)) + assert.NotNil(t, p, msg) + + l2.Lock() + p3, c3 := c1.get("k2", pages, changeFirst) + assert.Equal(t, !atomic.CompareAndSwapUint64(&o2, uint64(j), uint64(j+1)), c3, "c3: "+msg) + l2.Unlock() + assert.NotNil(t, p3, msg) + assert.Equal(t, p3[0].Description, "changed", msg) + } + }() + } + + wg.Wait() + +} diff --git a/hugolib/pageSort.go b/hugolib/pageSort.go index da068a82..d42bb40d 100644 --- a/hugolib/pageSort.go +++ b/hugolib/pageSort.go @@ -17,6 +17,8 @@ import ( 
"sort" ) +var spc = newPageCache() + /* * Implementation of a custom sorter for Pages */ @@ -62,60 +64,121 @@ func (p Pages) Limit(n int) Pages { return p } +// ByWeight sorts the Pages by weight and returns a copy. +// +// Adjacent invocations on the same receiver will return a cached result. +// +// This may safely be executed in parallel. func (p Pages) ByWeight() Pages { - PageBy(DefaultPageSort).Sort(p) - return p + key := "pageSort.ByWeight" + pages, _ := spc.get(key, p, PageBy(DefaultPageSort).Sort) + return pages } +// ByTitle sorts the Pages by title and returns a copy. +// +// Adjacent invocations on the same receiver will return a cached result. +// +// This may safely be executed in parallel. func (p Pages) ByTitle() Pages { + + key := "pageSort.ByTitle" + title := func(p1, p2 *Page) bool { return p1.Title < p2.Title } - PageBy(title).Sort(p) - return p + pages, _ := spc.get(key, p, PageBy(title).Sort) + return pages } +// ByLinkTitle sorts the Pages by link title and returns a copy. +// +// Adjacent invocations on the same receiver will return a cached result. +// +// This may safely be executed in parallel. func (p Pages) ByLinkTitle() Pages { + + key := "pageSort.ByLinkTitle" + linkTitle := func(p1, p2 *Page) bool { return p1.linkTitle < p2.linkTitle } - PageBy(linkTitle).Sort(p) - return p + pages, _ := spc.get(key, p, PageBy(linkTitle).Sort) + + return pages } +// ByDate sorts the Pages by date and returns a copy. +// +// Adjacent invocations on the same receiver will return a cached result. +// +// This may safely be executed in parallel. func (p Pages) ByDate() Pages { + + key := "pageSort.ByDate" + date := func(p1, p2 *Page) bool { return p1.Date.Unix() < p2.Date.Unix() } - PageBy(date).Sort(p) - return p + pages, _ := spc.get(key, p, PageBy(date).Sort) + + return pages } +// ByPublishDate sorts the Pages by publish date and returns a copy. +// +// Adjacent invocations on the same receiver will return a cached result. 
+// +// This may safely be executed in parallel. func (p Pages) ByPublishDate() Pages { + + key := "pageSort.ByPublishDate" + pubDate := func(p1, p2 *Page) bool { return p1.PublishDate.Unix() < p2.PublishDate.Unix() } - PageBy(pubDate).Sort(p) - return p + pages, _ := spc.get(key, p, PageBy(pubDate).Sort) + + return pages } +// ByLength sorts the Pages by length and returns a copy. +// +// Adjacent invocations on the same receiver will return a cached result. +// +// This may safely be executed in parallel. func (p Pages) ByLength() Pages { + + key := "pageSort.ByLength" + length := func(p1, p2 *Page) bool { return len(p1.Content) < len(p2.Content) } - PageBy(length).Sort(p) - return p + pages, _ := spc.get(key, p, PageBy(length).Sort) + + return pages } +// Reverse reverses the order in Pages and returns a copy. +// +// Adjacent invocations on the same receiver will return a cached result. +// +// This may safely be executed in parallel. func (p Pages) Reverse() Pages { - for i, j := 0, len(p)-1; i < j; i, j = i+1, j-1 { - p[i], p[j] = p[j], p[i] + key := "pageSort.Reverse" + + reverseFunc := func(pages Pages) { + for i, j := 0, len(pages)-1; i < j; i, j = i+1, j-1 { + pages[i], pages[j] = pages[j], pages[i] + } } - return p + pages, _ := spc.get(key, p, reverseFunc) + + return pages } diff --git a/hugolib/pageSort_test.go b/hugolib/pageSort_test.go index cef5c608..cd999f7a 100644 --- a/hugolib/pageSort_test.go +++ b/hugolib/pageSort_test.go @@ -10,12 +10,14 @@ import ( 
assert.True(t, probablyEqualPages(p2, p1.Reverse())) } func BenchmarkSortByWeightAndReverse(b *testing.B) { @@ -51,6 +53,7 @@ func createSortTestPages(num int) Pages { } pages[i].FuzzyWordCount = i pages[i].Weight = w + pages[i].Description = "initial" } return pages -- 2.30.2