Remove superfluous p-tags around shortcodes
authorBjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>
Sun, 21 Jun 2015 11:08:30 +0000 (13:08 +0200)
committerBjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>
Sun, 21 Jun 2015 20:51:12 +0000 (22:51 +0200)
This commit replaces the regexp driven `replaceShortcodeTokens` with a handwritten one.

It wasnt't possible to handle the p-tags case without breaking performance.

This fix actually improves in that area:

```
benchmark                           old ns/op     new ns/op     delta
BenchmarkParsePage                  142738        142667        -0.05%
BenchmarkReplaceShortcodeTokens     665590        575645        -13.51%
BenchmarkShortcodeLexer             176038        181074        +2.86%

benchmark                           old allocs     new allocs     delta
BenchmarkParsePage                  87             87             +0.00%
BenchmarkReplaceShortcodeTokens     9631           9424           -2.15%
BenchmarkShortcodeLexer             274            274            +0.00%

benchmark                           old bytes     new bytes     delta
BenchmarkParsePage                  141830        141830        +0.00%
BenchmarkReplaceShortcodeTokens     52275         35219         -32.63%
BenchmarkShortcodeLexer             30177         30178         +0.00%
```

Fixes #1148

hugolib/handler_page.go
hugolib/page.go
hugolib/shortcode.go
hugolib/shortcode_test.go
hugolib/site_test.go

index 24c81021fe77bb9d2b58a1a9e95f718185539b67..f925976f718ff888848676af67d9dce46d0a8311 100644 (file)
@@ -61,7 +61,7 @@ func (h markdownHandler) PageConvert(p *Page, t tpl.Template) HandledResult {
        tmpContent, tmpTableOfContents := helpers.ExtractTOC(p.renderContent(helpers.RemoveSummaryDivider(p.rawContent)))
 
        if len(p.contentShortCodes) > 0 {
-               replaced, err := replaceShortcodeTokensInsources(shortcodePlaceholderPrefix, true, p.contentShortCodes,
+               replaced, err := replaceShortcodeTokensInsources(shortcodePlaceholderPrefix, p.contentShortCodes,
                        tmpContent, tmpTableOfContents)
                if err != nil {
                        jww.FATAL.Printf("Fail to replace shortcode tokens in %s:\n%s", p.BaseFileName(), err.Error())
@@ -88,7 +88,7 @@ func (h htmlHandler) PageConvert(p *Page, t tpl.Template) HandledResult {
        var err error
 
        if len(p.contentShortCodes) > 0 {
-               content, err = replaceShortcodeTokens(p.rawContent, shortcodePlaceholderPrefix, true, p.contentShortCodes)
+               content, err = replaceShortcodeTokens(p.rawContent, shortcodePlaceholderPrefix, p.contentShortCodes)
 
                if err != nil {
                        jww.FATAL.Printf("Fail to replace shortcode tokens in %s:\n%s", p.BaseFileName(), err.Error())
@@ -114,7 +114,7 @@ func (h asciidocHandler) PageConvert(p *Page, t tpl.Template) HandledResult {
        tmpContent, tmpTableOfContents := helpers.ExtractTOC(p.renderContent(helpers.RemoveSummaryDivider(p.rawContent)))
 
        if len(p.contentShortCodes) > 0 {
-               replaced, err := replaceShortcodeTokensInsources(shortcodePlaceholderPrefix, true, p.contentShortCodes,
+               replaced, err := replaceShortcodeTokensInsources(shortcodePlaceholderPrefix, p.contentShortCodes,
                        tmpContent, tmpTableOfContents)
                if err != nil {
                        jww.FATAL.Printf("Fail to replace shortcode tokens in %s:\n%s", p.BaseFileName(), err.Error())
@@ -142,7 +142,7 @@ func (h rstHandler) PageConvert(p *Page, t tpl.Template) HandledResult {
        tmpContent, tmpTableOfContents := helpers.ExtractTOC(p.renderContent(helpers.RemoveSummaryDivider(p.rawContent)))
 
        if len(p.contentShortCodes) > 0 {
-               replaced, err := replaceShortcodeTokensInsources(shortcodePlaceholderPrefix, true, p.contentShortCodes,
+               replaced, err := replaceShortcodeTokensInsources(shortcodePlaceholderPrefix, p.contentShortCodes,
                        tmpContent, tmpTableOfContents)
                if err != nil {
                        jww.FATAL.Printf("Fail to replace shortcode tokens in %s:\n%s", p.BaseFileName(), err.Error())
@@ -169,7 +169,7 @@ func (h mmarkHandler) PageConvert(p *Page, t tpl.Template) HandledResult {
        tmpContent, tmpTableOfContents := helpers.ExtractTOC(p.renderContent(helpers.RemoveSummaryDivider(p.rawContent)))
 
        if len(p.contentShortCodes) > 0 {
-               tmpContentWithTokensReplaced, err := replaceShortcodeTokens(tmpContent, shortcodePlaceholderPrefix, true, p.contentShortCodes)
+               tmpContentWithTokensReplaced, err := replaceShortcodeTokens(tmpContent, shortcodePlaceholderPrefix, p.contentShortCodes)
 
                if err != nil {
                        jww.FATAL.Printf("Fail to replace short code tokens in %s:\n%s", p.BaseFileName(), err.Error())
index 1f895cdcc79ac76bb7f85a3c03f0c243ea9674dd..19408eee57b30b0f405ec6a3c9e75b86c4c89e1d 100644 (file)
@@ -183,7 +183,7 @@ func (p *Page) setSummary() {
                renderedHeader := p.renderBytes(header)
                if len(p.contentShortCodes) > 0 {
                        tmpContentWithTokensReplaced, err :=
-                               replaceShortcodeTokens(renderedHeader, shortcodePlaceholderPrefix, true, p.contentShortCodes)
+                               replaceShortcodeTokens(renderedHeader, shortcodePlaceholderPrefix, p.contentShortCodes)
                        if err != nil {
                                jww.FATAL.Printf("Failed to replace short code tokens in Summary for %s:\n%s", p.BaseFileName(), err.Error())
                        } else {
index 2a973f99b654d85360c4704c45f2c271caea707f..69f712eb8ffe47839467c7ad9812e6fba85fad3c 100644 (file)
@@ -14,6 +14,8 @@
 package hugolib
 
 import (
+       "bytes"
+       "errors"
        "fmt"
        "html/template"
        "reflect"
@@ -132,7 +134,7 @@ func HandleShortcodes(stringToParse string, page *Page, t tpl.Template) (string,
        }
 
        if len(tmpShortcodes) > 0 {
-               tmpContentWithTokensReplaced, err := replaceShortcodeTokens([]byte(tmpContent), shortcodePlaceholderPrefix, true, tmpShortcodes)
+               tmpContentWithTokensReplaced, err := replaceShortcodeTokens([]byte(tmpContent), shortcodePlaceholderPrefix, tmpShortcodes)
 
                if err != nil {
                        return "", fmt.Errorf("Fail to replace short code tokens in %s:\n%s", page.BaseFileName(), err.Error())
@@ -436,10 +438,10 @@ Loop:
 }
 
 // replaceShortcodeTokensInsources calls replaceShortcodeTokens for every source given.
-func replaceShortcodeTokensInsources(prefix string, wrapped bool, replacements map[string]string, sources ...[]byte) (b [][]byte, err error) {
+func replaceShortcodeTokensInsources(prefix string, replacements map[string]string, sources ...[]byte) (b [][]byte, err error) {
        result := make([][]byte, len(sources))
        for i, s := range sources {
-               b, err := replaceShortcodeTokens(s, prefix, wrapped, replacements)
+               b, err := replaceShortcodeTokens(s, prefix, replacements)
 
                if err != nil {
                        return nil, err
@@ -450,43 +452,65 @@ func replaceShortcodeTokensInsources(prefix string, wrapped bool, replacements m
 }
 
 // Replace prefixed shortcode tokens (HUGOSHORTCODE-1, HUGOSHORTCODE-2) with the real content.
-// wrapped = true means that the token has been wrapped in {@{@/@}@}
-func replaceShortcodeTokens(source []byte, prefix string, wrapped bool, replacements map[string]string) (b []byte, err error) {
-       var re *regexp.Regexp
+func replaceShortcodeTokens(source []byte, prefix string, replacements map[string]string) ([]byte, error) {
 
-       if wrapped {
-               re, err = regexp.Compile(`\{@\{@` + regexp.QuoteMeta(prefix) + `-\d+@\}@\}`)
-               if err != nil {
-                       return nil, err
-               }
-       } else {
-               re, err = regexp.Compile(regexp.QuoteMeta(prefix) + `-(\d+)`)
-               if err != nil {
-                       return nil, err
-               }
+       if len(replacements) == 0 {
+               return source, nil
        }
 
-       // use panic/recover for reporting if an unknown
-       defer func() {
-               if r := recover(); r != nil {
-                       var ok bool
-                       b = nil
-                       err, ok = r.(error)
-                       if !ok {
-                               err = fmt.Errorf("unexpected panic during replaceShortcodeTokens: %v", r)
+       var buff bytes.Buffer
+
+       sourceLen := len(source)
+       width := 0
+       start := 0
+
+       pre := []byte("{@{@" + prefix)
+       post := []byte("@}@}")
+       pStart := []byte("<p>")
+       pEnd := []byte("</p>")
+
+       k := bytes.Index(source[start:], pre)
+
+       for k != -1 {
+               j := start + k
+               postIdx := bytes.Index(source[j:], post)
+               if postIdx < 0 {
+                       // this should never happen, but let the caller decide to panic or not
+                       return nil, errors.New("illegal state in content; shortcode token missing end delim")
+               }
+
+               end := j + postIdx + 4
+
+               newVal := []byte(replacements[string(source[j:end])])
+
+               // Issue #1148: Check for wrapping p-tags <p>
+               if j >= 3 && bytes.Equal(source[j-3:j], pStart) {
+                       if (k+4) < sourceLen && bytes.Equal(source[end:end+4], pEnd) {
+                               j -= 3
+                               end += 4
                        }
                }
-       }()
-       b = re.ReplaceAllFunc(source, func(m []byte) []byte {
-               key := string(m)
 
-               if val, ok := replacements[key]; ok {
-                       return []byte(val)
+               oldVal := source[j:end]
+               w, err := buff.Write(source[start:j])
+               if err != nil {
+                       return nil, errors.New("buff write failed")
                }
-               panic(fmt.Errorf("unknown shortcode token %q", key))
-       })
+               width += w
+               w, err = buff.Write(newVal)
+               if err != nil {
+                       return nil, errors.New("buff write failed")
+               }
+               width += w
+               start = j + len(oldVal)
 
-       return b, err
+               k = bytes.Index(source[start:], pre)
+       }
+       _, err := buff.Write(source[start:])
+       if err != nil {
+               return nil, errors.New("buff write failed")
+       }
+       return buff.Bytes(), nil
 }
 
 func getShortcodeTemplate(name string, t tpl.Template) *template.Template {
index 6d9c2037ba537042179f3f453e129eb564bdfe19..d7afbc14ebb93801ad7cedc4518ccafabf1c3c15 100644 (file)
@@ -332,16 +332,16 @@ func BenchmarkReplaceShortcodeTokens(b *testing.B) {
                replacements map[string]string
                expect       interface{}
        }{
-               {"Hello HUGOSHORTCODE-1.", map[string]string{"HUGOSHORTCODE-1": "World"}, "Hello World."},
-               {strings.Repeat("A", 100) + " HUGOSHORTCODE-1.", map[string]string{"HUGOSHORTCODE-1": "Hello World"}, strings.Repeat("A", 100) + " Hello World."},
-               {strings.Repeat("A", 500) + " HUGOSHORTCODE-1.", map[string]string{"HUGOSHORTCODE-1": "Hello World"}, strings.Repeat("A", 500) + " Hello World."},
-               {strings.Repeat("ABCD ", 500) + " HUGOSHORTCODE-1.", map[string]string{"HUGOSHORTCODE-1": "Hello World"}, strings.Repeat("ABCD ", 500) + " Hello World."},
-               {strings.Repeat("A", 500) + " HUGOSHORTCODE-1." + strings.Repeat("BC", 500) + " HUGOSHORTCODE-1.", map[string]string{"HUGOSHORTCODE-1": "Hello World"}, strings.Repeat("A", 500) + " Hello World." + strings.Repeat("BC", 500) + " Hello World."},
+               {"Hello {@{@HUGOSHORTCODE-1@}@}.", map[string]string{"{@{@HUGOSHORTCODE-1@}@}": "World"}, "Hello World."},
+               {strings.Repeat("A", 100) + " {@{@HUGOSHORTCODE-1@}@}.", map[string]string{"{@{@HUGOSHORTCODE-1@}@}": "Hello World"}, strings.Repeat("A", 100) + " Hello World."},
+               {strings.Repeat("A", 500) + " {@{@HUGOSHORTCODE-1@}@}.", map[string]string{"{@{@HUGOSHORTCODE-1@}@}": "Hello World"}, strings.Repeat("A", 500) + " Hello World."},
+               {strings.Repeat("ABCD ", 500) + " {@{@HUGOSHORTCODE-1@}@}.", map[string]string{"{@{@HUGOSHORTCODE-1@}@}": "Hello World"}, strings.Repeat("ABCD ", 500) + " Hello World."},
+               {strings.Repeat("A", 500) + " {@{@HUGOSHORTCODE-1@}@}." + strings.Repeat("BC", 500) + " {@{@HUGOSHORTCODE-1@}@}.", map[string]string{"{@{@HUGOSHORTCODE-1@}@}": "Hello World"}, strings.Repeat("A", 500) + " Hello World." + strings.Repeat("BC", 500) + " Hello World."},
        }
        b.ResetTimer()
        for i := 0; i < b.N; i++ {
                for i, this := range data {
-                       results, err := replaceShortcodeTokens([]byte(this.input), "HUGOSHORTCODE", false, this.replacements)
+                       results, err := replaceShortcodeTokens([]byte(this.input), "HUGOSHORTCODE", this.replacements)
 
                        if expectSuccess, ok := this.expect.(bool); ok && !expectSuccess {
                                if err == nil {
@@ -367,21 +367,30 @@ func TestReplaceShortcodeTokens(t *testing.T) {
                input        string
                prefix       string
                replacements map[string]string
-               wrappedInDiv bool
                expect       interface{}
        }{
-               {"Hello PREFIX-1.", "PREFIX", map[string]string{"PREFIX-1": "World"}, false, "Hello World."},
-               {"A {@{@A-1@}@} asdf {@{@A-2@}@}.", "A", map[string]string{"{@{@A-1@}@}": "v1", "{@{@A-2@}@}": "v2"}, true, "A v1 asdf v2."},
-               {"Hello PREFIX2-1. Go PREFIX2-2, Go, Go PREFIX2-3 Go Go!.", "PREFIX2", map[string]string{"PREFIX2-1": "Europe", "PREFIX2-2": "Jonny", "PREFIX2-3": "Johnny"}, false, "Hello Europe. Go Jonny, Go, Go Johnny Go Go!."},
-               {"A PREFIX-2 PREFIX-1.", "PREFIX", map[string]string{"PREFIX-1": "A", "PREFIX-2": "B"}, false, "A B A."},
-               {"A PREFIX-1 PREFIX-2", "PREFIX", map[string]string{"PREFIX-1": "A"}, false, false},
-               {"A PREFIX-1 but not the second.", "PREFIX", map[string]string{"PREFIX-1": "A", "PREFIX-2": "B"}, false, "A A but not the second."},
-               {"An PREFIX-1.", "PREFIX", map[string]string{"PREFIX-1": "A", "PREFIX-2": "B"}, false, "An A."},
-               {"An PREFIX-1 PREFIX-2.", "PREFIX", map[string]string{"PREFIX-1": "A", "PREFIX-2": "B"}, false, "An A B."},
-               {"A PREFIX-1 PREFIX-2 PREFIX-3 PREFIX-1 PREFIX-3.", "PREFIX", map[string]string{"PREFIX-1": "A", "PREFIX-2": "B", "PREFIX-3": "C"}, false, "A A B C A C."},
-               {"A {@{@PREFIX-1@}@} {@{@PREFIX-2@}@} {@{@PREFIX-3@}@} {@{@PREFIX-1@}@} {@{@PREFIX-3@}@}.", "PREFIX", map[string]string{"{@{@PREFIX-1@}@}": "A", "{@{@PREFIX-2@}@}": "B", "{@{@PREFIX-3@}@}": "C"}, true, "A A B C A C."},
+               {"Hello {@{@PREFIX-1@}@}.", "PREFIX", map[string]string{"{@{@PREFIX-1@}@}": "World"}, "Hello World."},
+               {"A {@{@A-1@}@} asdf {@{@A-2@}@}.", "A", map[string]string{"{@{@A-1@}@}": "v1", "{@{@A-2@}@}": "v2"}, "A v1 asdf v2."},
+               {"Hello {@{@PREFIX2-1@}@}. Go {@{@PREFIX2-2@}@}, Go, Go {@{@PREFIX2-3@}@} Go Go!.", "PREFIX2", map[string]string{"{@{@PREFIX2-1@}@}": "Europe", "{@{@PREFIX2-2@}@}": "Jonny", "{@{@PREFIX2-3@}@}": "Johnny"}, "Hello Europe. Go Jonny, Go, Go Johnny Go Go!."},
+               {"A {@{@PREFIX-2@}@} {@{@PREFIX-1@}@}.", "PREFIX", map[string]string{"{@{@PREFIX-1@}@}": "A", "{@{@PREFIX-2@}@}": "B"}, "A B A."},
+               {"A {@{@PREFIX-1@}@} {@{@PREFIX-2", "PREFIX", map[string]string{"{@{@PREFIX-1@}@}": "A"}, false},
+               {"A {@{@PREFIX-1@}@} but not the second.", "PREFIX", map[string]string{"{@{@PREFIX-1@}@}": "A", "{@{@PREFIX-2@}@}": "B"}, "A A but not the second."},
+               {"An {@{@PREFIX-1@}@}.", "PREFIX", map[string]string{"{@{@PREFIX-1@}@}": "A", "{@{@PREFIX-2@}@}": "B"}, "An A."},
+               {"An {@{@PREFIX-1@}@} {@{@PREFIX-2@}@}.", "PREFIX", map[string]string{"{@{@PREFIX-1@}@}": "A", "{@{@PREFIX-2@}@}": "B"}, "An A B."},
+               {"A {@{@PREFIX-1@}@} {@{@PREFIX-2@}@} {@{@PREFIX-3@}@} {@{@PREFIX-1@}@} {@{@PREFIX-3@}@}.", "PREFIX", map[string]string{"{@{@PREFIX-1@}@}": "A", "{@{@PREFIX-2@}@}": "B", "{@{@PREFIX-3@}@}": "C"}, "A A B C A C."},
+               {"A {@{@PREFIX-1@}@} {@{@PREFIX-2@}@} {@{@PREFIX-3@}@} {@{@PREFIX-1@}@} {@{@PREFIX-3@}@}.", "PREFIX", map[string]string{"{@{@PREFIX-1@}@}": "A", "{@{@PREFIX-2@}@}": "B", "{@{@PREFIX-3@}@}": "C"}, "A A B C A C."},
+               // Issue #1148 remove p-tags 10 =>
+               {"Hello <p>{@{@PREFIX-1@}@}</p>. END.", "PREFIX", map[string]string{"{@{@PREFIX-1@}@}": "World"}, "Hello World. END."},
+               {"Hello <p>{@{@PREFIX-1@}@}</p>. <p>{@{@PREFIX-2@}@}</p> END.", "PREFIX", map[string]string{"{@{@PREFIX-1@}@}": "World", "{@{@PREFIX-2@}@}": "THE"}, "Hello World. THE END."},
+               {"Hello <p>{@{@PREFIX-1@}@}. END</p>.", "PREFIX", map[string]string{"{@{@PREFIX-1@}@}": "World"}, "Hello <p>World. END</p>."},
+               {"<p>Hello {@{@PREFIX-1@}@}</p>. END.", "PREFIX", map[string]string{"{@{@PREFIX-1@}@}": "World"}, "<p>Hello World</p>. END."},
+               {"Hello <p>{@{@PREFIX-1@}@}12", "PREFIX", map[string]string{"{@{@PREFIX-1@}@}": "World"}, "Hello <p>World12"},
+               // Make sure the buffering expands as needed
+               {"Hello {@{@P-1@}@}. {@{@P-1@}@}-{@{@P-1@}@} {@{@P-1@}@} {@{@P-1@}@} {@{@P-1@}@} END", "P", map[string]string{"{@{@P-1@}@}": strings.Repeat("BC", 100)},
+                       fmt.Sprintf("Hello %s. %s-%s %s %s %s END",
+                               strings.Repeat("BC", 100), strings.Repeat("BC", 100), strings.Repeat("BC", 100), strings.Repeat("BC", 100), strings.Repeat("BC", 100), strings.Repeat("BC", 100))},
        } {
-               results, err := replaceShortcodeTokens([]byte(this.input), this.prefix, this.wrappedInDiv, this.replacements)
+               results, err := replaceShortcodeTokens([]byte(this.input), this.prefix, this.replacements)
 
                if b, ok := this.expect.(bool); ok && !b {
                        if err == nil {
@@ -393,7 +402,7 @@ func TestReplaceShortcodeTokens(t *testing.T) {
                                continue
                        }
                        if !reflect.DeepEqual(results, []byte(this.expect.(string))) {
-                               t.Errorf("[%d] replaceShortcodeTokens, got %q but expected %q", i, results, this.expect)
+                               t.Errorf("[%d] replaceShortcodeTokens, got \n%q but expected \n%q", i, results, this.expect)
                        }
                }
 
index 22db1eaf2c84a7b157a0f22388f236ef2b209cb8..5acff8c2a2fc85587c173d9b5b050b77fc151b73 100644 (file)
@@ -320,8 +320,13 @@ func doTestCrossrefs(t *testing.T, relative, uglyUrls bool) {
        sources := []source.ByteSource{
                {filepath.FromSlash("sect/doc1.md"),
                        []byte(fmt.Sprintf(`Ref 2: {{< %s "sect/doc2.md" >}}`, refShortcode))},
+               // Issue #1148: Make sure that no P-tags is added around shortcodes.
                {filepath.FromSlash("sect/doc2.md"),
-                       []byte(fmt.Sprintf(`Ref 1: {{< %s "sect/doc1.md" >}}`, refShortcode))},
+                       []byte(fmt.Sprintf(`**Ref 1:** 
+                       
+{{< %s "sect/doc1.md" >}}
+                       
+THE END.`, refShortcode))},
        }
 
        s := &Site{
@@ -341,7 +346,7 @@ func doTestCrossrefs(t *testing.T, relative, uglyUrls bool) {
                expected string
        }{
                {filepath.FromSlash(fmt.Sprintf("sect/doc1%s", expectedPathSuffix)), fmt.Sprintf("<p>Ref 2: %s/sect/doc2%s</p>\n", expectedBase, expectedUrlSuffix)},
-               {filepath.FromSlash(fmt.Sprintf("sect/doc2%s", expectedPathSuffix)), fmt.Sprintf("<p>Ref 1: %s/sect/doc1%s</p>\n", expectedBase, expectedUrlSuffix)},
+               {filepath.FromSlash(fmt.Sprintf("sect/doc2%s", expectedPathSuffix)), fmt.Sprintf("<p><strong>Ref 1:</strong></p>\n\n%s/sect/doc1%s\n\n<p>THE END.</p>\n", expectedBase, expectedUrlSuffix)},
        }
 
        for _, test := range tests {