markup/goldmark: Add an optional Blackfriday auto ID strategy
author Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>
Sun, 5 Jan 2020 10:52:00 +0000 (11:52 +0100)
committer Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>
Sun, 5 Jan 2020 10:56:05 +0000 (11:56 +0100)
Fixes #6707

markup/blackfriday/convert.go
markup/blackfriday/convert_test.go
markup/goldmark/autoid.go
markup/goldmark/autoid_test.go
markup/goldmark/convert.go
markup/goldmark/convert_test.go
markup/goldmark/goldmark_config/config.go

diff --git a/markup/blackfriday/convert.go b/markup/blackfriday/convert.go
index bbbc2b377d328d191f7b05e1aa1cdfd3f935c251..d844c5554a41416c9d797e492305008c67ce39c8 100644 (file)
@@ -15,6 +15,8 @@
 package blackfriday
 
 import (
+       "unicode"
+
        "github.com/gohugoio/hugo/identity"
        "github.com/gohugoio/hugo/markup/blackfriday/blackfriday_config"
        "github.com/gohugoio/hugo/markup/converter"
@@ -61,7 +63,27 @@ type blackfridayConverter struct {
 }
 
 func (c *blackfridayConverter) SanitizeAnchorName(s string) string {
-       return blackfriday.SanitizedAnchorName(s)
+       return SanitizedAnchorName(s)
+}
+
+// SanitizedAnchorName is how Blackfriday sanitizes anchor names.
+// Implementation borrowed from https://github.com/russross/blackfriday/blob/a477dd1646916742841ed20379f941cfa6c5bb6f/block.go#L1464
+func SanitizedAnchorName(text string) string {
+       var anchorName []rune
+       futureDash := false
+       for _, r := range text {
+               switch {
+               case unicode.IsLetter(r) || unicode.IsNumber(r):
+                       if futureDash && len(anchorName) > 0 {
+                               anchorName = append(anchorName, '-')
+                       }
+                       futureDash = false
+                       anchorName = append(anchorName, unicode.ToLower(r))
+               default:
+                       futureDash = true
+               }
+       }
+       return string(anchorName)
 }
 
 func (c *blackfridayConverter) AnchorSuffix() string {
diff --git a/markup/blackfriday/convert_test.go b/markup/blackfriday/convert_test.go
index b4d66dec66be23de1c6efe2ae8488f1b934a7687..d2d8d927e94940988de4e9176e9728a2930e248f 100644 (file)
@@ -179,3 +179,45 @@ This is a footnote.[^1] And then some.
        c.Assert(s, qt.Contains, "This is a footnote.<sup class=\"footnote-ref\" id=\"fnref:testid:1\"><a href=\"#fn:testid:1\">1</a></sup>")
        c.Assert(s, qt.Contains, "<a class=\"footnote-return\" href=\"#fnref:testid:1\"><sup>[return]</sup></a>")
 }
+
+// Tests borrowed from https://github.com/russross/blackfriday/blob/a925a152c144ea7de0f451eaf2f7db9e52fa005a/block_test.go#L1817
+func TestSanitizedAnchorName(t *testing.T) {
+       tests := []struct {
+               text string
+               want string
+       }{
+               {
+                       text: "This is a header",
+                       want: "this-is-a-header",
+               },
+               {
+                       text: "This is also          a header",
+                       want: "this-is-also-a-header",
+               },
+               {
+                       text: "main.go",
+                       want: "main-go",
+               },
+               {
+                       text: "Article 123",
+                       want: "article-123",
+               },
+               {
+                       text: "<- Let's try this, shall we?",
+                       want: "let-s-try-this-shall-we",
+               },
+               {
+                       text: "        ",
+                       want: "",
+               },
+               {
+                       text: "Hello, 世界",
+                       want: "hello-世界",
+               },
+       }
+       for _, test := range tests {
+               if got := SanitizedAnchorName(test.text); got != test.want {
+                       t.Errorf("SanitizedAnchorName(%q):\ngot %q\nwant %q", test.text, got, test.want)
+               }
+       }
+}
diff --git a/markup/goldmark/autoid.go b/markup/goldmark/autoid.go
index aaf1852d1616b71481d45eee264f5a6863d1093e..950d4a5778ee89239d9bd48e62b2253a70553474 100644 (file)
@@ -19,6 +19,8 @@ import (
        "unicode"
        "unicode/utf8"
 
+       "github.com/gohugoio/hugo/markup/blackfriday"
+
        "github.com/gohugoio/hugo/markup/goldmark/goldmark_config"
 
        "github.com/gohugoio/hugo/common/text"
@@ -30,34 +32,41 @@ import (
        bp "github.com/gohugoio/hugo/bufferpool"
 )
 
-func sanitizeAnchorNameString(s string, asciiOnly bool) string {
-       return string(sanitizeAnchorName([]byte(s), asciiOnly))
+func sanitizeAnchorNameString(s string, idType string) string {
+       return string(sanitizeAnchorName([]byte(s), idType))
 }
 
-func sanitizeAnchorName(b []byte, asciiOnly bool) []byte {
-       return sanitizeAnchorNameWithHook(b, asciiOnly, nil)
+func sanitizeAnchorName(b []byte, idType string) []byte {
+       return sanitizeAnchorNameWithHook(b, idType, nil)
 }
 
-func sanitizeAnchorNameWithHook(b []byte, asciiOnly bool, hook func(buf *bytes.Buffer)) []byte {
+func sanitizeAnchorNameWithHook(b []byte, idType string, hook func(buf *bytes.Buffer)) []byte {
        buf := bp.GetBuffer()
 
-       if asciiOnly {
-               // Normalize it to preserve accents if possible.
-               b = text.RemoveAccents(b)
-       }
+       if idType == goldmark_config.AutoHeadingIDTypeBlackfriday {
+               // TODO(bep) make it more efficient.
+               buf.WriteString(blackfriday.SanitizedAnchorName(string(b)))
+       } else {
+               asciiOnly := idType == goldmark_config.AutoHeadingIDTypeGitHubAscii
 
-       for len(b) > 0 {
-               r, size := utf8.DecodeRune(b)
-               switch {
-               case asciiOnly && size != 1:
-               case r == '-' || isSpace(r):
-                       buf.WriteRune('-')
-               case isAlphaNumeric(r):
-                       buf.WriteRune(unicode.ToLower(r))
-               default:
+               if asciiOnly {
+                       // Normalize it to preserve accents if possible.
+                       b = text.RemoveAccents(b)
                }
 
-               b = b[size:]
+               for len(b) > 0 {
+                       r, size := utf8.DecodeRune(b)
+                       switch {
+                       case asciiOnly && size != 1:
+                       case r == '-' || isSpace(r):
+                               buf.WriteRune('-')
+                       case isAlphaNumeric(r):
+                               buf.WriteRune(unicode.ToLower(r))
+                       default:
+                       }
+
+                       b = b[size:]
+               }
        }
 
        if hook != nil {
@@ -83,19 +92,19 @@ func isSpace(r rune) bool {
 var _ parser.IDs = (*idFactory)(nil)
 
 type idFactory struct {
-       asciiOnly bool
-       vals      map[string]struct{}
+       idType string
+       vals   map[string]struct{}
 }
 
 func newIDFactory(idType string) *idFactory {
        return &idFactory{
-               vals:      make(map[string]struct{}),
-               asciiOnly: idType == goldmark_config.AutoHeadingIDTypeGitHubAscii,
+               vals:   make(map[string]struct{}),
+               idType: idType,
        }
 }
 
 func (ids *idFactory) Generate(value []byte, kind ast.NodeKind) []byte {
-       return sanitizeAnchorNameWithHook(value, ids.asciiOnly, func(buf *bytes.Buffer) {
+       return sanitizeAnchorNameWithHook(value, ids.idType, func(buf *bytes.Buffer) {
                if buf.Len() == 0 {
                        if kind == ast.KindHeading {
                                buf.WriteString("heading")
diff --git a/markup/goldmark/autoid_test.go b/markup/goldmark/autoid_test.go
index 915c6a03cf9fcec3feb708e81d65fcb310da651e..1257b348250ed66b2b719b0a0aa9b528ae1628fa 100644 (file)
@@ -17,6 +17,8 @@ import (
        "strings"
        "testing"
 
+       "github.com/gohugoio/hugo/markup/goldmark/goldmark_config"
+
        qt "github.com/frankban/quicktest"
 )
 
@@ -69,9 +71,9 @@ under_score
                expect := expectlines[i]
                c.Run(input, func(c *qt.C) {
                        b := []byte(input)
-                       got := string(sanitizeAnchorName(b, false))
+                       got := string(sanitizeAnchorName(b, goldmark_config.AutoHeadingIDTypeGitHub))
                        c.Assert(got, qt.Equals, expect)
-                       c.Assert(sanitizeAnchorNameString(input, false), qt.Equals, expect)
+                       c.Assert(sanitizeAnchorNameString(input, goldmark_config.AutoHeadingIDTypeGitHub), qt.Equals, expect)
                        c.Assert(string(b), qt.Equals, input)
                })
        }
@@ -80,16 +82,21 @@ under_score
 func TestSanitizeAnchorNameAsciiOnly(t *testing.T) {
        c := qt.New(t)
 
-       c.Assert(sanitizeAnchorNameString("god is神真美好 good", true), qt.Equals, "god-is-good")
-       c.Assert(sanitizeAnchorNameString("Resumé", true), qt.Equals, "resume")
+       c.Assert(sanitizeAnchorNameString("god is神真美好 good", goldmark_config.AutoHeadingIDTypeGitHubAscii), qt.Equals, "god-is-good")
+       c.Assert(sanitizeAnchorNameString("Resumé", goldmark_config.AutoHeadingIDTypeGitHubAscii), qt.Equals, "resume")
+
+}
 
+func TestSanitizeAnchorNameBlackfriday(t *testing.T) {
+       c := qt.New(t)
+       c.Assert(sanitizeAnchorNameString("Let's try this, shall we?", goldmark_config.AutoHeadingIDTypeBlackfriday), qt.Equals, "let-s-try-this-shall-we")
 }
 
 func BenchmarkSanitizeAnchorName(b *testing.B) {
        input := []byte("God is good: 神真美好")
        b.ResetTimer()
        for i := 0; i < b.N; i++ {
-               result := sanitizeAnchorName(input, false)
+               result := sanitizeAnchorName(input, goldmark_config.AutoHeadingIDTypeGitHub)
                if len(result) != 24 {
                        b.Fatalf("got %d", len(result))
 
@@ -101,7 +108,7 @@ func BenchmarkSanitizeAnchorNameAsciiOnly(b *testing.B) {
        input := []byte("God is good: 神真美好")
        b.ResetTimer()
        for i := 0; i < b.N; i++ {
-               result := sanitizeAnchorName(input, true)
+               result := sanitizeAnchorName(input, goldmark_config.AutoHeadingIDTypeGitHubAscii)
                if len(result) != 12 {
                        b.Fatalf("got %d", len(result))
 
@@ -109,11 +116,23 @@ func BenchmarkSanitizeAnchorNameAsciiOnly(b *testing.B) {
        }
 }
 
+func BenchmarkSanitizeAnchorNameBlackfriday(b *testing.B) {
+       input := []byte("God is good: 神真美好")
+       b.ResetTimer()
+       for i := 0; i < b.N; i++ {
+               result := sanitizeAnchorName(input, goldmark_config.AutoHeadingIDTypeBlackfriday)
+               if len(result) != 24 {
+                       b.Fatalf("got %d", len(result))
+
+               }
+       }
+}
+
 func BenchmarkSanitizeAnchorNameString(b *testing.B) {
        input := "God is good: 神真美好"
        b.ResetTimer()
        for i := 0; i < b.N; i++ {
-               result := sanitizeAnchorNameString(input, false)
+               result := sanitizeAnchorNameString(input, goldmark_config.AutoHeadingIDTypeGitHub)
                if len(result) != 24 {
                        b.Fatalf("got %d", len(result))
                }
diff --git a/markup/goldmark/convert.go b/markup/goldmark/convert.go
index c6f958366180b0986bdea398044d5d5e74d330ae..d4c3533537ef5c20e5e8b26def15c2bd168c2c19 100644 (file)
@@ -29,7 +29,6 @@ import (
 
        "github.com/gohugoio/hugo/hugofs"
        "github.com/gohugoio/hugo/markup/converter"
-       "github.com/gohugoio/hugo/markup/goldmark/goldmark_config"
        "github.com/gohugoio/hugo/markup/highlight"
        "github.com/gohugoio/hugo/markup/tableofcontents"
        "github.com/yuin/goldmark"
@@ -57,7 +56,7 @@ func (p provide) New(cfg converter.ProviderConfig) (converter.Provider, error) {
                        cfg: cfg,
                        md:  md,
                        sanitizeAnchorName: func(s string) string {
-                               return sanitizeAnchorNameString(s, cfg.MarkupConfig.Goldmark.Parser.AutoHeadingIDType == goldmark_config.AutoHeadingIDTypeGitHub)
+                               return sanitizeAnchorNameString(s, cfg.MarkupConfig.Goldmark.Parser.AutoHeadingIDType)
                        },
                }, nil
        }), nil
diff --git a/markup/goldmark/convert_test.go b/markup/goldmark/convert_test.go
index 3c173fb0a35f8138ae2e272cb1534698880b1373..31799b2a53c58f0960b3d87a251b4d450d1e73d2 100644 (file)
@@ -178,6 +178,21 @@ func TestConvertAutoIDAsciiOnly(t *testing.T) {
        c.Assert(got, qt.Contains, "<h2 id=\"god-is-good-\">")
 }
 
+func TestConvertAutoIDBlackfriday(t *testing.T) {
+       c := qt.New(t)
+
+       content := `
+## Let's try this, shall we?
+
+`
+       mconf := markup_config.Default
+       mconf.Goldmark.Parser.AutoHeadingIDType = goldmark_config.AutoHeadingIDTypeBlackfriday
+       b := convert(c, mconf, content)
+       got := string(b.Bytes())
+
+       c.Assert(got, qt.Contains, "<h2 id=\"let-s-try-this-shall-we\">")
+}
+
 func TestCodeFence(t *testing.T) {
        c := qt.New(t)
 
diff --git a/markup/goldmark/goldmark_config/config.go b/markup/goldmark/goldmark_config/config.go
index 47399b52c54b2bc0301f51d269e66d3bc77b67d7..af33e03dc4bd67d21a88d2e99aed24247ac24590 100644 (file)
@@ -17,6 +17,7 @@ package goldmark_config
 const (
        AutoHeadingIDTypeGitHub      = "github"
        AutoHeadingIDTypeGitHubAscii = "github-ascii"
+       AutoHeadingIDTypeBlackfriday = "blackfriday"
 )
 
 // DefaultConfig holds the default Goldmark configuration.