hugolib: Fix broken manual summary handling

author Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>

Tue, 30 Oct 2018 19:24:34 +0000 (20:24 +0100)

committer Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>

Wed, 31 Oct 2018 22:14:37 +0000 (23:14 +0100)
author Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>
Tue, 30 Oct 2018 19:24:34 +0000 (20:24 +0100)
committer Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>
Wed, 31 Oct 2018 22:14:37 +0000 (23:14 +0100)
diff --git a/hugolib/page.go b/hugolib/page.go

index 578aed9edc62c27cdb7c72a4155bdbd7760ff26b..47083e9ef3ee6b4ca8cc159cc08a59f2c36425f6 100644 (file)
--- a/hugolib/page.go
+++ b/hugolib/page.go
@@ -730,56 +730,48 @@ func splitUserDefinedSummaryAndContent(markup string, c []byte) (sc *summaryCont
                 }
         }()
  
-       c = bytes.TrimSpace(c)
-       startDivider := bytes.Index(c, internalSummaryDivider)
+       startDivider := bytes.Index(c, internalSummaryDividerBaseBytes)
  
         if startDivider == -1 {
                 return
         }
  
-       endDivider := startDivider + len(internalSummaryDivider)
-       endSummary := startDivider
-
-       var (
-               startMarkup []byte
-               endMarkup   []byte
-               addDiv      bool
-       )
-
+       startTag := "p"
         switch markup {
-       default:
-               startMarkup = []byte("<p>")
-               endMarkup = []byte("</p>")
         case "asciidoc":
-               startMarkup = []byte("<div class=\"paragraph\">")
-               endMarkup = []byte("</div>")
-       case "rst":
-               startMarkup = []byte("<p>")
-               endMarkup = []byte("</p>")
-               addDiv = true
+               startTag = "div"
+
+       }
+
+       // Walk back and forward to the surrounding tags.
+       start := bytes.LastIndex(c[:startDivider], []byte("<"+startTag))
+       end := bytes.Index(c[startDivider:], []byte("</"+startTag))
+
+       if start == -1 {
+               start = startDivider
+       } else {
+               start = startDivider - (startDivider - start)
         }
  
-       // Find the closest end/start markup string to the divider
-       fromStart := -1
-       fromIdx := bytes.LastIndex(c[:startDivider], startMarkup)
-       if fromIdx != -1 {
-               fromStart = startDivider - fromIdx - len(startMarkup)
+       if end == -1 {
+               end = startDivider + len(internalSummaryDividerBase)
+       } else {
+               end = startDivider + end + len(startTag) + 3
         }
-       fromEnd := bytes.Index(c[endDivider:], endMarkup)
  
-       if fromEnd != -1 && fromEnd <= fromStart {
-               endSummary = startDivider + fromEnd + len(endMarkup)
-       } else if fromStart != -1 && fromEnd != -1 {
-               endSummary = startDivider - fromStart - len(startMarkup)
+       var addDiv bool
+
+       switch markup {
+       case "rst":
+               addDiv = true
         }
  
-       withoutDivider := bytes.TrimSpace(append(c[:startDivider], c[endDivider:]...))
-       var (
-               summary []byte
-       )
+       withoutDivider := append(c[:start], bytes.Trim(c[end:], "\n")...)
+
+       var summary []byte
  
         if len(withoutDivider) > 0 {
-               summary = bytes.TrimSpace(withoutDivider[:endSummary])
+               summary = bytes.TrimSpace(withoutDivider[:start])
         }
  
         if addDiv {
@@ -793,7 +785,7 @@ func splitUserDefinedSummaryAndContent(markup string, c []byte) (sc *summaryCont
  
         sc = &summaryContent{
                 summary: summary,
-               content: withoutDivider,
+               content: bytes.TrimSpace(withoutDivider),
         }
  
         return
diff --git a/hugolib/page_content.go b/hugolib/page_content.go

index dab46411e0eb70d439b86e39d9525d477bc72e58..5a8258279fb0d37da57bad276263f8d6dfc0a942 100644 (file)
--- a/hugolib/page_content.go
+++ b/hugolib/page_content.go
@@ -27,7 +27,9 @@ import (
  )
  
  var (
-       internalSummaryDivider = []byte("HUGOMORE42")
+       internalSummaryDividerBase      = "HUGOMORE42"
+       internalSummaryDividerBaseBytes = []byte(internalSummaryDividerBase)
+       internalSummaryDividerPre       = []byte("\n\n" + internalSummaryDividerBase + "\n\n")
  )
  
  // The content related items on a Page.
@@ -111,7 +113,7 @@ Loop:
                         }
  
                 case it.Type == pageparser.TypeLeadSummaryDivider:
-                       result.Write(internalSummaryDivider)
+                       result.Write(internalSummaryDividerPre)
                         p.source.hasSummaryDivider = true
                         // Need to determine if the page is truncated.
                         f := func(item pageparser.Item) bool {
diff --git a/hugolib/page_test.go b/hugolib/page_test.go

index 4499a97e07732c9d2e1db3694ec5a929868ef558..18be64ceeee158f48dd70f0a77fa829437924ac7 100644 (file)
--- a/hugolib/page_test.go
+++ b/hugolib/page_test.go
@@ -483,6 +483,7 @@ func checkPageContent(t *testing.T, page *Page, content string, msg ...interface
         a := normalizeContent(content)
         b := normalizeContent(string(page.content()))
         if a != b {
+               t.Log(trace())
                 t.Fatalf("Page content is:\n%q\nExpected:\n%q (%q)", b, a, msg)
         }
  }
@@ -553,6 +554,7 @@ func normalizeExpected(ext, str string) string {
                         }
                         expected += fmt.Sprintf("<div class=\"paragraph\">\n%s</p></div>\n", para)
                 }
+
                 return expected
         case "rst":
                 return fmt.Sprintf("<div class=\"document\">\n\n\n%s</div>", str)
@@ -569,7 +571,6 @@ func testAllMarkdownEnginesForPages(t *testing.T,
                 {"md", func() bool { return true }},
                 {"mmark", func() bool { return true }},
                 {"ad", func() bool { return helpers.HasAsciidoc() }},
-               // TODO(bep) figure a way to include this without too much work.{"html", func() bool { return true }},
                 {"rst", func() bool { return helpers.HasRst() }},
         }
  
@@ -642,57 +643,6 @@ func TestCreateNewPage(t *testing.T) {
         testAllMarkdownEnginesForPages(t, assertFunc, settings, simplePage)
  }
  
-func TestSplitSummaryAndContent(t *testing.T) {
-       t.Parallel()
-       for i, this := range []struct {
-               markup          string
-               content         string
-               expectedSummary string
-               expectedContent string
-       }{
-               {"markdown", `<p>Summary Same LineHUGOMORE42</p>
-
-<p>Some more text</p>`, "<p>Summary Same Line</p>", "<p>Summary Same Line</p>\n\n<p>Some more text</p>"},
-               {"asciidoc", `<div class="paragraph"><p>sn</p></div><div class="paragraph"><p>HUGOMORE42Some more text</p></div>`,
-                       "<div class=\"paragraph\"><p>sn</p></div>",
-                       "<div class=\"paragraph\"><p>sn</p></div><div class=\"paragraph\"><p>Some more text</p></div>"},
-               {"rst",
-                       "<div class=\"document\"><p>Summary Next Line</p><p>HUGOMORE42Some more text</p></div>",
-                       "<div class=\"document\"><p>Summary Next Line</p></div>",
-                       "<div class=\"document\"><p>Summary Next Line</p><p>Some more text</p></div>"},
-               {"markdown", "<p>a</p><p>b</p><p>HUGOMORE42c</p>", "<p>a</p><p>b</p>", "<p>a</p><p>b</p><p>c</p>"},
-               {"markdown", "<p>a</p><p>b</p><p>cHUGOMORE42</p>", "<p>a</p><p>b</p><p>c</p>", "<p>a</p><p>b</p><p>c</p>"},
-               {"markdown", "<p>a</p><p>bHUGOMORE42</p><p>c</p>", "<p>a</p><p>b</p>", "<p>a</p><p>b</p><p>c</p>"},
-               {"markdown", "<p>aHUGOMORE42</p><p>b</p><p>c</p>", "<p>a</p>", "<p>a</p><p>b</p><p>c</p>"},
-               {"markdown", "  HUGOMORE42 ", "", ""},
-               {"markdown", "HUGOMORE42", "", ""},
-               {"markdown", "<p>HUGOMORE42", "<p>", "<p>"},
-               {"markdown", "HUGOMORE42<p>", "", "<p>"},
-               {"markdown", "\n\n<p>HUGOMORE42</p>\n", "<p></p>", "<p></p>"},
-               // Issue #2586
-               // Note: Hugo will not split mid-sentence but will look for the closest
-               // paragraph end marker. This may be a change from Hugo 0.16, but it makes sense.
-               {"markdown", `<p>this is an example HUGOMORE42of the issue.</p>`,
-                       "<p>this is an example of the issue.</p>",
-                       "<p>this is an example of the issue.</p>"},
-               // Issue: #2538
-               {"markdown", fmt.Sprintf(` <p class="lead">%s</p>HUGOMORE42<p>%s</p>
-`,
-                       strings.Repeat("A", 10), strings.Repeat("B", 31)),
-                       fmt.Sprintf(`<p class="lead">%s</p>`, strings.Repeat("A", 10)),
-                       fmt.Sprintf(`<p class="lead">%s</p><p>%s</p>`, strings.Repeat("A", 10), strings.Repeat("B", 31)),
-               },
-       } {
-
-               sc, err := splitUserDefinedSummaryAndContent(this.markup, []byte(this.content))
-
-               require.NoError(t, err)
-               require.NotNil(t, sc, fmt.Sprintf("[%d] Nil %s", i, this.markup))
-               require.Equal(t, this.expectedSummary, string(sc.summary), fmt.Sprintf("[%d] Summary markup %s", i, this.markup))
-               require.Equal(t, this.expectedContent, string(sc.content), fmt.Sprintf("[%d] Content markup %s", i, this.markup))
-       }
-}
-
  func TestPageWithDelimiter(t *testing.T) {
         t.Parallel()
         assertFunc := func(t *testing.T, ext string, pages Pages) {
@@ -720,11 +670,14 @@ func TestPageWithDelimiterForMarkdownThatCrossesBorder(t *testing.T) {
  
         p := s.RegularPages[0]
  
-       if p.Summary() != template.HTML("<p>The <a href=\"http://gohugo.io/\">best static site generator</a>.<sup class=\"footnote-ref\" id=\"fnref:1\"><a href=\"#fn:1\">1</a></sup>\n</p>") {
+       if p.Summary() != template.HTML(
+               "<p>The <a href=\"http://gohugo.io/\">best static site generator</a>.<sup class=\"footnote-ref\" id=\"fnref:1\"><a href=\"#fn:1\">1</a></sup></p>") {
                 t.Fatalf("Got summary:\n%q", p.Summary())
         }
  
-       if p.content() != template.HTML("<p>The <a href=\"http://gohugo.io/\">best static site generator</a>.<sup class=\"footnote-ref\" id=\"fnref:1\"><a href=\"#fn:1\">1</a></sup>\n</p>\n<div class=\"footnotes\">\n\n<hr />\n\n<ol>\n<li id=\"fn:1\">Many people say so.\n <a class=\"footnote-return\" href=\"#fnref:1\"><sup>[return]</sup></a></li>\n</ol>\n</div>") {
+       if p.content() != template.HTML(
+               "<p>The <a href=\"http://gohugo.io/\">best static site generator</a>.<sup class=\"footnote-ref\" id=\"fnref:1\"><a href=\"#fn:1\">1</a></sup></p>\n\n<div class=\"footnotes\">\n\n<hr />\n\n<ol>\n<li id=\"fn:1\">Many people say so.\n <a class=\"footnote-return\" href=\"#fnref:1\"><sup>[return]</sup></a></li>\n</ol>\n</div>") {
+
                 t.Fatalf("Got content:\n%q", p.content())
         }
  }
@@ -1544,6 +1497,70 @@ func TestChompBOM(t *testing.T) {
         checkPageTitle(t, p, "Simple")
  }
  
+// https://github.com/gohugoio/hugo/issues/5381
+func TestPageManualSummary(t *testing.T) {
+       b := newTestSitesBuilder(t)
+       b.WithSimpleConfigFile()
+
+       b.WithContent("page-md-shortcode.md", `---
+title: "Hugo"
+---
+This is a {{< sc >}}.
+<!--more--> 
+Content.
+`)
+
+       b.WithContent("page-md-shortcode-same-line.md", `---
+title: "Hugo"
+---
+This is a {{< sc >}}.<!--more-->Same line.
+`)
+
+       b.WithContent("page-org-shortcode.org", `#+TITLE: T1
+#+AUTHOR: A1
+#+DESCRIPTION: D1
+This is a {{< sc >}}.
+# more
+Content.       
+`)
+
+       b.WithContent("page-org-variant1.org", `#+TITLE: T1
+Summary.
+
+# more
+
+Content.       
+`)
+
+       b.WithTemplatesAdded("layouts/shortcodes/sc.html", "a shortcode")
+       b.WithTemplatesAdded("layouts/_default/single.html", `
+SUMMARY:{{ .Summary }}:END
+--------------------------
+CONTENT:{{ .Content }}
+`)
+
+       b.CreateSites().Build(BuildCfg{})
+
+       b.AssertFileContent("public/page-md-shortcode/index.html",
+               "SUMMARY:<p>This is a a shortcode.</p>:END",
+               "CONTENT:<p>This is a a shortcode.</p>\n\n<p>Content.</p>\n",
+       )
+
+       b.AssertFileContent("public/page-md-shortcode-same-line/index.html",
+               "SUMMARY:<p>This is a a shortcode.</p>:END",
+               "CONTENT:<p>This is a a shortcode.</p>\n\n<p>Same line.</p>\n",
+       )
+
+       b.AssertFileContent("public/page-org-shortcode/index.html",
+               "SUMMARY:<p>This is a a shortcode.</p>:END",
+               "CONTENT:<p>This is a a shortcode.</p>\n\n<p>Content.\t</p>\n",
+       )
+       b.AssertFileContent("public/page-org-variant1/index.html",
+               "SUMMARY:<p>Summary.</p>:END",
+               "CONTENT:<p>Summary.</p>\n\n<p>Content.\t</p>\n",
+       )
+}
+
  // TODO(bep) this may be useful for other tests.
  func compareObjects(a interface{}, b interface{}) bool {
         aStr := strings.Split(fmt.Sprintf("%v", a), "")
@@ -1705,6 +1722,8 @@ Len PlainWords: {{ len .PlainWords }}
  Truncated: {{ .Truncated }}
  Len Summary: {{ len .Summary }}
  Len Content: {{ len .Content }}
+
+SUMMARY:{{ .Summary }}:{{ len .Summary }}:END
  `}
  
         b := newTestSitesBuilder(t)
@@ -1776,10 +1795,10 @@ Summary: In Chinese, 好 means good.
  
         b.AssertFileContent("public/p1/index.html", "WordCount: 510\nFuzzyWordCount: 600\nReadingTime: 3\nLen Plain: 2550\nLen PlainWords: 510\nTruncated: false\nLen Summary: 2549\nLen Content: 2557")
  
-       b.AssertFileContent("public/p2/index.html", "WordCount: 314\nFuzzyWordCount: 400\nReadingTime: 2\nLen Plain: 1570\nLen PlainWords: 314\nTruncated: true\nLen Summary: 34\nLen Content: 1592")
+       b.AssertFileContent("public/p2/index.html", "WordCount: 314\nFuzzyWordCount: 400\nReadingTime: 2\nLen Plain: 1569\nLen PlainWords: 314\nTruncated: true\nLen Summary: 25\nLen Content: 1583")
  
-       b.AssertFileContent("public/p3/index.html", "WordCount: 206\nFuzzyWordCount: 300\nReadingTime: 1\nLen Plain: 639\nLen PlainWords: 7\nTruncated: true\nLen Summary: 52\nLen Content: 661")
-       b.AssertFileContent("public/p4/index.html", "WordCount: 7\nFuzzyWordCount: 100\nReadingTime: 1\nLen Plain: 639\nLen PlainWords: 7\nTruncated: true\nLen Summary: 52\nLen Content: 661")
+       b.AssertFileContent("public/p3/index.html", "WordCount: 206\nFuzzyWordCount: 300\nReadingTime: 1\nLen Plain: 638\nLen PlainWords: 7\nTruncated: true\nLen Summary: 43\nLen Content: 652")
+       b.AssertFileContent("public/p4/index.html", "WordCount: 7\nFuzzyWordCount: 100\nReadingTime: 1\nLen Plain: 638\nLen PlainWords: 7\nTruncated: true\nLen Summary: 43\nLen Content: 652")
         b.AssertFileContent("public/p5/index.html", "WordCount: 206\nFuzzyWordCount: 300\nReadingTime: 1\nLen Plain: 638\nLen PlainWords: 7\nTruncated: true\nLen Summary: 229\nLen Content: 653")
         b.AssertFileContent("public/p6/index.html", "WordCount: 7\nFuzzyWordCount: 100\nReadingTime: 1\nLen Plain: 638\nLen PlainWords: 7\nTruncated: false\nLen Summary: 637\nLen Content: 653")
  
diff --git a/hugolib/testhelpers_test.go b/hugolib/testhelpers_test.go

index 469cfeb4a8e1ca70fc92dd1dd30c964223e0105f..f80f40c48345c40f170fe3cc7116870abb8379ac 100644 (file)
--- a/hugolib/testhelpers_test.go
+++ b/hugolib/testhelpers_test.go
@@ -4,7 +4,9 @@ import (
         "io/ioutil"
         "path/filepath"
         "runtime"
+       "strconv"
         "testing"
+       "unicode/utf8"
  
         "bytes"
         "fmt"
@@ -698,6 +700,26 @@ func dumpPages(pages ...*Page) {
         }
  }
  
+func printStringIndexes(s string) {
+       lines := strings.Split(s, "\n")
+       i := 0
+
+       for _, line := range lines {
+
+               for _, r := range line {
+                       fmt.Printf("%-3s", strconv.Itoa(i))
+                       i += utf8.RuneLen(r)
+               }
+               i++
+               fmt.Println()
+               for _, r := range line {
+                       fmt.Printf("%-3s", string(r))
+               }
+               fmt.Println()
+
+       }
+
+}
  func isCI() bool {
         return os.Getenv("CI") != ""
  }
diff --git a/parser/pageparser/pagelexer.go b/parser/pageparser/pagelexer.go

index ddf109b3de07570abf2f1759b47ab86a0fdf0eab..565be2994c3c8d27bb842411d9e452c35055dd60 100644 (file)
--- a/parser/pageparser/pagelexer.go
+++ b/parser/pageparser/pagelexer.go
@@ -194,6 +194,16 @@ func (l *pageLexer) consumeCRLF() bool {
         return consumed
  }
  
+func (l *pageLexer) consumeSpace() {
+       for {
+               r := l.next()
+               if r == eof || !unicode.IsSpace(r) {
+                       l.backup()
+                       return
+               }
+       }
+}
+
  func lexMainSection(l *pageLexer) stateFunc {
         // Fast forward as far as possible.
         var l1, l2 int
@@ -234,6 +244,8 @@ func lexMainSection(l *pageLexer) stateFunc {
                                 }
                                 l.summaryDividerChecked = true
                                 l.pos += len(l.summaryDivider)
+                               // This makes it a little easier to reason about later.
+                               l.consumeSpace()
                                 l.emit(TypeLeadSummaryDivider)
                         }
                 }
diff --git a/parser/pageparser/pageparser_intro_test.go b/parser/pageparser/pageparser_intro_test.go

index ba4a2c84b761904ce16eff1d194a733f28eca335..60c431c10c7b28454018f19615d48e3ead8891ed 100644 (file)
--- a/parser/pageparser/pageparser_intro_test.go
+++ b/parser/pageparser/pageparser_intro_test.go
@@ -37,7 +37,7 @@ var (
         tstFrontMatterYAMLCRLF = nti(TypeFrontMatterYAML, "foo: \"bar\"\r\n")
         tstFrontMatterJSON     = nti(TypeFrontMatterJSON, tstJSON+"\r\n")
         tstSomeText            = nti(tText, "\nSome text.\n")
-       tstSummaryDivider      = nti(TypeLeadSummaryDivider, "<!--more-->")
+       tstSummaryDivider      = nti(TypeLeadSummaryDivider, "<!--more-->\n")
         tstHtmlStart           = nti(TypeHTMLStart, "<")
  
         tstORG = `
@@ -65,8 +65,9 @@ var frontMatterTests = []lexerTest{
         {"TOML front matter", "+++\nfoo = \"bar\"\n+++\n\nSome text.\n", []Item{tstFrontMatterTOML, tstSomeText, tstEOF}},
         {"JSON front matter", tstJSON + "\r\n\nSome text.\n", []Item{tstFrontMatterJSON, tstSomeText, tstEOF}},
         {"ORG front matter", tstORG + "\nSome text.\n", []Item{tstFrontMatterORG, tstSomeText, tstEOF}},
-       {"Summary divider ORG", tstORG + "\nSome text.\n# more\nSome text.\n", []Item{tstFrontMatterORG, tstSomeText, nti(TypeLeadSummaryDivider, "# more"), tstSomeText, tstEOF}},
-       {"Summary divider", "+++\nfoo = \"bar\"\n+++\n\nSome text.\n<!--more-->\nSome text.\n", []Item{tstFrontMatterTOML, tstSomeText, tstSummaryDivider, tstSomeText, tstEOF}},
+       {"Summary divider ORG", tstORG + "\nSome text.\n# more\nSome text.\n", []Item{tstFrontMatterORG, tstSomeText, nti(TypeLeadSummaryDivider, "# more\n"), nti(tText, "Some text.\n"), tstEOF}},
+       {"Summary divider", "+++\nfoo = \"bar\"\n+++\n\nSome text.\n<!--more-->\nSome text.\n", []Item{tstFrontMatterTOML, tstSomeText, tstSummaryDivider, nti(tText, "Some text.\n"), tstEOF}},
+       {"Summary divider same line", "+++\nfoo = \"bar\"\n+++\n\nSome text.<!--more-->Some text.\n", []Item{tstFrontMatterTOML, nti(tText, "\nSome text."), nti(TypeLeadSummaryDivider, "<!--more-->"), nti(tText, "Some text.\n"), tstEOF}},
  }
  
  func TestFrontMatter(t *testing.T) {
author	Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>
	Tue, 30 Oct 2018 19:24:34 +0000 (20:24 +0100)
committer	Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>
	Wed, 31 Oct 2018 22:14:37 +0000 (23:14 +0100)
hugolib/page.go		patch | blob | history
hugolib/page_content.go		patch | blob | history
hugolib/page_test.go		patch | blob | history
hugolib/testhelpers_test.go		patch | blob | history
parser/pageparser/pagelexer.go		patch | blob | history
parser/pageparser/pageparser_intro_test.go		patch | blob | history