publisher: Fix tag collector for nested table elements
author Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>
Mon, 25 May 2020 19:05:59 +0000 (21:05 +0200)
committer Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>
Wed, 27 May 2020 09:10:48 +0000 (11:10 +0200)
Fixes #7318

hugolib/site_test.go
publisher/htmlElementsCollector.go
publisher/htmlElementsCollector_test.go

index 3116113c0fe241349b5382c1de03984f386f656e..54c2fbe59d4a5b9a9b146cc36fd1289144c0e389 100644 (file)
@@ -1123,7 +1123,7 @@ ABC.
                els := stats.HTMLElements
 
                b.Assert(els.Classes, qt.HasLen, 3606) // (4 * 900) + 4 +2
-               b.Assert(els.Tags, qt.HasLen, 8)
+               b.Assert(els.Tags, qt.HasLen, 9)
                b.Assert(els.IDs, qt.HasLen, 1)
        }
 
index e033f52d71698974f1e5248510f07df52a985105..7bb2ebf15328f081d7e2d69dd01a02e8fa31becd 100644 (file)
@@ -116,7 +116,13 @@ func (w *cssClassCollectorWriter) Write(p []byte) (n int, err error) {
 
                                        w.buff.Reset()
 
+                                       if strings.HasPrefix(s, "</") {
+                                               continue
+                                       }
+
+                                       s, tagName := w.insertStandinHTMLElement(s)
                                        el := parseHTMLElement(s)
+                                       el.Tag = tagName
 
                                        w.collector.mu.Lock()
                                        w.collector.elementSet[s] = true
@@ -132,6 +138,20 @@ func (w *cssClassCollectorWriter) Write(p []byte) (n int, err error) {
        return
 }
 
+// The net/html parser does not handle single table elements as input, e.g. tbody.
+// We only care about the element/class/ids, so just store away the original tag name
+// and pretend it's a <div>.
+//
+// el is expected to start with '<' immediately followed by the tag name. It
+// returns el with the tag name replaced by "div", and the original tag name in
+// lower case. The tag name ends at the first HTML whitespace character (space,
+// tab, newline, carriage return or form feed), so elements whose attributes
+// start on a new line keep those attributes. Input too short to hold a tag
+// name is returned unchanged with an empty tag name.
+func (c *cssClassCollectorWriter) insertStandinHTMLElement(el string) (string, string) {
+       if len(el) < 2 {
+               // Nothing follows the opening '<'; slicing below would panic on "".
+               return el, ""
+       }
+       tag := el[1:]
+       if i := strings.IndexAny(tag, " \t\r\n\f"); i != -1 {
+               tag = tag[:i]
+       }
+       // Splice by position instead of searching for the tag text, so an empty
+       // or repeated tag name cannot land the replacement in the wrong place.
+       newv := el[:1] + "div" + el[1+len(tag):]
+       return newv, strings.ToLower(tag)
+}
+
 func (c *cssClassCollectorWriter) endCollecting(drop bool) {
        c.isCollecting = false
        c.inQuote = false
index 72abd94f07d1d35c14a6486fffaff15cdcf39cbe..e255a735478b94a3c2035fb255b07a61ac5a806b 100644 (file)
@@ -51,6 +51,12 @@ func TestClassCollector(t *testing.T) {
                {"duplicates", `<div class="b a b"></div>`, f("div", "a b", "")},
                {"single quote", `<body class='b a'></body>`, f("body", "a b", "")},
                {"no quote", `<body class=b id=myelement></body>`, f("body", "b", "myelement")},
+               {"thead", `
+               https://github.com/gohugoio/hugo/issues/7318
+<table class="cl1">
+    <thead class="cl2"><tr class="cl3"><td class="cl4"></td></tr></thead>
+    <tbody class="cl5"><tr class="cl6"><td class="cl7"></td></tr></tbody>
+</table>`, f("table tbody td thead tr", "cl1 cl2 cl3 cl4 cl5 cl6 cl7", "")},
                // https://github.com/gohugoio/hugo/issues/7161
                {"minified a href", `<a class="b a" href=/></a>`, f("a", "a b", "")},