From: Bjørn Erik Pedersen Date: Wed, 17 Oct 2018 11:48:55 +0000 (+0200) Subject: parser/pageparser: Add front matter etc. support X-Git-Tag: v0.50~45 X-Git-Url: http://git.maquefel.me/?a=commitdiff_plain;h=2fdc4a24d;p=brevno-suite%2Fhugo parser/pageparser: Add front matter etc. support See #5324 --- diff --git a/parser/pageparser/item.go b/parser/pageparser/item.go index ae2f6cbc..f7495c90 100644 --- a/parser/pageparser/item.go +++ b/parser/pageparser/item.go @@ -73,10 +73,10 @@ func (i Item) String() string { return i.Val case i.typ > tKeywordMarker: return fmt.Sprintf("<%s>", i.Val) - case len(i.Val) > 20: - return fmt.Sprintf("%.20q...", i.Val) + case len(i.Val) > 50: + return fmt.Sprintf("%v:%.20q...", i.typ, i.Val) } - return fmt.Sprintf("[%s]", i.Val) + return fmt.Sprintf("%v:[%s]", i.typ, i.Val) } type itemType int @@ -85,6 +85,15 @@ const ( tError itemType = iota tEOF + // page items + tHTMLLead // < + tSummaryDivider // + tSummaryDividerOrg // # more + tFrontMatterYAML + tFrontMatterTOML + tFrontMatterJSON + tFrontMatterORG + // shortcode items tLeftDelimScNoMarkup tRightDelimScNoMarkup @@ -95,8 +104,7 @@ const ( tScParam tScParamVal - //itemIdentifier - tText // plain text, used for everything outside the shortcodes + tText // plain text // preserved for later - keywords come after this tKeywordMarker diff --git a/parser/pageparser/pagelexer.go b/parser/pageparser/pagelexer.go index 5267c563..0c97becd 100644 --- a/parser/pageparser/pagelexer.go +++ b/parser/pageparser/pagelexer.go @@ -44,13 +44,15 @@ type lexerShortcodeState struct { } type pageLexer struct { - name string - input string - state stateFunc - pos pos // input position - start pos // item start position - width pos // width of last element - lastPos pos // position of the last item returned by nextItem + input string + stateStart stateFunc + state stateFunc + pos pos // input position + start pos // item start position + width pos // width of last element + lastPos pos // position of the last item returned by nextItem + + contentSections int lexerShortcodeState @@ -63,18 +65,18 @@ func Parse(s string) *Tokens { } func ParseFrom(s string, from int) *Tokens { - lexer := newPageLexer("default", s, pos(from)) + lexer := newPageLexer(s, pos(from), lexMainSection) // TODO(bep) 2errors lexer.run() return &Tokens{lexer: lexer} } // note: the input position here is normally 0 (start), but // can be set if position of first shortcode is known -func newPageLexer(name, input string, inputPosition pos) *pageLexer { +func newPageLexer(input string, inputPosition pos, stateStart stateFunc) *pageLexer { lexer := &pageLexer{ - name: name, - input: input, - pos: inputPosition, + input: input, + pos: inputPosition, + stateStart: stateStart, lexerShortcodeState: lexerShortcodeState{ currLeftDelimItem: tLeftDelimScNoMarkup, currRightDelimItem: tRightDelimScNoMarkup, @@ -88,14 +90,13 @@ func newPageLexer(name, input string, inputPosition pos) *pageLexer { // main loop func (l *pageLexer) run() *pageLexer { - for l.state = lexTextOutsideShortcodes; l.state != nil; { + for l.state = l.stateStart; l.state != nil; { l.state = l.state(l) } return l } -// state functions - +// Shortcode syntax const ( leftDelimScNoMarkup = "{{<" rightDelimScNoMarkup = ">}}" @@ -105,6 +106,12 @@ const ( rightComment = "*/" ) +// Page syntax +const ( + summaryDivider = "" + summaryDividerOrg = "# more" +) + func (l *pageLexer) next() rune { if int(l.pos) >= len(l.input) { l.width = 0 @@ -178,11 +185,21 @@ func (l *pageLexer) nextItem() Item { return item } -// 
scans until an opening shortcode opening bracket. -// if no shortcodes, it will keep on scanning until EOF -func lexTextOutsideShortcodes(l *pageLexer) stateFunc { +func (l *pageLexer) consumeCRLF() bool { + var consumed bool + for _, r := range crLf { + if l.next() != r { + l.backup() + } else { + consumed = true + } + } + return consumed +} + +func lexMainSection(l *pageLexer) stateFunc { for { - if strings.HasPrefix(l.input[l.pos:], leftDelimScWithMarkup) || strings.HasPrefix(l.input[l.pos:], leftDelimScNoMarkup) { + if l.isShortCodeStart() { if l.pos > l.start { l.emit(tText) } @@ -194,12 +211,79 @@ func lexTextOutsideShortcodes(l *pageLexer) stateFunc { l.currRightDelimItem = tRightDelimScNoMarkup } return lexShortcodeLeftDelim + } + if l.contentSections <= 1 { + if strings.HasPrefix(l.input[l.pos:], summaryDivider) { + if l.pos > l.start { + l.emit(tText) + } + l.contentSections++ + l.pos += pos(len(summaryDivider)) + l.emit(tSummaryDivider) + } else if strings.HasPrefix(l.input[l.pos:], summaryDividerOrg) { + if l.pos > l.start { + l.emit(tText) + } + l.contentSections++ + l.pos += pos(len(summaryDividerOrg)) + l.emit(tSummaryDividerOrg) + } } - if l.next() == eof { + + r := l.next() + if r == eof { break } + } + + return lexDone + +} + +func (l *pageLexer) isShortCodeStart() bool { + return strings.HasPrefix(l.input[l.pos:], leftDelimScWithMarkup) || strings.HasPrefix(l.input[l.pos:], leftDelimScNoMarkup) +} + +func lexIntroSection(l *pageLexer) stateFunc { +LOOP: + for { + r := l.next() + if r == eof { + break + } + + switch { + case r == '+': + return l.lexFrontMatterSection(tFrontMatterTOML, r, "TOML", "+++") + case r == '-': + return l.lexFrontMatterSection(tFrontMatterYAML, r, "YAML", "---") + case r == '{': + return lexFrontMatterJSON + case r == '#': + return lexFrontMatterOrgMode + case !isSpace(r) && !isEndOfLine(r): + if r == '<' { + l.emit(tHTMLLead) + // Not need to look further. Hugo treats this as plain HTML, + // no front matter, no shortcodes, no nothing. + l.pos = pos(len(l.input)) + l.emit(tText) + break LOOP + } + return l.errorf("failed to detect front matter type; got unknown identifier %q", r) + } + } + + l.contentSections = 1 + + // Now move on to the shortcodes. + return lexMainSection +} + +func lexDone(l *pageLexer) stateFunc { + // Done! if l.pos > l.start { l.emit(tText) @@ -208,6 +292,122 @@ func lexTextOutsideShortcodes(l *pageLexer) stateFunc { return nil } +func lexFrontMatterJSON(l *pageLexer) stateFunc { + // Include the left delimiter + l.backup() + + var ( + inQuote bool + level int + ) + + for { + + r := l.next() + + switch { + case r == eof: + return l.errorf("unexpected EOF parsing JSON front matter") + case r == '{': + if !inQuote { + level++ + } + case r == '}': + if !inQuote { + level-- + } + case r == '"': + inQuote = !inQuote + case r == '\\': + // This may be an escaped quote. Make sure it's not marked as a + // real one. + l.next() + } + + if level == 0 { + break + } + } + + l.consumeCRLF() + l.emit(tFrontMatterJSON) + + return lexMainSection +} + +func lexFrontMatterOrgMode(l *pageLexer) stateFunc { + /* + #+TITLE: Test File For chaseadamsio/goorgeous + #+AUTHOR: Chase Adams + #+DESCRIPTION: Just another golang parser for org content! 
+ */ + + const prefix = "#+" + + l.backup() + + if !strings.HasPrefix(l.input[l.pos:], prefix) { + // TODO(bep) consider error + return lexMainSection + } + + // Read lines until we no longer see a #+ prefix +LOOP: + for { + + r := l.next() + + switch { + case r == '\n': + if !strings.HasPrefix(l.input[l.pos:], prefix) { + break LOOP + } + case r == eof: + break LOOP + + } + } + + l.emit(tFrontMatterORG) + + return lexMainSection + +} + +// Handle YAML or TOML front matter. +func (l *pageLexer) lexFrontMatterSection(tp itemType, delimr rune, name, delim string) stateFunc { + for i := 0; i < 2; i++ { + if r := l.next(); r != delimr { + return l.errorf("invalid %s delimiter", name) + } + } + + if !l.consumeCRLF() { + return l.errorf("invalid %s delimiter", name) + } + + // We don't care about the delimiters. + l.ignore() + + for { + r := l.next() + if r == eof { + return l.errorf("EOF looking for end %s front matter delimiter", name) + } + if isEndOfLine(r) { + if strings.HasPrefix(l.input[l.pos:], delim) { + l.emit(tp) + l.pos += 3 + l.consumeCRLF() + l.ignore() + break + } + } + } + + return lexMainSection +} + func lexShortcodeLeftDelim(l *pageLexer) stateFunc { l.pos += pos(len(l.currentLeftShortcodeDelim())) if strings.HasPrefix(l.input[l.pos:], leftComment) { @@ -234,14 +434,14 @@ func lexShortcodeComment(l *pageLexer) stateFunc { l.ignore() l.pos += pos(len(l.currentRightShortcodeDelim())) l.emit(tText) - return lexTextOutsideShortcodes + return lexMainSection } func lexShortcodeRightDelim(l *pageLexer) stateFunc { l.closingState = 0 l.pos += pos(len(l.currentRightShortcodeDelim())) l.emit(l.currentRightShortcodeDelimItem()) - return lexTextOutsideShortcodes + return lexMainSection } // either: @@ -485,6 +685,8 @@ func isAlphaNumericOrHyphen(r rune) bool { return isAlphaNumeric(r) || r == '-' } +var crLf = []rune{'\r', '\n'} + func isEndOfLine(r rune) bool { return r == '\r' || r == '\n' } diff --git a/parser/pageparser/pageparser_intro_test.go b/parser/pageparser/pageparser_intro_test.go new file mode 100644 index 00000000..3dc08c77 --- /dev/null +++ b/parser/pageparser/pageparser_intro_test.go @@ -0,0 +1,103 @@ +// Copyright 2018 The Hugo Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package pageparser + +import ( + "fmt" + "strings" + "testing" +) + +type lexerTest struct { + name string + input string + items []Item +} + +var ( + tstJSON = `{ "a": { "b": "\"Hugo\"}" } }` + tstHTMLLead = Item{tHTMLLead, 0, " <"} + tstFrontMatterTOML = Item{tFrontMatterTOML, 0, "foo = \"bar\"\n"} + tstFrontMatterYAML = Item{tFrontMatterYAML, 0, "foo: \"bar\"\n"} + tstFrontMatterYAMLCRLF = Item{tFrontMatterYAML, 0, "foo: \"bar\"\r\n"} + tstFrontMatterJSON = Item{tFrontMatterJSON, 0, tstJSON + "\r\n"} + tstSomeText = Item{tText, 0, "\nSome text.\n"} + tstSummaryDivider = Item{tSummaryDivider, 0, ""} + tstSummaryDividerOrg = Item{tSummaryDividerOrg, 0, "# more"} + + tstORG = ` +#+TITLE: T1 +#+AUTHOR: A1 +#+DESCRIPTION: D1 +` + tstFrontMatterORG = Item{tFrontMatterORG, 0, tstORG} +) + +var crLfReplacer = strings.NewReplacer("\r", "#", "\n", "$") + +// TODO(bep) a way to toggle ORG mode vs the rest. +var frontMatterTests = []lexerTest{ + {"empty", "", []Item{tstEOF}}, + {"HTML Document", ` `, []Item{tstHTMLLead, Item{tText, 0, "html> "}, tstEOF}}, + {"YAML front matter", "---\nfoo: \"bar\"\n---\n\nSome text.\n", []Item{tstFrontMatterYAML, tstSomeText, tstEOF}}, + // Note that we keep all bytes as they are, but we need to handle CRLF + {"YAML front matter CRLF", "---\r\nfoo: \"bar\"\r\n---\n\nSome text.\n", []Item{tstFrontMatterYAMLCRLF, tstSomeText, tstEOF}}, + {"TOML front matter", "+++\nfoo = \"bar\"\n+++\n\nSome text.\n", []Item{tstFrontMatterTOML, tstSomeText, tstEOF}}, + {"JSON front matter", tstJSON + "\r\n\nSome text.\n", []Item{tstFrontMatterJSON, tstSomeText, tstEOF}}, + {"ORG front matter", tstORG + "\nSome text.\n", []Item{tstFrontMatterORG, tstSomeText, tstEOF}}, + {"Summary divider ORG", tstORG + "\nSome text.\n# more\nSome text.\n", []Item{tstFrontMatterORG, tstSomeText, tstSummaryDividerOrg, tstSomeText, tstEOF}}, + {"Summary divider", "+++\nfoo = \"bar\"\n+++\n\nSome text.\n\nSome text.\n", []Item{tstFrontMatterTOML, tstSomeText, tstSummaryDivider, tstSomeText, tstEOF}}, +} + +func TestFrontMatter(t *testing.T) { + t.Parallel() + for i, test := range frontMatterTests { + items := collect(test.name, test.input, false, lexIntroSection) + if !equal(items, test.items) { + got := crLfReplacer.Replace(fmt.Sprint(items)) + expected := crLfReplacer.Replace(fmt.Sprint(test.items)) + t.Errorf("[%d] %s: got\n\t%v\nexpected\n\t%v", i, test.name, got, expected) + } + } +} + +func collect(name, input string, skipFrontMatter bool, stateStart stateFunc) (items []Item) { + l := newPageLexer(input, 0, stateStart) + l.run() + + for { + item := l.nextItem() + items = append(items, item) + if item.typ == tEOF || item.typ == tError { + break + } + } + return +} + +// no positional checking, for now ... +func equal(i1, i2 []Item) bool { + if len(i1) != len(i2) { + return false + } + for k := range i1 { + if i1[k].typ != i2[k].typ { + return false + } + if i1[k].Val != i2[k].Val { + return false + } + } + return true +} diff --git a/parser/pageparser/pageparser_shortcode_test.go b/parser/pageparser/pageparser_shortcode_test.go new file mode 100644 index 00000000..525c7452 --- /dev/null +++ b/parser/pageparser/pageparser_shortcode_test.go @@ -0,0 +1,171 @@ +// Copyright 2018 The Hugo Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package pageparser + +import "testing" + +var ( + tstEOF = Item{tEOF, 0, ""} + tstLeftNoMD = Item{tLeftDelimScNoMarkup, 0, "{{<"} + tstRightNoMD = Item{tRightDelimScNoMarkup, 0, ">}}"} + tstLeftMD = Item{tLeftDelimScWithMarkup, 0, "{{%"} + tstRightMD = Item{tRightDelimScWithMarkup, 0, "%}}"} + tstSCClose = Item{tScClose, 0, "/"} + tstSC1 = Item{tScName, 0, "sc1"} + tstSC2 = Item{tScName, 0, "sc2"} + tstSC3 = Item{tScName, 0, "sc3"} + tstSCSlash = Item{tScName, 0, "sc/sub"} + tstParam1 = Item{tScParam, 0, "param1"} + tstParam2 = Item{tScParam, 0, "param2"} + tstVal = Item{tScParamVal, 0, "Hello World"} +) + +var shortCodeLexerTests = []lexerTest{ + {"empty", "", []Item{tstEOF}}, + {"spaces", " \t\n", []Item{{tText, 0, " \t\n"}, tstEOF}}, + {"text", `to be or not`, []Item{{tText, 0, "to be or not"}, tstEOF}}, + {"no markup", `{{< sc1 >}}`, []Item{tstLeftNoMD, tstSC1, tstRightNoMD, tstEOF}}, + {"with EOL", "{{< sc1 \n >}}", []Item{tstLeftNoMD, tstSC1, tstRightNoMD, tstEOF}}, + + {"forward slash inside name", `{{< sc/sub >}}`, []Item{tstLeftNoMD, tstSCSlash, tstRightNoMD, tstEOF}}, + + {"simple with markup", `{{% sc1 %}}`, []Item{tstLeftMD, tstSC1, tstRightMD, tstEOF}}, + {"with spaces", `{{< sc1 >}}`, []Item{tstLeftNoMD, tstSC1, tstRightNoMD, tstEOF}}, + {"mismatched rightDelim", `{{< sc1 %}}`, []Item{tstLeftNoMD, tstSC1, + {tError, 0, "unrecognized character in shortcode action: U+0025 '%'. 
Note: Parameters with non-alphanumeric args must be quoted"}}}, + {"inner, markup", `{{% sc1 %}} inner {{% /sc1 %}}`, []Item{ + tstLeftMD, + tstSC1, + tstRightMD, + {tText, 0, " inner "}, + tstLeftMD, + tstSCClose, + tstSC1, + tstRightMD, + tstEOF, + }}, + {"close, but no open", `{{< /sc1 >}}`, []Item{ + tstLeftNoMD, {tError, 0, "got closing shortcode, but none is open"}}}, + {"close wrong", `{{< sc1 >}}{{< /another >}}`, []Item{ + tstLeftNoMD, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose, + {tError, 0, "closing tag for shortcode 'another' does not match start tag"}}}, + {"close, but no open, more", `{{< sc1 >}}{{< /sc1 >}}{{< /another >}}`, []Item{ + tstLeftNoMD, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose, + {tError, 0, "closing tag for shortcode 'another' does not match start tag"}}}, + {"close with extra keyword", `{{< sc1 >}}{{< /sc1 keyword>}}`, []Item{ + tstLeftNoMD, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose, tstSC1, + {tError, 0, "unclosed shortcode"}}}, + {"Youtube id", `{{< sc1 -ziL-Q_456igdO-4 >}}`, []Item{ + tstLeftNoMD, tstSC1, {tScParam, 0, "-ziL-Q_456igdO-4"}, tstRightNoMD, tstEOF}}, + {"non-alphanumerics param quoted", `{{< sc1 "-ziL-.%QigdO-4" >}}`, []Item{ + tstLeftNoMD, tstSC1, {tScParam, 0, "-ziL-.%QigdO-4"}, tstRightNoMD, tstEOF}}, + + {"two params", `{{< sc1 param1 param2 >}}`, []Item{ + tstLeftNoMD, tstSC1, tstParam1, tstParam2, tstRightNoMD, tstEOF}}, + // issue #934 + {"self-closing", `{{< sc1 />}}`, []Item{ + tstLeftNoMD, tstSC1, tstSCClose, tstRightNoMD, tstEOF}}, + // Issue 2498 + {"multiple self-closing", `{{< sc1 />}}{{< sc1 />}}`, []Item{ + tstLeftNoMD, tstSC1, tstSCClose, tstRightNoMD, + tstLeftNoMD, tstSC1, tstSCClose, tstRightNoMD, tstEOF}}, + {"self-closing with param", `{{< sc1 param1 />}}`, []Item{ + tstLeftNoMD, tstSC1, tstParam1, tstSCClose, tstRightNoMD, tstEOF}}, + {"multiple self-closing with param", `{{< sc1 param1 />}}{{< sc1 param1 />}}`, []Item{ + tstLeftNoMD, tstSC1, tstParam1, tstSCClose, tstRightNoMD, + tstLeftNoMD, tstSC1, tstParam1, tstSCClose, tstRightNoMD, tstEOF}}, + {"multiple different self-closing with param", `{{< sc1 param1 />}}{{< sc2 param1 />}}`, []Item{ + tstLeftNoMD, tstSC1, tstParam1, tstSCClose, tstRightNoMD, + tstLeftNoMD, tstSC2, tstParam1, tstSCClose, tstRightNoMD, tstEOF}}, + {"nested simple", `{{< sc1 >}}{{< sc2 >}}{{< /sc1 >}}`, []Item{ + tstLeftNoMD, tstSC1, tstRightNoMD, + tstLeftNoMD, tstSC2, tstRightNoMD, + tstLeftNoMD, tstSCClose, tstSC1, tstRightNoMD, tstEOF}}, + {"nested complex", `{{< sc1 >}}ab{{% sc2 param1 %}}cd{{< sc3 >}}ef{{< /sc3 >}}gh{{% /sc2 %}}ij{{< /sc1 >}}kl`, []Item{ + tstLeftNoMD, tstSC1, tstRightNoMD, + {tText, 0, "ab"}, + tstLeftMD, tstSC2, tstParam1, tstRightMD, + {tText, 0, "cd"}, + tstLeftNoMD, tstSC3, tstRightNoMD, + {tText, 0, "ef"}, + tstLeftNoMD, tstSCClose, tstSC3, tstRightNoMD, + {tText, 0, "gh"}, + tstLeftMD, tstSCClose, tstSC2, tstRightMD, + {tText, 0, "ij"}, + tstLeftNoMD, tstSCClose, tstSC1, tstRightNoMD, + {tText, 0, "kl"}, tstEOF, + }}, + + {"two quoted params", `{{< sc1 "param nr. 1" "param nr. 2" >}}`, []Item{ + tstLeftNoMD, tstSC1, {tScParam, 0, "param nr. 1"}, {tScParam, 0, "param nr. 
2"}, tstRightNoMD, tstEOF}}, + {"two named params", `{{< sc1 param1="Hello World" param2="p2Val">}}`, []Item{ + tstLeftNoMD, tstSC1, tstParam1, tstVal, tstParam2, {tScParamVal, 0, "p2Val"}, tstRightNoMD, tstEOF}}, + {"escaped quotes", `{{< sc1 param1=\"Hello World\" >}}`, []Item{ + tstLeftNoMD, tstSC1, tstParam1, tstVal, tstRightNoMD, tstEOF}}, + {"escaped quotes, positional param", `{{< sc1 \"param1\" >}}`, []Item{ + tstLeftNoMD, tstSC1, tstParam1, tstRightNoMD, tstEOF}}, + {"escaped quotes inside escaped quotes", `{{< sc1 param1=\"Hello \"escaped\" World\" >}}`, []Item{ + tstLeftNoMD, tstSC1, tstParam1, + {tScParamVal, 0, `Hello `}, {tError, 0, `got positional parameter 'escaped'. Cannot mix named and positional parameters`}}}, + {"escaped quotes inside nonescaped quotes", + `{{< sc1 param1="Hello \"escaped\" World" >}}`, []Item{ + tstLeftNoMD, tstSC1, tstParam1, {tScParamVal, 0, `Hello "escaped" World`}, tstRightNoMD, tstEOF}}, + {"escaped quotes inside nonescaped quotes in positional param", + `{{< sc1 "Hello \"escaped\" World" >}}`, []Item{ + tstLeftNoMD, tstSC1, {tScParam, 0, `Hello "escaped" World`}, tstRightNoMD, tstEOF}}, + {"unterminated quote", `{{< sc1 param2="Hello World>}}`, []Item{ + tstLeftNoMD, tstSC1, tstParam2, {tError, 0, "unterminated quoted string in shortcode parameter-argument: 'Hello World>}}'"}}}, + {"one named param, one not", `{{< sc1 param1="Hello World" p2 >}}`, []Item{ + tstLeftNoMD, tstSC1, tstParam1, tstVal, + {tError, 0, "got positional parameter 'p2'. Cannot mix named and positional parameters"}}}, + {"one named param, one quoted positional param", `{{< sc1 param1="Hello World" "And Universe" >}}`, []Item{ + tstLeftNoMD, tstSC1, tstParam1, tstVal, + {tError, 0, "got quoted positional parameter. Cannot mix named and positional parameters"}}}, + {"one quoted positional param, one named param", `{{< sc1 "param1" param2="And Universe" >}}`, []Item{ + tstLeftNoMD, tstSC1, tstParam1, + {tError, 0, "got named parameter 'param2'. Cannot mix named and positional parameters"}}}, + {"ono positional param, one not", `{{< sc1 param1 param2="Hello World">}}`, []Item{ + tstLeftNoMD, tstSC1, tstParam1, + {tError, 0, "got named parameter 'param2'. 
Cannot mix named and positional parameters"}}}, + {"commented out", `{{}}`, []Item{ + {tText, 0, "{{<"}, {tText, 0, " sc1 "}, {tText, 0, ">}}"}, tstEOF}}, + {"commented out, with asterisk inside", `{{}}`, []Item{ + {tText, 0, "{{<"}, {tText, 0, " sc1 \"**/*.pdf\" "}, {tText, 0, ">}}"}, tstEOF}}, + {"commented out, missing close", `{{}}`, []Item{ + {tError, 0, "comment must be closed"}}}, + {"commented out, misplaced close", `{{}}*/`, []Item{ + {tError, 0, "comment must be closed"}}}, +} + +func TestShortcodeLexer(t *testing.T) { + t.Parallel() + for i, test := range shortCodeLexerTests { + items := collect(test.name, test.input, true, lexMainSection) + if !equal(items, test.items) { + t.Errorf("[%d] %s: got\n\t%v\nexpected\n\t%v", i, test.name, items, test.items) + } + } +} + +func BenchmarkShortcodeLexer(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + for _, test := range shortCodeLexerTests { + items := collect(test.name, test.input, true, lexMainSection) + if !equal(items, test.items) { + b.Errorf("%s: got\n\t%v\nexpected\n\t%v", test.name, items, test.items) + } + } + } +} diff --git a/parser/pageparser/pageparser_test.go b/parser/pageparser/pageparser_test.go deleted file mode 100644 index ceb439a6..00000000 --- a/parser/pageparser/pageparser_test.go +++ /dev/null @@ -1,207 +0,0 @@ -// Copyright 2018 The Hugo Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package pageparser - -import ( - "testing" -) - -type shortCodeLexerTest struct { - name string - input string - items []Item -} - -var ( - tstEOF = Item{tEOF, 0, ""} - tstLeftNoMD = Item{tLeftDelimScNoMarkup, 0, "{{<"} - tstRightNoMD = Item{tRightDelimScNoMarkup, 0, ">}}"} - tstLeftMD = Item{tLeftDelimScWithMarkup, 0, "{{%"} - tstRightMD = Item{tRightDelimScWithMarkup, 0, "%}}"} - tstSCClose = Item{tScClose, 0, "/"} - tstSC1 = Item{tScName, 0, "sc1"} - tstSC2 = Item{tScName, 0, "sc2"} - tstSC3 = Item{tScName, 0, "sc3"} - tstSCSlash = Item{tScName, 0, "sc/sub"} - tstParam1 = Item{tScParam, 0, "param1"} - tstParam2 = Item{tScParam, 0, "param2"} - tstVal = Item{tScParamVal, 0, "Hello World"} -) - -var shortCodeLexerTests = []shortCodeLexerTest{ - {"empty", "", []Item{tstEOF}}, - {"spaces", " \t\n", []Item{{tText, 0, " \t\n"}, tstEOF}}, - {"text", `to be or not`, []Item{{tText, 0, "to be or not"}, tstEOF}}, - {"no markup", `{{< sc1 >}}`, []Item{tstLeftNoMD, tstSC1, tstRightNoMD, tstEOF}}, - {"with EOL", "{{< sc1 \n >}}", []Item{tstLeftNoMD, tstSC1, tstRightNoMD, tstEOF}}, - - {"forward slash inside name", `{{< sc/sub >}}`, []Item{tstLeftNoMD, tstSCSlash, tstRightNoMD, tstEOF}}, - - {"simple with markup", `{{% sc1 %}}`, []Item{tstLeftMD, tstSC1, tstRightMD, tstEOF}}, - {"with spaces", `{{< sc1 >}}`, []Item{tstLeftNoMD, tstSC1, tstRightNoMD, tstEOF}}, - {"mismatched rightDelim", `{{< sc1 %}}`, []Item{tstLeftNoMD, tstSC1, - {tError, 0, "unrecognized character in shortcode action: U+0025 '%'. 
Note: Parameters with non-alphanumeric args must be quoted"}}}, - {"inner, markup", `{{% sc1 %}} inner {{% /sc1 %}}`, []Item{ - tstLeftMD, - tstSC1, - tstRightMD, - {tText, 0, " inner "}, - tstLeftMD, - tstSCClose, - tstSC1, - tstRightMD, - tstEOF, - }}, - {"close, but no open", `{{< /sc1 >}}`, []Item{ - tstLeftNoMD, {tError, 0, "got closing shortcode, but none is open"}}}, - {"close wrong", `{{< sc1 >}}{{< /another >}}`, []Item{ - tstLeftNoMD, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose, - {tError, 0, "closing tag for shortcode 'another' does not match start tag"}}}, - {"close, but no open, more", `{{< sc1 >}}{{< /sc1 >}}{{< /another >}}`, []Item{ - tstLeftNoMD, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose, - {tError, 0, "closing tag for shortcode 'another' does not match start tag"}}}, - {"close with extra keyword", `{{< sc1 >}}{{< /sc1 keyword>}}`, []Item{ - tstLeftNoMD, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose, tstSC1, - {tError, 0, "unclosed shortcode"}}}, - {"Youtube id", `{{< sc1 -ziL-Q_456igdO-4 >}}`, []Item{ - tstLeftNoMD, tstSC1, {tScParam, 0, "-ziL-Q_456igdO-4"}, tstRightNoMD, tstEOF}}, - {"non-alphanumerics param quoted", `{{< sc1 "-ziL-.%QigdO-4" >}}`, []Item{ - tstLeftNoMD, tstSC1, {tScParam, 0, "-ziL-.%QigdO-4"}, tstRightNoMD, tstEOF}}, - - {"two params", `{{< sc1 param1 param2 >}}`, []Item{ - tstLeftNoMD, tstSC1, tstParam1, tstParam2, tstRightNoMD, tstEOF}}, - // issue #934 - {"self-closing", `{{< sc1 />}}`, []Item{ - tstLeftNoMD, tstSC1, tstSCClose, tstRightNoMD, tstEOF}}, - // Issue 2498 - {"multiple self-closing", `{{< sc1 />}}{{< sc1 />}}`, []Item{ - tstLeftNoMD, tstSC1, tstSCClose, tstRightNoMD, - tstLeftNoMD, tstSC1, tstSCClose, tstRightNoMD, tstEOF}}, - {"self-closing with param", `{{< sc1 param1 />}}`, []Item{ - tstLeftNoMD, tstSC1, tstParam1, tstSCClose, tstRightNoMD, tstEOF}}, - {"multiple self-closing with param", `{{< sc1 param1 />}}{{< sc1 param1 />}}`, []Item{ - tstLeftNoMD, tstSC1, tstParam1, tstSCClose, tstRightNoMD, - tstLeftNoMD, tstSC1, tstParam1, tstSCClose, tstRightNoMD, tstEOF}}, - {"multiple different self-closing with param", `{{< sc1 param1 />}}{{< sc2 param1 />}}`, []Item{ - tstLeftNoMD, tstSC1, tstParam1, tstSCClose, tstRightNoMD, - tstLeftNoMD, tstSC2, tstParam1, tstSCClose, tstRightNoMD, tstEOF}}, - {"nested simple", `{{< sc1 >}}{{< sc2 >}}{{< /sc1 >}}`, []Item{ - tstLeftNoMD, tstSC1, tstRightNoMD, - tstLeftNoMD, tstSC2, tstRightNoMD, - tstLeftNoMD, tstSCClose, tstSC1, tstRightNoMD, tstEOF}}, - {"nested complex", `{{< sc1 >}}ab{{% sc2 param1 %}}cd{{< sc3 >}}ef{{< /sc3 >}}gh{{% /sc2 %}}ij{{< /sc1 >}}kl`, []Item{ - tstLeftNoMD, tstSC1, tstRightNoMD, - {tText, 0, "ab"}, - tstLeftMD, tstSC2, tstParam1, tstRightMD, - {tText, 0, "cd"}, - tstLeftNoMD, tstSC3, tstRightNoMD, - {tText, 0, "ef"}, - tstLeftNoMD, tstSCClose, tstSC3, tstRightNoMD, - {tText, 0, "gh"}, - tstLeftMD, tstSCClose, tstSC2, tstRightMD, - {tText, 0, "ij"}, - tstLeftNoMD, tstSCClose, tstSC1, tstRightNoMD, - {tText, 0, "kl"}, tstEOF, - }}, - - {"two quoted params", `{{< sc1 "param nr. 1" "param nr. 2" >}}`, []Item{ - tstLeftNoMD, tstSC1, {tScParam, 0, "param nr. 1"}, {tScParam, 0, "param nr. 
2"}, tstRightNoMD, tstEOF}}, - {"two named params", `{{< sc1 param1="Hello World" param2="p2Val">}}`, []Item{ - tstLeftNoMD, tstSC1, tstParam1, tstVal, tstParam2, {tScParamVal, 0, "p2Val"}, tstRightNoMD, tstEOF}}, - {"escaped quotes", `{{< sc1 param1=\"Hello World\" >}}`, []Item{ - tstLeftNoMD, tstSC1, tstParam1, tstVal, tstRightNoMD, tstEOF}}, - {"escaped quotes, positional param", `{{< sc1 \"param1\" >}}`, []Item{ - tstLeftNoMD, tstSC1, tstParam1, tstRightNoMD, tstEOF}}, - {"escaped quotes inside escaped quotes", `{{< sc1 param1=\"Hello \"escaped\" World\" >}}`, []Item{ - tstLeftNoMD, tstSC1, tstParam1, - {tScParamVal, 0, `Hello `}, {tError, 0, `got positional parameter 'escaped'. Cannot mix named and positional parameters`}}}, - {"escaped quotes inside nonescaped quotes", - `{{< sc1 param1="Hello \"escaped\" World" >}}`, []Item{ - tstLeftNoMD, tstSC1, tstParam1, {tScParamVal, 0, `Hello "escaped" World`}, tstRightNoMD, tstEOF}}, - {"escaped quotes inside nonescaped quotes in positional param", - `{{< sc1 "Hello \"escaped\" World" >}}`, []Item{ - tstLeftNoMD, tstSC1, {tScParam, 0, `Hello "escaped" World`}, tstRightNoMD, tstEOF}}, - {"unterminated quote", `{{< sc1 param2="Hello World>}}`, []Item{ - tstLeftNoMD, tstSC1, tstParam2, {tError, 0, "unterminated quoted string in shortcode parameter-argument: 'Hello World>}}'"}}}, - {"one named param, one not", `{{< sc1 param1="Hello World" p2 >}}`, []Item{ - tstLeftNoMD, tstSC1, tstParam1, tstVal, - {tError, 0, "got positional parameter 'p2'. Cannot mix named and positional parameters"}}}, - {"one named param, one quoted positional param", `{{< sc1 param1="Hello World" "And Universe" >}}`, []Item{ - tstLeftNoMD, tstSC1, tstParam1, tstVal, - {tError, 0, "got quoted positional parameter. Cannot mix named and positional parameters"}}}, - {"one quoted positional param, one named param", `{{< sc1 "param1" param2="And Universe" >}}`, []Item{ - tstLeftNoMD, tstSC1, tstParam1, - {tError, 0, "got named parameter 'param2'. Cannot mix named and positional parameters"}}}, - {"ono positional param, one not", `{{< sc1 param1 param2="Hello World">}}`, []Item{ - tstLeftNoMD, tstSC1, tstParam1, - {tError, 0, "got named parameter 'param2'. Cannot mix named and positional parameters"}}}, - {"commented out", `{{}}`, []Item{ - {tText, 0, "{{<"}, {tText, 0, " sc1 "}, {tText, 0, ">}}"}, tstEOF}}, - {"commented out, with asterisk inside", `{{}}`, []Item{ - {tText, 0, "{{<"}, {tText, 0, " sc1 \"**/*.pdf\" "}, {tText, 0, ">}}"}, tstEOF}}, - {"commented out, missing close", `{{}}`, []Item{ - {tError, 0, "comment must be closed"}}}, - {"commented out, misplaced close", `{{}}*/`, []Item{ - {tError, 0, "comment must be closed"}}}, -} - -func TestShortcodeLexer(t *testing.T) { - t.Parallel() - for i, test := range shortCodeLexerTests { - items := collect(&test) - if !equal(items, test.items) { - t.Errorf("[%d] %s: got\n\t%v\nexpected\n\t%v", i, test.name, items, test.items) - } - } -} - -func BenchmarkShortcodeLexer(b *testing.B) { - b.ResetTimer() - for i := 0; i < b.N; i++ { - for _, test := range shortCodeLexerTests { - items := collect(&test) - if !equal(items, test.items) { - b.Errorf("%s: got\n\t%v\nexpected\n\t%v", test.name, items, test.items) - } - } - } -} - -func collect(t *shortCodeLexerTest) (items []Item) { - l := newPageLexer(t.name, t.input, 0).run() - for { - item := l.nextItem() - items = append(items, item) - if item.typ == tEOF || item.typ == tError { - break - } - } - return -} - -// no positional checking, for now ... 
-func equal(i1, i2 []Item) bool { - if len(i1) != len(i2) { - return false - } - for k := range i1 { - if i1[k].typ != i2[k].typ { - return false - } - if i1[k].Val != i2[k].Val { - return false - } - } - return true -}
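
For reference, a minimal sketch of how the new intro-section lexing added by this commit can be driven. It assumes package-internal access and is modeled on the collect helper in the new pageparser_intro_test.go above; the exact output depends on Item.String and the lexer internals, so treat it as illustrative rather than as part of the patch.

    package pageparser

    import "fmt"

    // sketchIntroLexing tokenizes a page that starts with TOML front matter,
    // using the new lexIntroSection start state. After the front matter is
    // consumed, the lexer hands off to lexMainSection for the page body.
    func sketchIntroLexing() {
        input := "+++\nfoo = \"bar\"\n+++\n\nSome text.\n"

        l := newPageLexer(input, 0, lexIntroSection)
        l.run()

        for {
            item := l.nextItem()
            fmt.Println(item) // e.g. a tFrontMatterTOML item, then tText, then tEOF
            if item.typ == tEOF || item.typ == tError {
                break
            }
        }
    }

The exported entry point added here, Parse(s string) *Tokens, wraps the same machinery with lexMainSection as the start state (see ParseFrom in pagelexer.go); the sketch uses the unexported constructor only so the front matter state can be exercised directly, as the tests do.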