parser/pageparser: Use []byte in page lexer

author Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>

Thu, 18 Oct 2018 07:04:48 +0000 (09:04 +0200)

committer Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>

Mon, 22 Oct 2018 17:57:44 +0000 (19:57 +0200)
author Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>
Thu, 18 Oct 2018 07:04:48 +0000 (09:04 +0200)
committer Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>
Mon, 22 Oct 2018 17:57:44 +0000 (19:57 +0200)
diff --git a/parser/pageparser/item.go b/parser/pageparser/item.go

index f7495c90e4fb7c14e7a46b90e40249faba0cca86..35bc8e2687db01fe77c84f539cade37b48b80fea 100644 (file)
--- a/parser/pageparser/item.go
+++ b/parser/pageparser/item.go
@@ -18,7 +18,7 @@ import "fmt"
  type Item struct {
         typ itemType
         pos pos
-       Val string
+       Val []byte
  }
  
  func (i Item) IsText() bool {
@@ -70,7 +70,7 @@ func (i Item) String() string {
         case i.typ == tEOF:
                 return "EOF"
         case i.typ == tError:
-               return i.Val
+               return string(i.Val)
         case i.typ > tKeywordMarker:
                 return fmt.Sprintf("<%s>", i.Val)
         case len(i.Val) > 50:
diff --git a/parser/pageparser/pagelexer.go b/parser/pageparser/pagelexer.go

index 0c97becdefff85a572674ea636529a773339f339..3bdfb6c336c6cef56b8657152df72116995aa1e1 100644 (file)
--- a/parser/pageparser/pagelexer.go
+++ b/parser/pageparser/pagelexer.go
@@ -18,8 +18,8 @@
  package pageparser
  
  import (
+       "bytes"
         "fmt"
-       "strings"
         "unicode"
         "unicode/utf8"
  )
@@ -44,7 +44,7 @@ type lexerShortcodeState struct {
  }
  
  type pageLexer struct {
-       input      string
+       input      []byte
         stateStart stateFunc
         state      stateFunc
         pos        pos // input position
@@ -65,14 +65,16 @@ func Parse(s string) *Tokens {
  }
  
  func ParseFrom(s string, from int) *Tokens {
-       lexer := newPageLexer(s, pos(from), lexMainSection) // TODO(bep) 2errors
+       input := []byte(s)
+       lexer := newPageLexer(input, pos(from), lexMainSection) // TODO(bep) 2errors
         lexer.run()
         return &Tokens{lexer: lexer}
  }
  
  // note: the input position here is normally 0 (start), but
  // can be set if position of first shortcode is known
-func newPageLexer(input string, inputPosition pos, stateStart stateFunc) *pageLexer {
+// TODO(bep) 2errors byte
+func newPageLexer(input []byte, inputPosition pos, stateStart stateFunc) *pageLexer {
         lexer := &pageLexer{
                 input:      input,
                 pos:        inputPosition,
@@ -97,19 +99,22 @@ func (l *pageLexer) run() *pageLexer {
  }
  
  // Shortcode syntax
-const (
-       leftDelimScNoMarkup    = "{{<"
-       rightDelimScNoMarkup   = ">}}"
-       leftDelimScWithMarkup  = "{{%"
-       rightDelimScWithMarkup = "%}}"
-       leftComment            = "/*" // comments in this context us used to to mark shortcodes as "not really a shortcode"
-       rightComment           = "*/"
+var (
+       leftDelimScNoMarkup    = []byte("{{<")
+       rightDelimScNoMarkup   = []byte(">}}")
+       leftDelimScWithMarkup  = []byte("{{%")
+       rightDelimScWithMarkup = []byte("%}}")
+       leftComment            = []byte("/*") // comments in this context are used to mark shortcodes as "not really a shortcode"
+       rightComment           = []byte("*/")
  )
  
  // Page syntax
-const (
-       summaryDivider    = "<!--more-->"
-       summaryDividerOrg = "# more"
+var (
+       summaryDivider    = []byte("<!--more-->")
+       summaryDividerOrg = []byte("# more")
+       delimTOML         = []byte("+++")
+       delimYAML         = []byte("---")
+       delimOrg          = []byte("#+")
  )
  
  func (l *pageLexer) next() rune {
@@ -118,9 +123,7 @@ func (l *pageLexer) next() rune {
                 return eof
         }
  
-       // looks expensive, but should produce the same iteration sequence as the string range loop
-       // see: http://blog.golang.org/strings
-       runeValue, runeWidth := utf8.DecodeRuneInString(l.input[l.pos:])
+       runeValue, runeWidth := utf8.DecodeRune(l.input[l.pos:])
         l.width = pos(runeWidth)
         l.pos += l.width
         return runeValue
@@ -146,7 +149,7 @@ func (l *pageLexer) emit(t itemType) {
  
  // special case, do not send '\\' back to client
  func (l *pageLexer) ignoreEscapesAndEmit(t itemType) {
-       val := strings.Map(func(r rune) rune {
+       val := bytes.Map(func(r rune) rune {
                 if r == '\\' {
                         return -1
                 }
@@ -157,7 +160,7 @@ func (l *pageLexer) ignoreEscapesAndEmit(t itemType) {
  }
  
  // gets the current value (for debugging and error handling)
-func (l *pageLexer) current() string {
+func (l *pageLexer) current() []byte {
         return l.input[l.start:l.pos]
  }
  
@@ -166,14 +169,16 @@ func (l *pageLexer) ignore() {
         l.start = l.pos
  }
  
+var lf = []byte("\n")
+
  // nice to have in error logs
  func (l *pageLexer) lineNum() int {
-       return strings.Count(l.input[:l.lastPos], "\n") + 1
+       return bytes.Count(l.input[:l.lastPos], lf) + 1
  }
  
  // nil terminates the parser
  func (l *pageLexer) errorf(format string, args ...interface{}) stateFunc {
-       l.items = append(l.items, Item{tError, l.start, fmt.Sprintf(format, args...)})
+       l.items = append(l.items, Item{tError, l.start, []byte(fmt.Sprintf(format, args...))})
         return nil
  }
  
@@ -203,7 +208,7 @@ func lexMainSection(l *pageLexer) stateFunc {
                         if l.pos > l.start {
                                 l.emit(tText)
                         }
-                       if strings.HasPrefix(l.input[l.pos:], leftDelimScWithMarkup) {
+                       if bytes.HasPrefix(l.input[l.pos:], leftDelimScWithMarkup) {
                                 l.currLeftDelimItem = tLeftDelimScWithMarkup
                                 l.currRightDelimItem = tRightDelimScWithMarkup
                         } else {
@@ -214,14 +219,14 @@ func lexMainSection(l *pageLexer) stateFunc {
                 }
  
                 if l.contentSections <= 1 {
-                       if strings.HasPrefix(l.input[l.pos:], summaryDivider) {
+                       if bytes.HasPrefix(l.input[l.pos:], summaryDivider) {
                                 if l.pos > l.start {
                                         l.emit(tText)
                                 }
                                 l.contentSections++
                                 l.pos += pos(len(summaryDivider))
                                 l.emit(tSummaryDivider)
-                       } else if strings.HasPrefix(l.input[l.pos:], summaryDividerOrg) {
+                       } else if bytes.HasPrefix(l.input[l.pos:], summaryDividerOrg) {
                                 if l.pos > l.start {
                                         l.emit(tText)
                                 }
@@ -243,7 +248,7 @@ func lexMainSection(l *pageLexer) stateFunc {
  }
  
  func (l *pageLexer) isShortCodeStart() bool {
-       return strings.HasPrefix(l.input[l.pos:], leftDelimScWithMarkup) || strings.HasPrefix(l.input[l.pos:], leftDelimScNoMarkup)
+       return bytes.HasPrefix(l.input[l.pos:], leftDelimScWithMarkup) || bytes.HasPrefix(l.input[l.pos:], leftDelimScNoMarkup)
  }
  
  func lexIntroSection(l *pageLexer) stateFunc {
@@ -256,9 +261,9 @@ LOOP:
  
                 switch {
                 case r == '+':
-                       return l.lexFrontMatterSection(tFrontMatterTOML, r, "TOML", "+++")
+                       return l.lexFrontMatterSection(tFrontMatterTOML, r, "TOML", delimTOML)
                 case r == '-':
-                       return l.lexFrontMatterSection(tFrontMatterYAML, r, "YAML", "---")
+                       return l.lexFrontMatterSection(tFrontMatterYAML, r, "YAML", delimYAML)
                 case r == '{':
                         return lexFrontMatterJSON
                 case r == '#':
@@ -342,11 +347,9 @@ func lexFrontMatterOrgMode(l *pageLexer) stateFunc {
                 #+DESCRIPTION: Just another golang parser for org content!
         */
  
-       const prefix = "#+"
-
         l.backup()
  
-       if !strings.HasPrefix(l.input[l.pos:], prefix) {
+       if !bytes.HasPrefix(l.input[l.pos:], delimOrg) {
                 // TODO(bep) consider error
                 return lexMainSection
         }
@@ -359,7 +362,7 @@ LOOP:
  
                 switch {
                 case r == '\n':
-                       if !strings.HasPrefix(l.input[l.pos:], prefix) {
+                       if !bytes.HasPrefix(l.input[l.pos:], delimOrg) {
                                 break LOOP
                         }
                 case r == eof:
@@ -375,7 +378,7 @@ LOOP:
  }
  
  // Handle YAML or TOML front matter.
-func (l *pageLexer) lexFrontMatterSection(tp itemType, delimr rune, name, delim string) stateFunc {
+func (l *pageLexer) lexFrontMatterSection(tp itemType, delimr rune, name string, delim []byte) stateFunc {
         for i := 0; i < 2; i++ {
                 if r := l.next(); r != delimr {
                         return l.errorf("invalid %s delimiter", name)
@@ -395,7 +398,7 @@ func (l *pageLexer) lexFrontMatterSection(tp itemType, delimr rune, name, delim
                         return l.errorf("EOF looking for end %s front matter delimiter", name)
                 }
                 if isEndOfLine(r) {
-                       if strings.HasPrefix(l.input[l.pos:], delim) {
+                       if bytes.HasPrefix(l.input[l.pos:], delim) {
                                 l.emit(tp)
                                 l.pos += 3
                                 l.consumeCRLF()
@@ -410,7 +413,7 @@ func (l *pageLexer) lexFrontMatterSection(tp itemType, delimr rune, name, delim
  
  func lexShortcodeLeftDelim(l *pageLexer) stateFunc {
         l.pos += pos(len(l.currentLeftShortcodeDelim()))
-       if strings.HasPrefix(l.input[l.pos:], leftComment) {
+       if bytes.HasPrefix(l.input[l.pos:], leftComment) {
                 return lexShortcodeComment
         }
         l.emit(l.currentLeftShortcodeDelimItem())
@@ -420,7 +423,7 @@ func lexShortcodeLeftDelim(l *pageLexer) stateFunc {
  }
  
  func lexShortcodeComment(l *pageLexer) stateFunc {
-       posRightComment := strings.Index(l.input[l.pos:], rightComment+l.currentRightShortcodeDelim())
+       posRightComment := bytes.Index(l.input[l.pos:], append(rightComment, l.currentRightShortcodeDelim()...))
         if posRightComment <= 1 {
                 return l.errorf("comment must be closed")
         }
@@ -576,7 +579,7 @@ Loop:
                 case r == '/':
                 default:
                         l.backup()
-                       word := l.input[l.start:l.pos]
+                       word := string(l.input[l.start:l.pos])
                         if l.closingState > 0 && !l.openShortcodes[word] {
                                 return l.errorf("closing tag for shortcode '%s' does not match start tag", word)
                         } else if l.closingState > 0 {
@@ -600,7 +603,7 @@ Loop:
  }
  
  func lexEndOfShortcode(l *pageLexer) stateFunc {
-       if strings.HasPrefix(l.input[l.pos:], l.currentRightShortcodeDelim()) {
+       if bytes.HasPrefix(l.input[l.pos:], l.currentRightShortcodeDelim()) {
                 return lexShortcodeRightDelim
         }
         switch r := l.next(); {
@@ -614,7 +617,7 @@ func lexEndOfShortcode(l *pageLexer) stateFunc {
  
  // scans the elements inside shortcode tags
  func lexInsideShortcode(l *pageLexer) stateFunc {
-       if strings.HasPrefix(l.input[l.pos:], l.currentRightShortcodeDelim()) {
+       if bytes.HasPrefix(l.input[l.pos:], l.currentRightShortcodeDelim()) {
                 return lexShortcodeRightDelim
         }
         switch r := l.next(); {
@@ -659,7 +662,7 @@ func (l *pageLexer) currentRightShortcodeDelimItem() itemType {
         return l.currRightDelimItem
  }
  
-func (l *pageLexer) currentLeftShortcodeDelim() string {
+func (l *pageLexer) currentLeftShortcodeDelim() []byte {
         if l.currLeftDelimItem == tLeftDelimScWithMarkup {
                 return leftDelimScWithMarkup
         }
@@ -667,7 +670,7 @@ func (l *pageLexer) currentLeftShortcodeDelim() string {
  
  }
  
-func (l *pageLexer) currentRightShortcodeDelim() string {
+func (l *pageLexer) currentRightShortcodeDelim() []byte {
         if l.currRightDelimItem == tRightDelimScWithMarkup {
                 return rightDelimScWithMarkup
         }
diff --git a/parser/pageparser/pageparser_intro_test.go b/parser/pageparser/pageparser_intro_test.go

index 3dc08c7769313bfdccdf4b8195418e919d1ae81d..19e30dc9adb3ef05fb9decaab9853afc3a9abe9d 100644 (file)
--- a/parser/pageparser/pageparser_intro_test.go
+++ b/parser/pageparser/pageparser_intro_test.go
@@ -15,6 +15,7 @@ package pageparser
  
  import (
         "fmt"
+       "reflect"
         "strings"
         "testing"
  )
@@ -25,23 +26,27 @@ type lexerTest struct {
         items []Item
  }
  
+func nti(tp itemType, val string) Item {
+       return Item{tp, 0, []byte(val)}
+}
+
  var (
         tstJSON                = `{ "a": { "b": "\"Hugo\"}" } }`
-       tstHTMLLead            = Item{tHTMLLead, 0, "  <"}
-       tstFrontMatterTOML     = Item{tFrontMatterTOML, 0, "foo = \"bar\"\n"}
-       tstFrontMatterYAML     = Item{tFrontMatterYAML, 0, "foo: \"bar\"\n"}
-       tstFrontMatterYAMLCRLF = Item{tFrontMatterYAML, 0, "foo: \"bar\"\r\n"}
-       tstFrontMatterJSON     = Item{tFrontMatterJSON, 0, tstJSON + "\r\n"}
-       tstSomeText            = Item{tText, 0, "\nSome text.\n"}
-       tstSummaryDivider      = Item{tSummaryDivider, 0, "<!--more-->"}
-       tstSummaryDividerOrg   = Item{tSummaryDividerOrg, 0, "# more"}
+       tstHTMLLead            = nti(tHTMLLead, "  <")
+       tstFrontMatterTOML     = nti(tFrontMatterTOML, "foo = \"bar\"\n")
+       tstFrontMatterYAML     = nti(tFrontMatterYAML, "foo: \"bar\"\n")
+       tstFrontMatterYAMLCRLF = nti(tFrontMatterYAML, "foo: \"bar\"\r\n")
+       tstFrontMatterJSON     = nti(tFrontMatterJSON, tstJSON+"\r\n")
+       tstSomeText            = nti(tText, "\nSome text.\n")
+       tstSummaryDivider      = nti(tSummaryDivider, "<!--more-->")
+       tstSummaryDividerOrg   = nti(tSummaryDividerOrg, "# more")
  
         tstORG = `
  #+TITLE: T1
  #+AUTHOR: A1
  #+DESCRIPTION: D1
  `
-       tstFrontMatterORG = Item{tFrontMatterORG, 0, tstORG}
+       tstFrontMatterORG = nti(tFrontMatterORG, tstORG)
  )
  
  var crLfReplacer = strings.NewReplacer("\r", "#", "\n", "$")
@@ -49,7 +54,7 @@ var crLfReplacer = strings.NewReplacer("\r", "#", "\n", "$")
  // TODO(bep) a way to toggle ORG mode vs the rest.
  var frontMatterTests = []lexerTest{
         {"empty", "", []Item{tstEOF}},
-       {"HTML Document", `  <html>  `, []Item{tstHTMLLead, Item{tText, 0, "html>  "}, tstEOF}},
+       {"HTML Document", `  <html>  `, []Item{tstHTMLLead, nti(tText, "html>  "), tstEOF}},
         {"YAML front matter", "---\nfoo: \"bar\"\n---\n\nSome text.\n", []Item{tstFrontMatterYAML, tstSomeText, tstEOF}},
         // Note that we keep all bytes as they are, but we need to handle CRLF
         {"YAML front matter CRLF", "---\r\nfoo: \"bar\"\r\n---\n\nSome text.\n", []Item{tstFrontMatterYAMLCRLF, tstSomeText, tstEOF}},
@@ -63,7 +68,7 @@ var frontMatterTests = []lexerTest{
  func TestFrontMatter(t *testing.T) {
         t.Parallel()
         for i, test := range frontMatterTests {
-               items := collect(test.name, test.input, false, lexIntroSection)
+               items := collect([]byte(test.input), false, lexIntroSection)
                 if !equal(items, test.items) {
                         got := crLfReplacer.Replace(fmt.Sprint(items))
                         expected := crLfReplacer.Replace(fmt.Sprint(test.items))
@@ -72,7 +77,7 @@ func TestFrontMatter(t *testing.T) {
         }
  }
  
-func collect(name, input string, skipFrontMatter bool, stateStart stateFunc) (items []Item) {
+func collect(input []byte, skipFrontMatter bool, stateStart stateFunc) (items []Item) {
         l := newPageLexer(input, 0, stateStart)
         l.run()
  
@@ -95,7 +100,7 @@ func equal(i1, i2 []Item) bool {
                 if i1[k].typ != i2[k].typ {
                         return false
                 }
-               if i1[k].Val != i2[k].Val {
+               if !reflect.DeepEqual(i1[k].Val, i2[k].Val) {
                         return false
                 }
         }
diff --git a/parser/pageparser/pageparser_shortcode_test.go b/parser/pageparser/pageparser_shortcode_test.go

index 525c7452fa9d5ad410359201dda0bcbedad49199..efef6fca2408987c5c6ac97f625eb48b282d49bc 100644 (file)
--- a/parser/pageparser/pageparser_shortcode_test.go
+++ b/parser/pageparser/pageparser_shortcode_test.go
@@ -16,25 +16,25 @@ package pageparser
  import "testing"
  
  var (
-       tstEOF       = Item{tEOF, 0, ""}
-       tstLeftNoMD  = Item{tLeftDelimScNoMarkup, 0, "{{<"}
-       tstRightNoMD = Item{tRightDelimScNoMarkup, 0, ">}}"}
-       tstLeftMD    = Item{tLeftDelimScWithMarkup, 0, "{{%"}
-       tstRightMD   = Item{tRightDelimScWithMarkup, 0, "%}}"}
-       tstSCClose   = Item{tScClose, 0, "/"}
-       tstSC1       = Item{tScName, 0, "sc1"}
-       tstSC2       = Item{tScName, 0, "sc2"}
-       tstSC3       = Item{tScName, 0, "sc3"}
-       tstSCSlash   = Item{tScName, 0, "sc/sub"}
-       tstParam1    = Item{tScParam, 0, "param1"}
-       tstParam2    = Item{tScParam, 0, "param2"}
-       tstVal       = Item{tScParamVal, 0, "Hello World"}
+       tstEOF       = nti(tEOF, "")
+       tstLeftNoMD  = nti(tLeftDelimScNoMarkup, "{{<")
+       tstRightNoMD = nti(tRightDelimScNoMarkup, ">}}")
+       tstLeftMD    = nti(tLeftDelimScWithMarkup, "{{%")
+       tstRightMD   = nti(tRightDelimScWithMarkup, "%}}")
+       tstSCClose   = nti(tScClose, "/")
+       tstSC1       = nti(tScName, "sc1")
+       tstSC2       = nti(tScName, "sc2")
+       tstSC3       = nti(tScName, "sc3")
+       tstSCSlash   = nti(tScName, "sc/sub")
+       tstParam1    = nti(tScParam, "param1")
+       tstParam2    = nti(tScParam, "param2")
+       tstVal       = nti(tScParamVal, "Hello World")
  )
  
  var shortCodeLexerTests = []lexerTest{
         {"empty", "", []Item{tstEOF}},
-       {"spaces", " \t\n", []Item{{tText, 0, " \t\n"}, tstEOF}},
-       {"text", `to be or not`, []Item{{tText, 0, "to be or not"}, tstEOF}},
+       {"spaces", " \t\n", []Item{nti(tText, " \t\n"), tstEOF}},
+       {"text", `to be or not`, []Item{nti(tText, "to be or not"), tstEOF}},
         {"no markup", `{{< sc1 >}}`, []Item{tstLeftNoMD, tstSC1, tstRightNoMD, tstEOF}},
         {"with EOL", "{{< sc1 \n >}}", []Item{tstLeftNoMD, tstSC1, tstRightNoMD, tstEOF}},
  
@@ -43,12 +43,12 @@ var shortCodeLexerTests = []lexerTest{
         {"simple with markup", `{{% sc1 %}}`, []Item{tstLeftMD, tstSC1, tstRightMD, tstEOF}},
         {"with spaces", `{{<     sc1     >}}`, []Item{tstLeftNoMD, tstSC1, tstRightNoMD, tstEOF}},
         {"mismatched rightDelim", `{{< sc1 %}}`, []Item{tstLeftNoMD, tstSC1,
-               {tError, 0, "unrecognized character in shortcode action: U+0025 '%'. Note: Parameters with non-alphanumeric args must be quoted"}}},
+               nti(tError, "unrecognized character in shortcode action: U+0025 '%'. Note: Parameters with non-alphanumeric args must be quoted")}},
         {"inner, markup", `{{% sc1 %}} inner {{% /sc1 %}}`, []Item{
                 tstLeftMD,
                 tstSC1,
                 tstRightMD,
-               {tText, 0, " inner "},
+               nti(tText, " inner "),
                 tstLeftMD,
                 tstSCClose,
                 tstSC1,
@@ -56,20 +56,20 @@ var shortCodeLexerTests = []lexerTest{
                 tstEOF,
         }},
         {"close, but no open", `{{< /sc1 >}}`, []Item{
-               tstLeftNoMD, {tError, 0, "got closing shortcode, but none is open"}}},
+               tstLeftNoMD, nti(tError, "got closing shortcode, but none is open")}},
         {"close wrong", `{{< sc1 >}}{{< /another >}}`, []Item{
                 tstLeftNoMD, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose,
-               {tError, 0, "closing tag for shortcode 'another' does not match start tag"}}},
+               nti(tError, "closing tag for shortcode 'another' does not match start tag")}},
         {"close, but no open, more", `{{< sc1 >}}{{< /sc1 >}}{{< /another >}}`, []Item{
                 tstLeftNoMD, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose,
-               {tError, 0, "closing tag for shortcode 'another' does not match start tag"}}},
+               nti(tError, "closing tag for shortcode 'another' does not match start tag")}},
         {"close with extra keyword", `{{< sc1 >}}{{< /sc1 keyword>}}`, []Item{
                 tstLeftNoMD, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose, tstSC1,
-               {tError, 0, "unclosed shortcode"}}},
+               nti(tError, "unclosed shortcode")}},
         {"Youtube id", `{{< sc1 -ziL-Q_456igdO-4 >}}`, []Item{
-               tstLeftNoMD, tstSC1, {tScParam, 0, "-ziL-Q_456igdO-4"}, tstRightNoMD, tstEOF}},
+               tstLeftNoMD, tstSC1, nti(tScParam, "-ziL-Q_456igdO-4"), tstRightNoMD, tstEOF}},
         {"non-alphanumerics param quoted", `{{< sc1 "-ziL-.%QigdO-4" >}}`, []Item{
-               tstLeftNoMD, tstSC1, {tScParam, 0, "-ziL-.%QigdO-4"}, tstRightNoMD, tstEOF}},
+               tstLeftNoMD, tstSC1, nti(tScParam, "-ziL-.%QigdO-4"), tstRightNoMD, tstEOF}},
  
         {"two params", `{{< sc1 param1   param2 >}}`, []Item{
                 tstLeftNoMD, tstSC1, tstParam1, tstParam2, tstRightNoMD, tstEOF}},
@@ -94,64 +94,64 @@ var shortCodeLexerTests = []lexerTest{
                 tstLeftNoMD, tstSCClose, tstSC1, tstRightNoMD, tstEOF}},
         {"nested complex", `{{< sc1 >}}ab{{% sc2 param1 %}}cd{{< sc3 >}}ef{{< /sc3 >}}gh{{% /sc2 %}}ij{{< /sc1 >}}kl`, []Item{
                 tstLeftNoMD, tstSC1, tstRightNoMD,
-               {tText, 0, "ab"},
+               nti(tText, "ab"),
                 tstLeftMD, tstSC2, tstParam1, tstRightMD,
-               {tText, 0, "cd"},
+               nti(tText, "cd"),
                 tstLeftNoMD, tstSC3, tstRightNoMD,
-               {tText, 0, "ef"},
+               nti(tText, "ef"),
                 tstLeftNoMD, tstSCClose, tstSC3, tstRightNoMD,
-               {tText, 0, "gh"},
+               nti(tText, "gh"),
                 tstLeftMD, tstSCClose, tstSC2, tstRightMD,
-               {tText, 0, "ij"},
+               nti(tText, "ij"),
                 tstLeftNoMD, tstSCClose, tstSC1, tstRightNoMD,
-               {tText, 0, "kl"}, tstEOF,
+               nti(tText, "kl"), tstEOF,
         }},
  
         {"two quoted params", `{{< sc1 "param nr. 1" "param nr. 2" >}}`, []Item{
-               tstLeftNoMD, tstSC1, {tScParam, 0, "param nr. 1"}, {tScParam, 0, "param nr. 2"}, tstRightNoMD, tstEOF}},
+               tstLeftNoMD, tstSC1, nti(tScParam, "param nr. 1"), nti(tScParam, "param nr. 2"), tstRightNoMD, tstEOF}},
         {"two named params", `{{< sc1 param1="Hello World" param2="p2Val">}}`, []Item{
-               tstLeftNoMD, tstSC1, tstParam1, tstVal, tstParam2, {tScParamVal, 0, "p2Val"}, tstRightNoMD, tstEOF}},
+               tstLeftNoMD, tstSC1, tstParam1, tstVal, tstParam2, nti(tScParamVal, "p2Val"), tstRightNoMD, tstEOF}},
         {"escaped quotes", `{{< sc1 param1=\"Hello World\"  >}}`, []Item{
                 tstLeftNoMD, tstSC1, tstParam1, tstVal, tstRightNoMD, tstEOF}},
         {"escaped quotes, positional param", `{{< sc1 \"param1\"  >}}`, []Item{
                 tstLeftNoMD, tstSC1, tstParam1, tstRightNoMD, tstEOF}},
         {"escaped quotes inside escaped quotes", `{{< sc1 param1=\"Hello \"escaped\" World\"  >}}`, []Item{
                 tstLeftNoMD, tstSC1, tstParam1,
-               {tScParamVal, 0, `Hello `}, {tError, 0, `got positional parameter 'escaped'. Cannot mix named and positional parameters`}}},
+               nti(tScParamVal, `Hello `), nti(tError, `got positional parameter 'escaped'. Cannot mix named and positional parameters`)}},
         {"escaped quotes inside nonescaped quotes",
                 `{{< sc1 param1="Hello \"escaped\" World"  >}}`, []Item{
-                       tstLeftNoMD, tstSC1, tstParam1, {tScParamVal, 0, `Hello "escaped" World`}, tstRightNoMD, tstEOF}},
+                       tstLeftNoMD, tstSC1, tstParam1, nti(tScParamVal, `Hello "escaped" World`), tstRightNoMD, tstEOF}},
         {"escaped quotes inside nonescaped quotes in positional param",
                 `{{< sc1 "Hello \"escaped\" World"  >}}`, []Item{
-                       tstLeftNoMD, tstSC1, {tScParam, 0, `Hello "escaped" World`}, tstRightNoMD, tstEOF}},
+                       tstLeftNoMD, tstSC1, nti(tScParam, `Hello "escaped" World`), tstRightNoMD, tstEOF}},
         {"unterminated quote", `{{< sc1 param2="Hello World>}}`, []Item{
-               tstLeftNoMD, tstSC1, tstParam2, {tError, 0, "unterminated quoted string in shortcode parameter-argument: 'Hello World>}}'"}}},
+               tstLeftNoMD, tstSC1, tstParam2, nti(tError, "unterminated quoted string in shortcode parameter-argument: 'Hello World>}}'")}},
         {"one named param, one not", `{{< sc1 param1="Hello World" p2 >}}`, []Item{
                 tstLeftNoMD, tstSC1, tstParam1, tstVal,
-               {tError, 0, "got positional parameter 'p2'. Cannot mix named and positional parameters"}}},
+               nti(tError, "got positional parameter 'p2'. Cannot mix named and positional parameters")}},
         {"one named param, one quoted positional param", `{{< sc1 param1="Hello World" "And Universe" >}}`, []Item{
                 tstLeftNoMD, tstSC1, tstParam1, tstVal,
-               {tError, 0, "got quoted positional parameter. Cannot mix named and positional parameters"}}},
+               nti(tError, "got quoted positional parameter. Cannot mix named and positional parameters")}},
         {"one quoted positional param, one named param", `{{< sc1 "param1" param2="And Universe" >}}`, []Item{
                 tstLeftNoMD, tstSC1, tstParam1,
-               {tError, 0, "got named parameter 'param2'. Cannot mix named and positional parameters"}}},
+               nti(tError, "got named parameter 'param2'. Cannot mix named and positional parameters")}},
         {"ono positional param, one not", `{{< sc1 param1 param2="Hello World">}}`, []Item{
                 tstLeftNoMD, tstSC1, tstParam1,
-               {tError, 0, "got named parameter 'param2'. Cannot mix named and positional parameters"}}},
+               nti(tError, "got named parameter 'param2'. Cannot mix named and positional parameters")}},
         {"commented out", `{{</* sc1 */>}}`, []Item{
-               {tText, 0, "{{<"}, {tText, 0, " sc1 "}, {tText, 0, ">}}"}, tstEOF}},
+               nti(tText, "{{<"), nti(tText, " sc1 "), nti(tText, ">}}"), tstEOF}},
         {"commented out, with asterisk inside", `{{</* sc1 "**/*.pdf" */>}}`, []Item{
-               {tText, 0, "{{<"}, {tText, 0, " sc1 \"**/*.pdf\" "}, {tText, 0, ">}}"}, tstEOF}},
+               nti(tText, "{{<"), nti(tText, " sc1 \"**/*.pdf\" "), nti(tText, ">}}"), tstEOF}},
         {"commented out, missing close", `{{</* sc1 >}}`, []Item{
-               {tError, 0, "comment must be closed"}}},
+               nti(tError, "comment must be closed")}},
         {"commented out, misplaced close", `{{</* sc1 >}}*/`, []Item{
-               {tError, 0, "comment must be closed"}}},
+               nti(tError, "comment must be closed")}},
  }
  
  func TestShortcodeLexer(t *testing.T) {
         t.Parallel()
         for i, test := range shortCodeLexerTests {
-               items := collect(test.name, test.input, true, lexMainSection)
+               items := collect([]byte(test.input), true, lexMainSection)
                 if !equal(items, test.items) {
                         t.Errorf("[%d] %s: got\n\t%v\nexpected\n\t%v", i, test.name, items, test.items)
                 }
@@ -159,13 +159,17 @@ func TestShortcodeLexer(t *testing.T) {
  }
  
  func BenchmarkShortcodeLexer(b *testing.B) {
+       testInputs := make([][]byte, len(shortCodeLexerTests))
+       for i, input := range shortCodeLexerTests {
+               testInputs[i] = []byte(input.input)
+       }
         b.ResetTimer()
         for i := 0; i < b.N; i++ {
-               for _, test := range shortCodeLexerTests {
-                       items := collect(test.name, test.input, true, lexMainSection)
-                       if !equal(items, test.items) {
-                               b.Errorf("%s: got\n\t%v\nexpected\n\t%v", test.name, items, test.items)
+               for _, input := range testInputs {
+                       items := collect(input, true, lexMainSection)
+                       if len(items) == 0 {
                         }
+
                 }
         }
  }
author	Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>
	Thu, 18 Oct 2018 07:04:48 +0000 (09:04 +0200)
committer	Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>
	Mon, 22 Oct 2018 17:57:44 +0000 (19:57 +0200)
parser/pageparser/item.go		patch \| blob \| history
parser/pageparser/pagelexer.go		patch \| blob \| history
parser/pageparser/pageparser_intro_test.go		patch \| blob \| history
parser/pageparser/pageparser_shortcode_test.go		patch \| blob \| history