Shortcode rewrite, take 2
authorbep <bjorn.erik.pedersen@gmail.com>
Mon, 27 Oct 2014 20:48:30 +0000 (21:48 +0100)
committerspf13 <steve.francia@gmail.com>
Mon, 17 Nov 2014 23:32:06 +0000 (18:32 -0500)
This commit contains a restructuring and partial rewrite of the shortcode handling.

Prior to this commit rendering of the page content was mingled with handling of the shortcodes. This led to several oddities.

The new flow is:

1. Shortcodes are extracted from page and replaced with placeholders.
2. Shortcodes are processed and rendered
3. Page is processed
4. The placeholders are replaced with the rendered shortcodes

The handling of summaries is also made simpler by this.

This commit also introduces some other changes:

1. A distinction between shortcodes that need further processing and those that do not:

* `{{< >}}`: Typically raw HTML. Will not be processed.
* `{{% %}}`: Will be processed by the page's markup engine (Markdown or (in the future) Asciidoctor)

The above also involves a new shortcode-parser, with lexical scanning inspired by Rob Pike's talk called "Lexical Scanning in Go",
which should be easier to understand, give better error messages and perform better.

2. If you want to exclude a shortcode from being processed (for documentation etc.), the inner part of the shortcode must be commented out, e.g. `{{%/* movie 47238zzb */%}}`. See the updated shortcode section in the documentation for further examples.

The new parser supports nested shortcodes. This isn't new, but has two related design choices worth mentioning:

* The shortcodes will be rendered individually, so if both `{{< >}}` and `{{% %}}` are used in the nested hierarchy, one will be passed through the page's markdown processor, the other not.
* To avoid the potentially costly overhead of always looking far ahead for a possible closing tag, this implementation looks at the shortcode's template itself: a shortcode is treated as a container with inner content if its template contains a reference to `.Inner`

Fixes #565
Fixes #480
Fixes #461

And probably some others.

docs/content/extras/shortcodes.md
helpers/pygments.go
hugolib/page.go
hugolib/page_test.go
hugolib/shortcode.go
hugolib/shortcode_test.go
hugolib/shortcodeparser.go [new file with mode: 0644]
hugolib/shortcodeparser_test.go [new file with mode: 0644]
hugolib/template.go

index 5464b1a57d216e1386887917c695fa9a46a2bb01..c0b6e0b04ef582580348daf4dd23a3f20732e953 100644 (file)
@@ -29,8 +29,8 @@ want a [partial template](/templates/partial) instead.
 
 ## Using a shortcode
 
-In your content files, a shortcode can be called by using '`{{% name parameters
-%}}`' respectively. Shortcodes are space delimited (parameters with spaces
+In your content files, a shortcode can be called by using '`{{%/* name parameters
+*/%}}`' respectively. Shortcodes are space delimited (parameters with spaces
 can be quoted).
 
 The first word is always the name of the shortcode. Parameters follow the name.
@@ -43,7 +43,7 @@ shortcodes match (name only), the closing being prepended with a slash.
 
 Example of a paired shortcode:
 
-    {{ % highlight go %}} A bunch of code here {{ % /highlight %}}
+    {{%/* highlight go */%}} A bunch of code here {{%/* /highlight */%}}
 
 
 ## Hugo Shortcodes
@@ -60,9 +60,8 @@ HTML. Read more on [highlighting](/extras/highlighting).
 closing shortcode.
 
 #### Example
-The example has an extra space between the “`{{`” and “`%`” characters to prevent rendering here.
 
-    {{ % highlight html %}}
+    {{%/* highlight html */%}}
     <section id="main">
       <div>
        <h1 id="title">{{ .Title }}</h1>
@@ -71,7 +70,7 @@ The example has an extra space between the “`{{`” and “`%`” characters t
         {{ end }}
       </div>
     </section>
-    {{ % /highlight %}}
+    {{%/* /highlight */%}}
 
 
 #### Example Output
@@ -104,7 +103,7 @@ The example has an extra space between the “`{{`” and “`%`” characters t
 #### Example
 *Example has an extra space so Hugo doesn’t actually render it*.
 
-    {{ % figure src="/media/spf13.jpg" title="Steve Francia" %}}
+    {{%/* figure src="/media/spf13.jpg" title="Steve Francia" */%}}
 
 #### Example output
 
@@ -157,7 +156,7 @@ You can also use the variable `.Page` to access all the normal [Page Variables](
 
 ## Single Positional Example: youtube
 
-    {{% youtube 09jf3ow9jfw %}}
+    {{%/* youtube 09jf3ow9jfw */%}}
 
 Would load the template /layouts/shortcodes/youtube.html
 
@@ -179,7 +178,7 @@ This would be rendered as:
 ## Single Named Example: image with caption
 *Example has an extra space so Hugo doesn’t actually render it*
 
-    {{ % img src="/media/spf13.jpg" title="Steve Francia" %}}
+    {{%/* img src="/media/spf13.jpg" title="Steve Francia" */%}}
 
 Would load the template /layouts/shortcodes/img.html
 
@@ -216,11 +215,11 @@ Would be rendered as:
 
 *Example has an extra space so Hugo doesn’t actually render it*.
 
-    {{ % highlight html %}}
+    {{%/* highlight html */%}}
     <html>
         <body> This HTML </body>
     </html>
-    {{ % /highlight %}}
+    {{%/* /highlight */%}}
 
 The template for this utilizes the following code (already include in Hugo)
 
index 2ff500da3d88cfe0473dd6de567e3a8ceb1d9d6f..bb779053362ac74d6f1c909a5fb8339eef7ac9d8 100644 (file)
@@ -1,4 +1,4 @@
-// Copyright © 2013 Steve Francia <spf@spf13.com>.
+// Copyright © 2013-14 Steve Francia <spf@spf13.com>.
 //
 // Licensed under the Simple Public License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -23,11 +23,18 @@ import (
        "github.com/spf13/viper"
 )
 
-func Highlight(code string, lexer string) string {
-       var pygmentsBin = "pygmentize"
+const pygmentsBin = "pygmentize"
 
+func HasPygments() bool {
        if _, err := exec.LookPath(pygmentsBin); err != nil {
+               return false
+       }
+       return true
+}
+
+func Highlight(code string, lexer string) string {
 
+       if !HasPygments() {
                jww.WARN.Println("Highlighting requires Pygments to be installed and in the path")
                return code
        }
index e30506c26068ee1c56a62128e5028674d67ae485..14a290c7ee2c6a4e56a777cb7fd3ecf2c6c09a22 100644 (file)
@@ -17,6 +17,10 @@ import (
        "bytes"
        "errors"
        "fmt"
+       "github.com/spf13/hugo/helpers"
+       "github.com/spf13/hugo/parser"
+       jww "github.com/spf13/jwalterweatherman"
+       "github.com/spf13/viper"
        "html/template"
        "io"
        "net/url"
@@ -25,12 +29,8 @@ import (
        "time"
 
        "github.com/spf13/cast"
-       "github.com/spf13/hugo/helpers"
        "github.com/spf13/hugo/hugofs"
-       "github.com/spf13/hugo/parser"
        "github.com/spf13/hugo/source"
-       jww "github.com/spf13/jwalterweatherman"
-       "github.com/spf13/viper"
 )
 
 type Page struct {
@@ -47,14 +47,15 @@ type Page struct {
        Tmpl            Template
        Markup          string
 
-       extension   string
-       contentType string
-       renderable  bool
-       layout      string
-       linkTitle   string
-       frontmatter []byte
-       rawContent  []byte
-       plain       string // TODO should be []byte
+       extension         string
+       contentType       string
+       renderable        bool
+       layout            string
+       linkTitle         string
+       frontmatter       []byte
+       rawContent        []byte
+       contentShortCodes map[string]string
+       plain             string // TODO should be []byte
        PageMeta
        Source
        Position
@@ -83,7 +84,7 @@ type Pages []*Page
 
 func (p *Page) Plain() string {
        if len(p.plain) == 0 {
-               p.plain = helpers.StripHTML(StripShortcodes(string(p.renderBytes(p.rawContent))))
+               p.plain = helpers.StripHTML(string(p.Content))
        }
        return p.plain
 }
@@ -100,13 +101,33 @@ func (p *Page) UniqueId() string {
        return p.Source.UniqueId()
 }
 
+// for logging
+func (p *Page) lineNumRawContentStart() int {
+       return bytes.Count(p.frontmatter, []byte("\n")) + 1
+}
+
 func (p *Page) setSummary() {
+
+       // at this point, p.rawContent contains placeholders for the short codes,
+       // rendered and ready in p.contentShortcodes
+
        if bytes.Contains(p.rawContent, helpers.SummaryDivider) {
                // If user defines split:
-               // Split then render
+               // Split, replace shortcode tokens, then render
                p.Truncated = true // by definition
                header := bytes.Split(p.rawContent, helpers.SummaryDivider)[0]
-               p.Summary = helpers.BytesToHTML(p.renderBytes(header))
+               renderedHeader := p.renderBytes(header)
+               numShortcodesInHeader := bytes.Count(header, []byte(shortcodePlaceholderPrefix))
+               if len(p.contentShortCodes) > 0 {
+                       tmpContentWithTokensReplaced, err :=
+                               replaceShortcodeTokens(renderedHeader, shortcodePlaceholderPrefix, numShortcodesInHeader, true, p.contentShortCodes)
+                       if err != nil {
+                               jww.FATAL.Printf("Failed to replace short code tokens in Summary for %s:\n%s", p.BaseFileName(), err.Error())
+                       } else {
+                               renderedHeader = tmpContentWithTokensReplaced
+                       }
+               }
+               p.Summary = helpers.BytesToHTML(renderedHeader)
        } else {
                // If hugo defines split:
                // render, strip html, then split
@@ -217,9 +238,6 @@ func (p *Page) ReadFrom(buf io.Reader) (err error) {
                return
        }
 
-       //analyze for raw stats
-       p.analyzePage()
-
        return nil
 }
 
@@ -550,7 +568,6 @@ func (page *Page) parse(reader io.Reader) error {
        }
 
        page.rawContent = psr.Content()
-       page.setSummary()
 
        return nil
 }
@@ -613,15 +630,32 @@ func (page *Page) SaveSource() error {
 }
 
 func (p *Page) ProcessShortcodes(t Template) {
-       p.rawContent = []byte(ShortcodesHandle(string(p.rawContent), p, t))
-       p.Summary = template.HTML(ShortcodesHandle(string(p.Summary), p, t))
+
+       // these short codes aren't used until after Page render,
+       // but processed here to avoid coupling
+       tmpContent, tmpContentShortCodes := extractAndRenderShortcodes(string(p.rawContent), p, t)
+       p.rawContent = []byte(tmpContent)
+       p.contentShortCodes = tmpContentShortCodes
+
 }
 
 func (page *Page) Convert() error {
        markupType := page.guessMarkupType()
        switch markupType {
        case "markdown", "rst":
+
                tmpContent, tmpTableOfContents := helpers.ExtractTOC(page.renderContent(helpers.RemoveSummaryDivider(page.rawContent)))
+
+               if len(page.contentShortCodes) > 0 {
+                       tmpContentWithTokensReplaced, err := replaceShortcodeTokens(tmpContent, shortcodePlaceholderPrefix, -1, true, page.contentShortCodes)
+
+                       if err != nil {
+                               jww.FATAL.Printf("Fail to replace short code tokens in %s:\n%s", page.BaseFileName(), err.Error())
+                       } else {
+                               tmpContent = tmpContentWithTokensReplaced
+                       }
+               }
+
                page.Content = helpers.BytesToHTML(tmpContent)
                page.TableOfContents = helpers.BytesToHTML(tmpTableOfContents)
        case "html":
@@ -629,6 +663,12 @@ func (page *Page) Convert() error {
        default:
                return fmt.Errorf("Error converting unsupported file type '%s' for page '%s'", markupType, page.Source.Path())
        }
+
+       // now we know enough to create a summary of the page and count some words
+       page.setSummary()
+       //analyze for raw stats
+       page.analyzePage()
+
        return nil
 }
 
index 1334b675a21d417af895fc85ecf00f96ab984c85..3af1d1971503bb9754bc48232bd626ad1551f72e 100644 (file)
@@ -116,7 +116,7 @@ Some more text
        SIMPLE_PAGE_WITH_SHORTCODE_IN_SUMMARY = `---
 title: Simple
 ---
-Summary Next Line. {{% img src="/not/real" %}}.
+Summary Next Line. {{<figure src="/not/real" >}}.
 More text here.
 
 Some more text
@@ -335,14 +335,18 @@ func TestPageWithDelimiter(t *testing.T) {
 }
 
 func TestPageWithShortCodeInSummary(t *testing.T) {
+       s := new(Site)
+       s.prepTemplates()
        p, _ := NewPage("simple.md")
        err := p.ReadFrom(strings.NewReader(SIMPLE_PAGE_WITH_SHORTCODE_IN_SUMMARY))
-       p.Convert()
        if err != nil {
                t.Fatalf("Unable to create a page with frontmatter and body content: %s", err)
        }
+       p.ProcessShortcodes(s.Tmpl)
+       p.Convert()
+
        checkPageTitle(t, p, "Simple")
-       checkPageContent(t, p, "<p>Summary Next Line. {{% img src=&ldquo;/not/real&rdquo; %}}.\nMore text here.</p>\n\n<p>Some more text</p>\n")
+       checkPageContent(t, p, "<p>Summary Next Line. \n<figure >\n    \n        <img src=\"/not/real\" />\n    \n    \n</figure>\n.\nMore text here.</p>\n\n<p>Some more text</p>\n")
        checkPageSummary(t, p, "Summary Next Line. . More text here. Some more text")
        checkPageType(t, p, "page")
        checkPageLayout(t, p, "page/single.html", "_default/single.html", "theme/page/single.html", "theme/_default/single.html")
index ef413bfb30bad52730abe3b9aeacfcc885f1d126..6dfc4ef02d4006e593bfd85fa707bf1efc4f94e0 100644 (file)
@@ -1,4 +1,4 @@
-// Copyright © 2013 Steve Francia <spf@spf13.com>.
+// Copyright © 2013-14 Steve Francia <spf@spf13.com>.
 //
 // Licensed under the Simple Public License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -15,13 +15,14 @@ package hugolib
 
 import (
        "bytes"
+       "fmt"
+       "github.com/spf13/hugo/helpers"
+       jww "github.com/spf13/jwalterweatherman"
        "html/template"
        "reflect"
+       "regexp"
+       "strconv"
        "strings"
-       "unicode"
-
-       "github.com/spf13/hugo/helpers"
-       jww "github.com/spf13/jwalterweatherman"
 )
 
 type ShortcodeFunc func([]string) string
@@ -76,222 +77,367 @@ func (scp *ShortcodeWithPage) Get(key interface{}) interface{} {
 
 }
 
-type Shortcodes map[string]ShortcodeFunc
+// Note - this value must not contain any markup syntax
+const shortcodePlaceholderPrefix = "HUGOSHORTCODE"
 
-func ShortcodesHandle(stringToParse string, p *Page, t Template) string {
-       leadStart := strings.Index(stringToParse, `{{%`)
-       if leadStart >= 0 {
-               leadEnd := strings.Index(stringToParse[leadStart:], `%}}`) + leadStart
-               if leadEnd > leadStart {
-                       name, par := SplitParams(stringToParse[leadStart+3 : leadEnd])
-                       tmpl := GetTemplate(name, t)
-                       if tmpl == nil {
-                               return stringToParse
-                       }
-                       params := Tokenize(par)
-                       // Always look for closing tag.
-                       endStart, endEnd := FindEnd(stringToParse[leadEnd:], name)
-                       var data = &ShortcodeWithPage{Params: params, Page: p}
-                       if endStart > 0 {
-                               s := stringToParse[leadEnd+3 : leadEnd+endStart]
-                               data.Inner = template.HTML(helpers.RenderBytes([]byte(CleanP(ShortcodesHandle(s, p, t))), p.guessMarkupType(), p.UniqueId()))
-                               remainder := CleanP(stringToParse[leadEnd+endEnd:])
-
-                               return CleanP(stringToParse[:leadStart]) +
-                                       ShortcodeRender(tmpl, data) +
-                                       CleanP(ShortcodesHandle(remainder, p, t))
-                       }
-                       return CleanP(stringToParse[:leadStart]) +
-                               ShortcodeRender(tmpl, data) +
-                               CleanP(ShortcodesHandle(stringToParse[leadEnd+3:], p,
-                                       t))
-               }
-       }
-       return stringToParse
+type shortcode struct {
+       name     string
+       inner    []interface{} // string or nested shortcode
+       params   interface{}   // map or array
+       err      error
+       doMarkup bool
 }
 
-// Clean up odd behavior when closing tag is on first line
-// or opening tag is on the last line due to extra line in markdown file
-func CleanP(str string) string {
-       if strings.HasSuffix(strings.TrimSpace(str), "<p>") {
-               idx := strings.LastIndex(str, "<p>")
-               str = str[:idx]
-       }
+func (sc shortcode) String() string {
+       // for testing (mostly), so any change here will break tests!
+       return fmt.Sprintf("%s(%q, %t){%s}", sc.name, sc.params, sc.doMarkup, sc.inner)
+}
 
-       if strings.HasPrefix(strings.TrimSpace(str), "</p>") {
-               str = str[strings.Index(str, "</p>")+5:]
-       }
+// all in  one go: extract, render and replace
+// only used for testing
+func ShortcodesHandle(stringToParse string, page *Page, t Template) string {
 
-       return str
-}
+       tmpContent, tmpShortcodes := extractAndRenderShortcodes(stringToParse, page, t)
+
+       if len(tmpShortcodes) > 0 {
+               tmpContentWithTokensReplaced, err := replaceShortcodeTokens([]byte(tmpContent), shortcodePlaceholderPrefix, -1, true, tmpShortcodes)
 
-func FindEnd(str string, name string) (int, int) {
-       var endPos int
-       var startPos int
-       var try []string
-
-       try = append(try, "{{% /"+name+" %}}")
-       try = append(try, "{{% /"+name+"%}}")
-       try = append(try, "{{%/"+name+"%}}")
-       try = append(try, "{{%/"+name+" %}}")
-
-       lowest := len(str)
-       for _, x := range try {
-               start := strings.Index(str, x)
-               if start < lowest && start > 0 {
-                       startPos = start
-                       endPos = startPos + len(x)
+               if err != nil {
+                       jww.ERROR.Printf("Fail to replace short code tokens in %s:\n%s", page.BaseFileName(), err.Error())
+               } else {
+                       return string(tmpContentWithTokensReplaced)
                }
        }
 
-       return startPos, endPos
+       return string(tmpContent)
 }
 
-func GetTemplate(name string, t Template) *template.Template {
-       if x := t.Lookup("shortcodes/" + name + ".html"); x != nil {
-               return x
-       }
-       if x := t.Lookup("theme/shortcodes/" + name + ".html"); x != nil {
-               return x
+var isInnerShortcodeCache = make(map[string]bool)
+
+// to avoid potential costly look-aheads for closing tags we look inside the template itself
+// we could change the syntax to self-closing tags, but that would make users cry
+// the value found is cached
+func isInnerShortcode(t *template.Template) bool {
+       if m, ok := isInnerShortcodeCache[t.Name()]; ok {
+               return m
        }
-       return t.Lookup("_internal/shortcodes/" + name + ".html")
+
+       match, _ := regexp.MatchString("{{.*?\\.Inner.*?}}", t.Tree.Root.String())
+       isInnerShortcodeCache[t.Name()] = match
+
+       return match
 }
 
-func StripShortcodes(stringToParse string) string {
-       posStart := strings.Index(stringToParse, "{{%")
-       if posStart > 0 {
-               posEnd := strings.Index(stringToParse[posStart:], "%}}") + posStart
-               if posEnd > posStart {
-                       newString := stringToParse[:posStart] + StripShortcodes(stringToParse[posEnd+3:])
-                       return newString
-               }
+func createShortcodePlaceholder(id int) string {
+       return fmt.Sprintf("<div>%s-%d</div>", shortcodePlaceholderPrefix, id)
+}
+
+func renderShortcodes(sc shortcode, p *Page, t Template) string {
+
+       tokenizedRenderedShortcodes := make(map[string](string))
+       startCount := 0
+
+       shortcodes := renderShortcode(sc, tokenizedRenderedShortcodes, startCount, p, t)
+
+       // placeholders will be numbered from 1.. and top down
+       for i := 1; i <= len(tokenizedRenderedShortcodes); i++ {
+               placeHolder := createShortcodePlaceholder(i)
+               shortcodes = strings.Replace(shortcodes, placeHolder, tokenizedRenderedShortcodes[placeHolder], 1)
        }
-       return stringToParse
+       return shortcodes
 }
 
-func CleanupSpacesAroundEquals(rawfirst []string) []string {
-       var first = make([]string, 0)
+func renderShortcode(sc shortcode, tokenizedShortcodes map[string](string), cnt int, p *Page, t Template) string {
+       var data = &ShortcodeWithPage{Params: sc.params, Page: p}
+       tmpl := GetTemplate(sc.name, t)
 
-       for i := 0; i < len(rawfirst); i++ {
-               v := rawfirst[i]
-               index := strings.Index(v, "=")
+       if tmpl == nil {
+               jww.ERROR.Printf("Unable to locate template for shortcode '%s' in page %s", sc.name, p.BaseFileName())
+               return ""
+       }
 
-               if index == len(v)-1 {
-                       // Trailing '='
-                       if len(rawfirst) > i {
-                               if v == "=" {
-                                       first[len(first)-1] = first[len(first)-1] + v + rawfirst[i+1] // concat prior with this and next
-                                       i++                                                           // Skip next
-                               } else {
-                                       // Trailing ' = '
-                                       first = append(first, v+rawfirst[i+1]) // append this token and the next
-                                       i++                                    // Skip next
-                               }
-                       } else {
-                               break
+       if len(sc.inner) > 0 {
+               var inner string
+               for _, innerData := range sc.inner {
+                       switch innerData.(type) {
+                       case string:
+                               inner += innerData.(string)
+                       case shortcode:
+                               // nested shortcodes will be rendered individually, replace them with temporary numbered tokens
+                               cnt++
+                               placeHolder := createShortcodePlaceholder(cnt)
+                               renderedContent := renderShortcode(innerData.(shortcode), tokenizedShortcodes, cnt, p, t)
+                               tokenizedShortcodes[placeHolder] = renderedContent
+                               inner += placeHolder
+                       default:
+                               jww.ERROR.Printf("Illegal state on shortcode rendering of '%s' in page %s. Illegal type in inner data: %s ",
+                                       sc.name, p.BaseFileName(), reflect.TypeOf(innerData))
+                               return ""
                        }
-               } else if index == 0 {
-                       // Leading '='
-                       first[len(first)-1] = first[len(first)-1] + v // concat this token to the prior one
-                       continue
+               }
+
+               if sc.doMarkup {
+                       data.Inner = template.HTML(helpers.RenderBytes([]byte(inner), p.guessMarkupType(), p.UniqueId()))
                } else {
-                       first = append(first, v)
+                       data.Inner = template.HTML(inner)
                }
+
        }
 
-       return first
+       return ShortcodeRender(tmpl, data)
 }
 
-func Tokenize(in string) interface{} {
-       var final = make([]string, 0)
+func extractAndRenderShortcodes(stringToParse string, p *Page, t Template) (string, map[string]string) {
 
-       // if there isn't a space or an equal sign, no need to parse
-       if strings.Index(in, " ") < 0 && strings.Index(in, "=") < 0 {
-               return append(final, in)
+       content, shortcodes, err := extractShortcodes(stringToParse, p, t)
+       renderedShortcodes := make(map[string]string)
+
+       if err != nil {
+               //  try to render what we have whilst logging the error
+               jww.ERROR.Println(err.Error())
        }
 
-       var keys = make([]string, 0)
-       inQuote := false
-       start := 0
+       for key, sc := range shortcodes {
+               if sc.err != nil {
+                       // need to have something to replace with
+                       renderedShortcodes[key] = ""
+               } else {
+                       renderedShortcodes[key] = renderShortcodes(sc, p, t)
+               }
+       }
 
-       first := CleanupSpacesAroundEquals(strings.Fields(in))
+       return content, renderedShortcodes
 
-       for i, v := range first {
-               index := strings.Index(v, "=")
-               if !inQuote {
-                       if index > 1 {
-                               keys = append(keys, v[:index])
-                               v = v[index+1:]
+}
+
+// pageTokens state:
+// - before: positioned just before the shortcode start
+// - after: shortcode(s) consumed (plural when they are nested)
+func extractShortcode(pt *pageTokens, p *Page, t Template) (shortcode, error) {
+       sc := shortcode{}
+       var isInner = false
+
+       var currItem item
+       var cnt = 0
+
+Loop:
+       for {
+               currItem = pt.next()
+
+               switch currItem.typ {
+               case tLeftDelimScWithMarkup, tLeftDelimScNoMarkup:
+                       next := pt.peek()
+                       if next.typ == tScClose {
+                               continue
                        }
-               }
 
-               // Adjusted to handle htmlencoded and non htmlencoded input
-               if !strings.HasPrefix(v, "&ldquo;") && !strings.HasPrefix(v, "\"") && !inQuote {
-                       final = append(final, v)
-               } else if inQuote && (strings.HasSuffix(v, "&rdquo;") ||
-                       strings.HasSuffix(v, "\"")) && !strings.HasSuffix(v, "\\\"") {
-                       if strings.HasSuffix(v, "\"") {
-                               first[i] = v[:len(v)-1]
+                       if cnt > 0 {
+                               // nested shortcode; append it to inner content
+                               pt.backup3(currItem, next)
+                               nested, err := extractShortcode(pt, p, t)
+                               if err == nil {
+                                       sc.inner = append(sc.inner, nested)
+                               } else {
+                                       return sc, err
+                               }
+
                        } else {
-                               first[i] = v[:len(v)-7]
+                               sc.doMarkup = currItem.typ == tLeftDelimScWithMarkup
                        }
-                       final = append(final, strings.Join(first[start:i+1], " "))
-                       inQuote = false
-               } else if (strings.HasPrefix(v, "&ldquo;") ||
-                       strings.HasPrefix(v, "\"")) && !inQuote {
-                       if strings.HasSuffix(v, "&rdquo;") || strings.HasSuffix(v,
-                               "\"") {
-                               if strings.HasSuffix(v, "\"") {
-                                       if len(v) > 1 {
-                                               final = append(final, v[1:len(v)-1])
-                                       } else {
-                                               final = append(final, "")
-                                       }
+
+                       cnt++
+
+               case tRightDelimScWithMarkup, tRightDelimScNoMarkup:
+                       // we trust the template on this:
+                       // if there's no inner, we're done
+                       if !isInner {
+                               return sc, nil
+                       }
+
+               case tScClose:
+                       if !isInner {
+                               next := pt.peek()
+                               if next.typ == tError {
+                                       // return that error, more specific
+                                       continue
+                               }
+                               return sc, fmt.Errorf("Shortcode '%s' has no .Inner, yet a closing tag was provided", next.val)
+                       }
+                       pt.consume(2)
+                       return sc, nil
+               case tText:
+                       sc.inner = append(sc.inner, currItem.val)
+               case tScName:
+                       sc.name = currItem.val
+                       tmpl := GetTemplate(sc.name, t)
+
+                       if tmpl == nil {
+                               return sc, fmt.Errorf("Unable to locate template for shortcode '%s' in page %s", sc.name, p.BaseFileName())
+                       }
+                       isInner = isInnerShortcode(tmpl)
+
+               case tScParam:
+                       if !pt.isValueNext() {
+                               continue
+                       } else if pt.peek().typ == tScParamVal {
+                               // named params
+                               if sc.params == nil {
+                                       params := make(map[string]string)
+                                       params[currItem.val] = pt.next().val
+                                       sc.params = params
                                } else {
-                                       final = append(final, v[7:len(v)-7])
+                                       params := sc.params.(map[string]string)
+                                       params[currItem.val] = pt.next().val
                                }
                        } else {
-                               start = i
-                               if strings.HasPrefix(v, "\"") {
-                                       first[i] = v[1:]
+                               // positional params
+                               if sc.params == nil {
+                                       var params []string
+                                       params = append(params, currItem.val)
+                                       sc.params = params
                                } else {
-                                       first[i] = v[7:]
+                                       params := sc.params.([]string)
+                                       params = append(params, currItem.val)
+                                       sc.params = params
                                }
-                               inQuote = true
                        }
+
+               case tError, tEOF:
+                       // handled by caller
+                       pt.backup()
+                       break Loop
+
                }
+       }
+       return sc, nil
+}
+
+func extractShortcodes(stringToParse string, p *Page, t Template) (string, map[string]shortcode, error) {
+
+       shortCodes := make(map[string]shortcode)
+
+       startIdx := strings.Index(stringToParse, "{{")
+
+       // short cut for docs with no shortcodes
+       if startIdx < 0 {
+               return stringToParse, shortCodes, nil
+       }
+
+       // the parser takes a string;
+       // since this is an internal API, it could make sense to use the mutable []byte all the way, but
+       // it seems that the time isn't really spent in the byte copy operations, and the impl. gets a lot cleaner
+       pt := &pageTokens{lexer: newShortcodeLexer("parse-page", stringToParse, pos(startIdx))}
+
+       id := 1 // incremented id, will be appended onto temp. shortcode placeholders
+       var result bytes.Buffer
+
+       // the parser is guaranteed to return items in proper order or fail, so …
+       // … it's safe to keep some "global" state
+       var currItem item
+       var currShortcode shortcode
+       var err error
+
+Loop:
+       for {
+               currItem = pt.next()
+
+               switch currItem.typ {
+               case tText:
+                       result.WriteString(currItem.val)
+               case tLeftDelimScWithMarkup, tLeftDelimScNoMarkup:
+                       // let extractShortcode handle left delim (will do so recursively)
+                       pt.backup()
+                       if currShortcode, err = extractShortcode(pt, p, t); err != nil {
+                               return result.String(), shortCodes, err
+                       }
+
+                       if currShortcode.params == nil {
+                               currShortcode.params = make([]string, 0)
+                       }
 
-               // No closing "... just make remainder the final token
-               if inQuote && i == len(first) {
-                       final = append(final, first[start:]...)
+                       // wrap it in a block level element to let it be left alone by the markup engine
+                       placeHolder := createShortcodePlaceholder(id)
+                       result.WriteString(placeHolder)
+                       shortCodes[placeHolder] = currShortcode
+                       id++
+               case tEOF:
+                       break Loop
+               case tError:
+                       err := fmt.Errorf("%s:%d: %s",
+                               p.BaseFileName(), (p.lineNumRawContentStart() + pt.lexer.lineNum() - 1), currItem)
+                       currShortcode.err = err
+                       return result.String(), shortCodes, err
                }
        }
 
-       if len(keys) > 0 && (len(keys) != len(final)) {
-               // This will happen if the quotes aren't balanced
-               return final
+       return result.String(), shortCodes, nil
+
+}
+
+// Replace prefixed shortcode tokens (HUGOSHORTCODE-1, HUGOSHORTCODE-2) with the real content.
+// This assumes that all tokens exist in the input string and that they are in order.
+// numReplacements = -1 will do len(replacements), and it will always start from the beginning (1)
+// wrappedInDiv = true means that the token is wrapped in a <div></div>
+func replaceShortcodeTokens(source []byte, prefix string, numReplacements int, wrappedInDiv bool, replacements map[string]string) ([]byte, error) {
+
+       if numReplacements < 0 {
+               numReplacements = len(replacements)
        }
 
-       if len(keys) > 0 {
-               var m = make(map[string]string)
-               for i, k := range keys {
-                       m[k] = final[i]
+       if numReplacements == 0 {
+               return source, nil
+       }
+
+       newLen := len(source)
+
+       for i := 1; i <= numReplacements; i++ {
+               key := prefix + "-" + strconv.Itoa(i)
+
+               if wrappedInDiv {
+                       key = "<div>" + key + "</div>"
                }
+               val := []byte(replacements[key])
 
-               return m
+               newLen += (len(val) - len(key))
        }
 
-       return final
-}
+       buff := make([]byte, newLen)
 
-func SplitParams(in string) (name string, par2 string) {
-       newIn := strings.TrimSpace(in)
-       i := strings.IndexFunc(newIn, unicode.IsSpace)
-       if i < 1 {
-               return strings.TrimSpace(in), ""
+       width := 0
+       start := 0
+
+       for i := 0; i < numReplacements; i++ {
+               tokenNum := i + 1
+               oldVal := prefix + "-" + strconv.Itoa(tokenNum)
+               if wrappedInDiv {
+                       oldVal = "<div>" + oldVal + "</div>"
+               }
+               newVal := []byte(replacements[oldVal])
+               j := start
+
+               k := bytes.Index(source[start:], []byte(oldVal))
+               if k < 0 {
+                       // this should never happen, but let the caller decide to panic or not
+                       return nil, fmt.Errorf("illegal state in content; shortcode token #%d is missing or out of order", tokenNum)
+               }
+               j += k
+
+               width += copy(buff[width:], source[start:j])
+               width += copy(buff[width:], newVal)
+               start = j + len(oldVal)
        }
+       width += copy(buff[width:], source[start:])
+       return buff[0:width], nil
+}
 
-       return strings.TrimSpace(newIn[:i+1]), strings.TrimSpace(newIn[i+1:])
+func GetTemplate(name string, t Template) *template.Template {
+       if x := t.Lookup("shortcodes/" + name + ".html"); x != nil {
+               return x
+       }
+       if x := t.Lookup("theme/shortcodes/" + name + ".html"); x != nil {
+               return x
+       }
+       return t.Lookup("_internal/shortcodes/" + name + ".html")
 }
 
 func ShortcodeRender(tmpl *template.Template, data *ShortcodeWithPage) string {
index 6a5aadd7934c47d0b952d4bee741f89f86b0b94e..91e2978055445f1b5cb6609206b65fb9a5ba0401 100644 (file)
@@ -1,6 +1,12 @@
 package hugolib
 
 import (
+       "fmt"
+       "github.com/spf13/hugo/helpers"
+       "github.com/spf13/viper"
+       "reflect"
+       "regexp"
+       "sort"
        "strings"
        "testing"
 )
@@ -21,40 +27,40 @@ func CheckShortCodeMatch(t *testing.T, input, expected string, template Template
 
 func TestNonSC(t *testing.T) {
        tem := NewTemplate()
-
-       CheckShortCodeMatch(t, "{{% movie 47238zzb %}}", "{{% movie 47238zzb %}}", tem)
+       // notice the syntax diff from 0.12, now comment delims must be added
+       CheckShortCodeMatch(t, "{{%/* movie 47238zzb */%}}", "{{% movie 47238zzb %}}", tem)
 }
 
 func TestPositionalParamSC(t *testing.T) {
        tem := NewTemplate()
        tem.AddInternalShortcode("video.html", `Playing Video {{ .Get 0 }}`)
 
-       CheckShortCodeMatch(t, "{{% video 47238zzb %}}", "Playing Video 47238zzb", tem)
-       CheckShortCodeMatch(t, "{{% video 47238zzb 132 %}}", "Playing Video 47238zzb", tem)
-       CheckShortCodeMatch(t, "{{%video 47238zzb%}}", "Playing Video 47238zzb", tem)
-       CheckShortCodeMatch(t, "{{%video 47238zzb    %}}", "Playing Video 47238zzb", tem)
-       CheckShortCodeMatch(t, "{{%   video   47238zzb    %}}", "Playing Video 47238zzb", tem)
+       CheckShortCodeMatch(t, "{{< video 47238zzb >}}", "Playing Video 47238zzb", tem)
+       CheckShortCodeMatch(t, "{{< video 47238zzb 132 >}}", "Playing Video 47238zzb", tem)
+       CheckShortCodeMatch(t, "{{<video 47238zzb>}}", "Playing Video 47238zzb", tem)
+       CheckShortCodeMatch(t, "{{<video 47238zzb    >}}", "Playing Video 47238zzb", tem)
+       CheckShortCodeMatch(t, "{{<   video   47238zzb    >}}", "Playing Video 47238zzb", tem)
 }
 
 func TestNamedParamSC(t *testing.T) {
        tem := NewTemplate()
        tem.AddInternalShortcode("img.html", `<img{{ with .Get "src" }} src="{{.}}"{{end}}{{with .Get "class"}} class="{{.}}"{{end}}>`)
 
-       CheckShortCodeMatch(t, `{{% img src="one" %}}`, `<img src="one">`, tem)
-       CheckShortCodeMatch(t, `{{% img class="aspen" %}}`, `<img class="aspen">`, tem)
-       CheckShortCodeMatch(t, `{{% img src= "one" %}}`, `<img src="one">`, tem)
-       CheckShortCodeMatch(t, `{{% img src ="one" %}}`, `<img src="one">`, tem)
-       CheckShortCodeMatch(t, `{{% img src = "one" %}}`, `<img src="one">`, tem)
-       CheckShortCodeMatch(t, `{{% img src = "one" class = "aspen grove" %}}`, `<img src="one" class="aspen grove">`, tem)
+       CheckShortCodeMatch(t, `{{< img src="one" >}}`, `<img src="one">`, tem)
+       CheckShortCodeMatch(t, `{{< img class="aspen" >}}`, `<img class="aspen">`, tem)
+       CheckShortCodeMatch(t, `{{< img src= "one" >}}`, `<img src="one">`, tem)
+       CheckShortCodeMatch(t, `{{< img src ="one" >}}`, `<img src="one">`, tem)
+       CheckShortCodeMatch(t, `{{< img src = "one" >}}`, `<img src="one">`, tem)
+       CheckShortCodeMatch(t, `{{< img src = "one" class = "aspen grove" >}}`, `<img src="one" class="aspen grove">`, tem)
 }
 
 func TestInnerSC(t *testing.T) {
        tem := NewTemplate()
        tem.AddInternalShortcode("inside.html", `<div{{with .Get "class"}} class="{{.}}"{{end}}>{{ .Inner }}</div>`)
 
-       CheckShortCodeMatch(t, `{{% inside class="aspen" %}}`, `<div class="aspen"></div>`, tem)
-       CheckShortCodeMatch(t, `{{% inside class="aspen" %}}More Here{{% /inside %}}`, "<div class=\"aspen\"><p>More Here</p>\n</div>", tem)
-       CheckShortCodeMatch(t, `{{% inside %}}More Here{{% /inside %}}`, "<div><p>More Here</p>\n</div>", tem)
+       CheckShortCodeMatch(t, `{{< inside class="aspen" >}}`, `<div class="aspen"></div>`, tem)
+       CheckShortCodeMatch(t, `{{< inside class="aspen" >}}More Here{{< /inside >}}`, "<div class=\"aspen\">More Here</div>", tem)
+       CheckShortCodeMatch(t, `{{< inside >}}More Here{{< /inside >}}`, "<div>More Here</div>", tem)
 }
 
 func TestInnerSCWithMarkdown(t *testing.T) {
@@ -69,6 +75,28 @@ func TestInnerSCWithMarkdown(t *testing.T) {
 {{% /inside %}}`, "<div><h1>More Here</h1>\n\n<p><a href=\"http://spf13.com\">link</a> and text</p>\n</div>", tem)
 }
 
+func TestInnerSCWithAndWithoutMarkdown(t *testing.T) {
+       tem := NewTemplate()
+       tem.AddInternalShortcode("inside.html", `<div{{with .Get "class"}} class="{{.}}"{{end}}>{{ .Inner }}</div>`)
+
+       CheckShortCodeMatch(t, `{{% inside %}}
+# More Here
+
+[link](http://spf13.com) and text
+
+{{% /inside %}}
+
+And then:
+
+{{< inside >}}
+# More Here
+
+This is **plain** text.
+
+{{< /inside >}}
+`, "<div><h1>More Here</h1>\n\n<p><a href=\"http://spf13.com\">link</a> and text</p>\n</div>\n\nAnd then:\n\n<div>\n# More Here\n\nThis is **plain** text.\n\n</div>\n", tem)
+}
+
 func TestEmbeddedSC(t *testing.T) {
        tem := NewTemplate()
        CheckShortCodeMatch(t, "{{% test %}}", "This is a simple Test", tem)
@@ -76,13 +104,220 @@ func TestEmbeddedSC(t *testing.T) {
        CheckShortCodeMatch(t, `{{% figure src="/found/here" class="bananas orange" caption="This is a caption" %}}`, "\n<figure class=\"bananas orange\">\n    \n        <img src=\"/found/here\" alt=\"This is a caption\" />\n    \n    \n    <figcaption>\n        <p>\n        This is a caption\n        \n            \n        \n        </p> \n    </figcaption>\n    \n</figure>\n", tem)
 }
 
+func TestNestedSC(t *testing.T) {
+       tem := NewTemplate()
+       tem.AddInternalShortcode("scn1.html", `<div>Outer, inner is {{ .Inner }}</div>`)
+       tem.AddInternalShortcode("scn2.html", `<div>SC2</div>`)
+
+       CheckShortCodeMatch(t, `{{% scn1 %}}{{% scn2 %}}{{% /scn1 %}}`, "<div>Outer, inner is <div>SC2</div>\n</div>", tem)
+}
+
+func TestNestedComplexSC(t *testing.T) {
+       tem := NewTemplate()
+       tem.AddInternalShortcode("row.html", `-row-{{ .Inner}}-rowStop-`)
+       tem.AddInternalShortcode("column.html", `-col-{{.Inner    }}-colStop-`)
+       tem.AddInternalShortcode("aside.html", `-aside-{{    .Inner  }}-asideStop-`)
+
+       CheckShortCodeMatch(t, `{{< row >}}1-s{{% column %}}2-**s**{{< aside >}}3-**s**{{< /aside >}}4-s{{% /column %}}5-s{{< /row >}}6-s`,
+               "-row-1-s-col-<p>2-<strong>s</strong>-aside-3-**s**-asideStop-4-s</p>\n-colStop-5-s-rowStop-6-s", tem)
+
+       // turn around the markup flag
+       CheckShortCodeMatch(t, `{{% row %}}1-s{{< column >}}2-**s**{{% aside %}}3-**s**{{% /aside %}}4-s{{< /column >}}5-s{{% /row %}}6-s`,
+               "-row-<p>1-s-col-2-**s**-aside-<p>3-<strong>s</strong></p>\n-asideStop-4-s-colStop-5-s</p>\n-rowStop-6-s", tem)
+}
+
 func TestFigureImgWidth(t *testing.T) {
        tem := NewTemplate()
        CheckShortCodeMatch(t, `{{% figure src="/found/here" class="bananas orange" alt="apple" width="100px" %}}`, "\n<figure class=\"bananas orange\">\n    \n        <img src=\"/found/here\" alt=\"apple\" width=\"100px\" />\n    \n    \n</figure>\n", tem)
 }
 
-func TestUnbalancedQuotes(t *testing.T) {
+func TestHighlight(t *testing.T) {
+       if !helpers.HasPygments() {
+               t.Skip("Skip test as Pygments is not installed")
+       }
+       defer viper.Set("PygmentsStyle", viper.Get("PygmentsStyle"))
+       viper.Set("PygmentsStyle", "bw")
+
        tem := NewTemplate()
 
-       CheckShortCodeMatch(t, `{{% figure src="/uploads/2011/12/spf13-mongosv-speaking-copy-1024x749.jpg "Steve Francia speaking at OSCON 2012" alt="MongoSV 2011" %}}`, "\n<figure >\n    \n        <img src=\"/uploads/2011/12/spf13-mongosv-speaking-copy-1024x749.jpg%20%22Steve%20Francia%20speaking%20at%20OSCON%202012\" alt=\"MongoSV 2011\" />\n    \n    \n</figure>\n", tem)
+       code := `
+{{< highlight java >}}
+void do();
+{{< /highlight >}}`
+       CheckShortCodeMatch(t, code, "\n<div class=\"highlight\" style=\"background: #ffffff\"><pre style=\"line-height: 125%\"><span style=\"font-weight: bold\">void</span> do();\n</pre></div>\n", tem)
+}
+
+const testScPlaceholderRegexp = "<div>HUGOSHORTCODE-\\d+</div>"
+
+func TestExtractShortcodes(t *testing.T) {
+       for i, this := range []struct {
+               name             string
+               input            string
+               expectShortCodes string
+               expect           interface{}
+               expectErrorMsg   string
+       }{
+               {"text", "Some text.", "map[]", "Some text.", ""},
+               {"invalid right delim", "{{< tag }}", "", false, "simple:4:.*unrecognized character.*}"},
+               {"invalid close", "\n{{< /tag >}}", "", false, "simple:5:.*got closing shortcode, but none is open"},
+               {"invalid close2", "\n\n{{< tag >}}{{< /anotherTag >}}", "", false, "simple:6: closing tag for shortcode 'anotherTag' does not match start tag"},
+               {"unterminated quote 1", `{{< figure src="im caption="S" >}}`, "", false, "simple:4:.got pos.*"},
+               {"unterminated quote 1", `{{< figure src="im" caption="S >}}`, "", false, "simple:4:.*unterm.*}"},
+               {"one shortcode, no markup", "{{< tag >}}", "", testScPlaceholderRegexp, ""},
+               {"one shortcode, markup", "{{% tag %}}", "", testScPlaceholderRegexp, ""},
+               {"one pos param", "{{% tag param1 %}}", `tag([\"param1\"], true){[]}"]`, testScPlaceholderRegexp, ""},
+               {"two pos params", "{{< tag param1 param2>}}", `tag([\"param1\" \"param2\"], false){[]}"]`, testScPlaceholderRegexp, ""},
+               {"one named param", `{{% tag param1="value" %}}`, `tag(map[\"param1\":\"value\"], true){[]}`, testScPlaceholderRegexp, ""},
+               {"two named params", `{{< tag param1="value1" param2="value2" >}}`, `tag(map[\"param1\":\"value1\" \"param2\":\"value2\"], false){[]}"]`,
+                       testScPlaceholderRegexp, ""},
+               {"inner", `Some text. {{< inner >}}Inner Content{{< / inner >}}. Some more text.`, `inner([], false){[Inner Content]}`,
+                       fmt.Sprintf("Some text. %s. Some more text.", testScPlaceholderRegexp), ""},
+               {"close, but not inner", "{{< tag >}}foo{{< /tag >}}", "", false, "Shortcode 'tag' has no .Inner.*"},
+               {"nested inner", `Inner->{{< inner >}}Inner Content->{{% inner2 param1 %}}inner2txt{{% /inner2 %}}Inner close->{{< / inner >}}<-done`,
+                       `inner([], false){[Inner Content-> inner2([\"param1\"], true){[inner2txt]} Inner close->]}`,
+                       fmt.Sprintf("Inner->%s<-done", testScPlaceholderRegexp), ""},
+               {"nested, nested inner", `Inner->{{< inner >}}inner2->{{% inner2 param1 %}}inner2txt->inner3{{< inner3>}}inner3txt{{</ inner3 >}}{{% /inner2 %}}final close->{{< / inner >}}<-done`,
+                       `inner([], false){[inner2-> inner2([\"param1\"], true){[inner2txt->inner3 inner3(%!q(<nil>), false){[inner3txt]}]} final close->`,
+                       fmt.Sprintf("Inner->%s<-done", testScPlaceholderRegexp), ""},
+               {"two inner", `Some text. {{% inner %}}First **Inner** Content{{% / inner %}} {{< inner >}}Inner **Content**{{< / inner >}}. Some more text.`,
+                       `map["<div>HUGOSHORTCODE-1</div>:inner([], true){[First **Inner** Content]}" "<div>HUGOSHORTCODE-2</div>:inner([], false){[Inner **Content**]}"]`,
+                       fmt.Sprintf("Some text. %s %s. Some more text.", testScPlaceholderRegexp, testScPlaceholderRegexp), ""},
+               {"closed without content", `Some text. {{< inner param1 >}}{{< / inner >}}. Some more text.`, `inner([\"param1\"], false){[]}`,
+                       fmt.Sprintf("Some text. %s. Some more text.", testScPlaceholderRegexp), ""},
+               {"two shortcodes", "{{< sc1 >}}{{< sc2 >}}",
+                       `map["<div>HUGOSHORTCODE-1</div>:sc1([], false){[]}" "<div>HUGOSHORTCODE-2</div>:sc2([], false){[]}"]`,
+                       testScPlaceholderRegexp + testScPlaceholderRegexp, ""},
+               {"mix of shortcodes", `Hello {{< sc1 >}}world{{% sc2 p2="2"%}}. And that's it.`,
+                       `map["<div>HUGOSHORTCODE-1</div>:sc1([], false){[]}" "<div>HUGOSHORTCODE-2</div>:sc2(map[\"p2\":\"2\"]`,
+                       fmt.Sprintf("Hello %sworld%s. And that's it.", testScPlaceholderRegexp, testScPlaceholderRegexp), ""},
+               {"mix with inner", `Hello {{< sc1 >}}world{{% inner p2="2"%}}Inner{{%/ inner %}}. And that's it.`,
+                       `map["<div>HUGOSHORTCODE-1</div>:sc1([], false){[]}" "<div>HUGOSHORTCODE-2</div>:inner(map[\"p2\":\"2\"], true){[Inner]}"]`,
+                       fmt.Sprintf("Hello %sworld%s. And that's it.", testScPlaceholderRegexp, testScPlaceholderRegexp), ""},
+       } {
+
+               p, _ := pageFromString(SIMPLE_PAGE, "simple.md")
+               tem := NewTemplate()
+               tem.AddInternalShortcode("tag.html", `tag`)
+               tem.AddInternalShortcode("sc1.html", `sc1`)
+               tem.AddInternalShortcode("sc2.html", `sc2`)
+               tem.AddInternalShortcode("inner.html", `{{.Inner}}`)
+               tem.AddInternalShortcode("inner2.html", `{{.Inner}}`)
+               tem.AddInternalShortcode("inner3.html", `{{.Inner}}`)
+
+               content, shortCodes, err := extractShortcodes(this.input, p, tem)
+
+               if b, ok := this.expect.(bool); ok && !b {
+                       if err == nil {
+                               t.Fatalf("[%d] %s: ExtractShortcodes didn't return an expected error", i, this.name)
+                       } else {
+                               r, _ := regexp.Compile(this.expectErrorMsg)
+                               if !r.MatchString(err.Error()) {
+                                       t.Fatalf("[%d] %s: ExtractShortcodes didn't return an expected error message, expected %s got %s",
+                                               i, this.name, this.expectErrorMsg, err.Error())
+                               }
+                       }
+                       continue
+               } else {
+                       if err != nil {
+                               t.Fatalf("[%d] %s: failed: %q", i, this.name, err)
+                       }
+               }
+
+               var expected string
+               av := reflect.ValueOf(this.expect)
+               switch av.Kind() {
+               case reflect.String:
+                       expected = av.String()
+               }
+
+               r, err := regexp.Compile(expected)
+
+               if err != nil {
+                       t.Fatalf("[%d] %s: Failed to compile regexp %q: %q", i, this.name, expected, err)
+               }
+
+               if strings.Count(content, shortcodePlaceholderPrefix) != len(shortCodes) {
+                       t.Fatalf("[%d] %s: Not enough placeholders, found %d", i, this.name, len(shortCodes))
+               }
+
+               if !r.MatchString(content) {
+                       t.Fatalf("[%d] %s: Shortcode extract didn't match. Expected: %q, Got: %q", i, this.name, expected, content)
+               }
+
+               for placeHolder, sc := range shortCodes {
+                       if !strings.Contains(content, placeHolder) {
+                               t.Fatalf("[%d] %s: Output does not contain placeholder %q", i, this.name, placeHolder)
+                       }
+
+                       if sc.params == nil {
+                               t.Fatalf("[%d] %s: Params is nil for shortcode '%s'", i, this.name, sc.name)
+                       }
+               }
+
+               if this.expectShortCodes != "" {
+                       shortCodesAsStr := fmt.Sprintf("map%q", collectAndShortShortcodes(shortCodes))
+                       if !strings.Contains(shortCodesAsStr, this.expectShortCodes) {
+                               t.Fatalf("[%d] %s: Short codes not as expected, got %s - expected to contain %s", i, this.name, shortCodesAsStr, this.expectShortCodes)
+                       }
+               }
+       }
+}
+
+func collectAndShortShortcodes(shortcodes map[string]shortcode) []string {
+       var asArray []string
+
+       for key, sc := range shortcodes {
+               asArray = append(asArray, fmt.Sprintf("%s:%s", key, sc))
+       }
+
+       sort.Strings(asArray)
+       return asArray
+
+}
+
+func TestReplaceShortcodeTokens(t *testing.T) {
+       for i, this := range []struct {
+               input           []byte
+               prefix          string
+               replacements    map[string]string
+               numReplacements int
+               wrappedInDiv    bool
+               expect          interface{}
+       }{
+               {[]byte("Hello PREFIX-1."), "PREFIX",
+                       map[string]string{"PREFIX-1": "World"}, -1, false, []byte("Hello World.")},
+               {[]byte("A <div>A-1</div> asdf <div>A-2</div>."), "A",
+                       map[string]string{"<div>A-1</div>": "v1", "<div>A-2</div>": "v2"}, -1, true, []byte("A v1 asdf v2.")},
+               {[]byte("Hello PREFIX2-1. Go PREFIX2-2, Go, Go PREFIX2-3 Go Go!."), "PREFIX2",
+                       map[string]string{"PREFIX2-1": "Europe", "PREFIX2-2": "Jonny", "PREFIX2-3": "Johnny"},
+                       -1, false, []byte("Hello Europe. Go Jonny, Go, Go Johnny Go Go!.")},
+               {[]byte("A PREFIX-2 PREFIX-1."), "PREFIX",
+                       map[string]string{"PREFIX-1": "A", "PREFIX-2": "B"}, -1, false, false},
+               {[]byte("A PREFIX-1 PREFIX-2"), "PREFIX",
+                       map[string]string{"PREFIX-1": "A"}, -1, false, []byte("A A PREFIX-2")},
+               {[]byte("A PREFIX-1 but not the second."), "PREFIX",
+                       map[string]string{"PREFIX-1": "A", "PREFIX-2": "B"}, -1, false, false},
+               {[]byte("An PREFIX-1."), "PREFIX",
+                       map[string]string{"PREFIX-1": "A", "PREFIX-2": "B"}, 1, false, []byte("An A.")},
+               {[]byte("An PREFIX-1 PREFIX-2."), "PREFIX",
+                       map[string]string{"PREFIX-1": "A", "PREFIX-2": "B"}, 1, false, []byte("An A PREFIX-2.")},
+       } {
+               results, err := replaceShortcodeTokens(this.input, this.prefix, this.numReplacements, this.wrappedInDiv, this.replacements)
+
+               if b, ok := this.expect.(bool); ok && !b {
+                       if err == nil {
+                               t.Errorf("[%d] replaceShortcodeTokens didn't return an expected error", i)
+                       }
+               } else {
+                       if err != nil {
+                               t.Errorf("[%d] failed: %s", i, err)
+                               continue
+                       }
+                       if !reflect.DeepEqual(results, this.expect) {
+                               t.Errorf("[%d] replaceShortcodeTokens, got %q but expected %q", i, results, this.expect)
+                       }
+               }
+
+       }
+
 }
diff --git a/hugolib/shortcodeparser.go b/hugolib/shortcodeparser.go
new file mode 100644 (file)
index 0000000..457c295
--- /dev/null
@@ -0,0 +1,598 @@
+// Copyright © 2013-14 Steve Francia <spf@spf13.com>.
+//
+// Licensed under the Simple Public License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://opensource.org/licenses/Simple-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package hugolib
+
+import (
+       "fmt"
+       "strings"
+       "unicode"
+       "unicode/utf8"
+)
+
+// The lexical scanning below is highly inspired by the great talk given by
+// Rob Pike called "Lexical Scanning in Go" (it's on YouTube, Google it!).
+// See slides here: http://cuddle.googlecode.com/hg/talk/lex.html
+
+// parsing
+
+type pageTokens struct {
+       lexer     *pagelexer
+       token     [3]item // 3-item look-ahead is what we currently need
+       peekCount int
+}
+
+func (t *pageTokens) next() item {
+       if t.peekCount > 0 {
+               t.peekCount--
+       } else {
+               t.token[0] = t.lexer.nextItem()
+       }
+       return t.token[t.peekCount]
+}
+
+// backs up one token.
+func (t *pageTokens) backup() {
+       t.peekCount++
+}
+
+// backs up two tokens.
+func (t *pageTokens) backup2(t1 item) {
+       t.token[1] = t1
+       t.peekCount = 2
+}
+
+// backs up three tokens.
+func (t *pageTokens) backup3(t2, t1 item) {
+       t.token[1] = t1
+       t.token[2] = t2
+       t.peekCount = 3
+}
+
+// check for non-error and non-EOF types coming next
+func (t *pageTokens) isValueNext() bool {
+       i := t.peek()
+       return i.typ != tError && i.typ != tEOF
+}
+
+// look at, but do not consume, the next item
+// repeated, sequential calls will return the same item
+func (t *pageTokens) peek() item {
+       if t.peekCount > 0 {
+               return t.token[t.peekCount-1]
+       }
+       t.peekCount = 1
+       t.token[0] = t.lexer.nextItem()
+       return t.token[0]
+}
+
+// convenience method to consume the next n tokens, but back off Errors and EOF
+func (t *pageTokens) consume(cnt int) {
+       for i := 0; i < cnt; i++ {
+               token := t.next()
+               if token.typ == tError || token.typ == tEOF {
+                       t.backup()
+                       break
+               }
+       }
+}
+
+// lexical scanning
+
+// position (in bytes)
+type pos int
+
+type item struct {
+       typ itemType
+       pos pos
+       val string
+}
+
+func (i item) String() string {
+       switch {
+       case i.typ == tEOF:
+               return "EOF"
+       case i.typ == tError:
+               return i.val
+       case i.typ > tKeywordMarker:
+               return fmt.Sprintf("<%s>", i.val)
+       case len(i.val) > 20:
+               return fmt.Sprintf("%.20q...", i.val)
+       }
+       return fmt.Sprintf("[%s]", i.val)
+}
+
+type itemType int
+
+// named params in shortcodes
+type namedParam struct {
+       name  string
+       value string
+}
+
+// for testing
+func (np namedParam) String() string {
+       return fmt.Sprintf("%s=%s", np.name, np.value)
+}
+
+const (
+       tError itemType = iota
+       tEOF
+
+       // shortcode items
+       tLeftDelimScNoMarkup
+       tRightDelimScNoMarkup
+       tLeftDelimScWithMarkup
+       tRightDelimScWithMarkup
+       tScClose
+       tScName
+       tScParam
+       tScParamVal
+
+       //itemIdentifier
+       tText // plain text, used for everything outside the shortcodes
+
+       // reserved for later - keywords come after this
+       tKeywordMarker
+)
+
+const eof = -1
+
+// returns the next state in scanner.
+type stateFunc func(*pagelexer) stateFunc
+
+type pagelexer struct {
+       name    string
+       input   string
+       state   stateFunc
+       pos     pos // input position
+       start   pos // item start position
+       width   pos // width of last element
+       lastPos pos // position of the last item returned by nextItem
+
+       // shortcode state
+       currLeftDelimItem  itemType
+       currRightDelimItem itemType
+       currShortcodeName  string          // is only set when a shortcode is in opened state
+       closingState       int             // > 0 = on its way to be closed
+       elementStepNum     int             // step number in element
+       paramElements      int             // number of elements (name + value = 2) found first
+       openShortcodes     map[string]bool // set of shortcodes in open state
+
+       // items delivered to client
+       items chan item
+}
+
+// note: the input position here is normally 0 (start), but
+// can be set if position of first shortcode is known
+func newShortcodeLexer(name, input string, inputPosition pos) *pagelexer {
+       lexer := &pagelexer{
+               name:               name,
+               input:              input,
+               currLeftDelimItem:  tLeftDelimScNoMarkup,
+               currRightDelimItem: tRightDelimScNoMarkup,
+               pos:                inputPosition,
+               openShortcodes:     make(map[string]bool),
+               items:              make(chan item),
+       }
+       go lexer.runShortcodeLexer()
+       return lexer
+}
+
+// main loop
+// this looks kind of funky, but it works
+func (l *pagelexer) runShortcodeLexer() {
+       for l.state = lexTextOutsideShortcodes; l.state != nil; {
+               l.state = l.state(l)
+       }
+
+       close(l.items)
+}
+
+// state functions
+
+const (
+       leftDelimScNoMarkup    = "{{<"
+       rightDelimScNoMarkup   = ">}}"
+       leftDelimScWithMarkup  = "{{%"
+       rightDelimScWithMarkup = "%}}"
+       leftComment            = "/*" // comments in this context are used to mark shortcodes as "not really a shortcode"
+       rightComment           = "*/"
+)
+
+func (l *pagelexer) next() rune {
+       if int(l.pos) >= len(l.input) {
+               l.width = 0
+               return eof
+       }
+
+       // looks expensive, but should produce the same iteration sequence as the string range loop
+       // see: http://blog.golang.org/strings
+       runeValue, runeWidth := utf8.DecodeRuneInString(l.input[l.pos:])
+       l.width = pos(runeWidth)
+       l.pos += l.width
+       return runeValue
+}
+
+// peeks at the next rune without consuming it
+func (l *pagelexer) peek() rune {
+       r := l.next()
+       l.backup()
+       return r
+}
+
+// steps back one
+func (l *pagelexer) backup() {
+       l.pos -= l.width
+}
+
+// sends an item back to the client.
+func (l *pagelexer) emit(t itemType) {
+       l.items <- item{t, l.start, l.input[l.start:l.pos]}
+       l.start = l.pos
+}
+
+// special case, do not send '\\' back to client
+func (l *pagelexer) ignoreEscapesAndEmit(t itemType) {
+       val := strings.Map(func(r rune) rune {
+               if r == '\\' {
+                       return -1
+               }
+               return r
+       }, l.input[l.start:l.pos])
+       l.items <- item{t, l.start, val}
+       l.start = l.pos
+}
+
+// gets the current value (for debugging and error handling)
+func (l *pagelexer) current() string {
+       return l.input[l.start:l.pos]
+}
+
+// ignore current element
+func (l *pagelexer) ignore() {
+       l.start = l.pos
+}
+
+// nice to have in error logs
+func (l *pagelexer) lineNum() int {
+       return strings.Count(l.input[:l.lastPos], "\n") + 1
+}
+
+// nil terminates the parser
+func (l *pagelexer) errorf(format string, args ...interface{}) stateFunc {
+       l.items <- item{tError, l.start, fmt.Sprintf(format, args...)}
+       return nil
+}
+
+// consumes and returns the next item
+func (l *pagelexer) nextItem() item {
+       item := <-l.items
+       l.lastPos = item.pos
+       return item
+}
+
+// scans until the opening delimiter of a shortcode.
+// if no shortcodes, it will keep on scanning until EOF
+func lexTextOutsideShortcodes(l *pagelexer) stateFunc {
+       for {
+               if strings.HasPrefix(l.input[l.pos:], leftDelimScWithMarkup) || strings.HasPrefix(l.input[l.pos:], leftDelimScNoMarkup) {
+                       if l.pos > l.start {
+                               l.emit(tText)
+                       }
+                       if strings.HasPrefix(l.input[l.pos:], leftDelimScWithMarkup) {
+                               l.currLeftDelimItem = tLeftDelimScWithMarkup
+                               l.currRightDelimItem = tRightDelimScWithMarkup
+                       } else {
+                               l.currLeftDelimItem = tLeftDelimScNoMarkup
+                               l.currRightDelimItem = tRightDelimScNoMarkup
+                       }
+                       return lexShortcodeLeftDelim
+
+               }
+               if l.next() == eof {
+                       break
+               }
+       }
+       // Done!
+       if l.pos > l.start {
+               l.emit(tText)
+       }
+       l.emit(tEOF)
+       return nil
+}
+
+func lexShortcodeLeftDelim(l *pagelexer) stateFunc {
+       l.pos += pos(len(l.currentLeftShortcodeDelim()))
+       if strings.HasPrefix(l.input[l.pos:], leftComment) {
+               return lexShortcodeComment
+       }
+       l.emit(l.currentLeftShortcodeDelimItem())
+       l.elementStepNum = 0
+       l.paramElements = 0
+       return lexInsideShortcode
+}
+
+func lexShortcodeComment(l *pagelexer) stateFunc {
+       posRightComment := strings.Index(l.input[l.pos:], rightComment)
+       if posRightComment < 0 {
+               return l.errorf("comment must be closed")
+       }
+       // we emit all as text, except the comment markers
+       l.emit(tText)
+       l.pos += pos(len(leftComment))
+       l.ignore()
+       l.pos += pos(posRightComment - len(leftComment))
+       l.emit(tText)
+       l.pos += pos(len(rightComment))
+       l.ignore()
+       if !strings.HasPrefix(l.input[l.pos:], l.currentRightShortcodeDelim()) {
+               return l.errorf("comment ends before the right shortcode delimiter")
+       }
+       l.pos += pos(len(l.currentRightShortcodeDelim()))
+       l.emit(tText)
+       return lexTextOutsideShortcodes
+}
+
+func lexShortcodeRightDelim(l *pagelexer) stateFunc {
+       l.pos += pos(len(l.currentRightShortcodeDelim()))
+       l.emit(l.currentRightShortcodeDelimItem())
+       return lexTextOutsideShortcodes
+}
+
+// lexShortcodeParam scans one parameter inside a shortcode: either a positional
+// parameter or the name part of a named parameter. Accepted forms:
+// 1. param
+// 2. "param" or \"param\"
+// 3. param="123" or param=\"123\"
+// 4. param="Some \"escaped\" text"
+//
+// escapedQuoteStart tells that the parameter was introduced by an escaped quote;
+// its negation is passed on as escapedQuotedValuesAllowed for quoted values.
+//
+// l.paramElements tracks the parameter style used so far in this shortcode:
+// 0 = no parameter seen yet, 1 = positional, 2 = named. Mixing the two styles
+// yields an error item.
+func lexShortcodeParam(l *pagelexer, escapedQuoteStart bool) stateFunc {
+
+       first := true
+       nextEq := false
+
+       var r rune
+
+       for {
+               r = l.next()
+               if first {
+                       if r == '"' {
+                               // a positional param with quotes
+                               if l.paramElements == 2 {
+                                       return l.errorf("got quoted positional parameter. Cannot mix named and positional parameters")
+                               }
+                               l.paramElements = 1
+                               // un-read the quote; the quoted-value state expects to see it
+                               l.backup()
+                               return lexShortcodeQuotedParamVal(l, !escapedQuoteStart, tScParam)
+                       }
+                       first = false
+               } else if r == '=' {
+                       // a named param
+                       l.backup()
+                       nextEq = true
+                       break
+               }
+
+               // any rune that cannot be part of an unquoted param ends it
+               if !isValidParamRune(r) {
+                       l.backup()
+                       break
+               }
+       }
+
+       if l.paramElements == 0 {
+               // first parameter of this shortcode decides the style: 1 or 2
+               l.paramElements++
+
+               if nextEq {
+                       l.paramElements++
+               }
+       } else {
+               if nextEq && l.paramElements == 1 {
+                       return l.errorf("got named parameter '%s'. Cannot mix named and positional parameters", l.current())
+               } else if !nextEq && l.paramElements == 2 {
+                       return l.errorf("got positional parameter '%s'. Cannot mix named and positional parameters", l.current())
+               }
+       }
+
+       l.emit(tScParam)
+       return lexInsideShortcode
+
+}
+
+// lexShortcodeQuotedParamVal scans a quoted value and emits its content, without
+// the surrounding quotes, as an item of type typ (tScParam for quoted positional
+// parameters, tScParamVal for named parameter values).
+//
+// escapedQuotedValuesAllowed controls how an escaped quote (\") found after the
+// opening quote is treated: when true it is part of the value (the backslashes
+// are stripped on emit); when false it terminates the value.
+//
+// An error item is emitted if end of input or a newline is reached before the
+// value is terminated.
+func lexShortcodeQuotedParamVal(l *pagelexer, escapedQuotedValuesAllowed bool, typ itemType) stateFunc {
+       openQuoteFound := false
+       escapedInnerQuoteFound := false
+       // 1 while the next '"' is known to be escaped and must not end the value
+       escapedQuoteState := 0
+
+Loop:
+       for {
+               switch r := l.next(); {
+               case r == '\\':
+                       if l.peek() == '"' {
+                               if openQuoteFound && !escapedQuotedValuesAllowed {
+                                       // the escaped quote closes the value; leave the backslash unread
+                                       l.backup()
+                                       break Loop
+                               } else if openQuoteFound {
+                                       // the coming quote is inside
+                                       escapedInnerQuoteFound = true
+                                       escapedQuoteState = 1
+                               }
+                       }
+               case r == eof, r == '\n':
+                       return l.errorf("unterminated quoted string in shortcode parameter-argument: '%s'", l.current())
+               case r == '"':
+                       if escapedQuoteState == 0 {
+                               if openQuoteFound {
+                                       // closing quote; leave it unread so it is not part of the value
+                                       l.backup()
+                                       break Loop
+
+                               } else {
+                                       // opening quote; the value starts right after it
+                                       openQuoteFound = true
+                                       l.ignore()
+                               }
+                       } else {
+                               // this was the escaped inner quote; it stays in the value
+                               escapedQuoteState = 0
+                       }
+
+               }
+       }
+
+       if escapedInnerQuoteFound {
+               l.ignoreEscapesAndEmit(typ)
+       } else {
+               l.emit(typ)
+       }
+
+       // consume the terminator that was left unread above
+       r := l.next()
+
+       if r == '\\' {
+               if l.peek() == '"' {
+                       // ignore the escaped closing quote
+                       l.ignore()
+                       l.next()
+                       l.ignore()
+               }
+       } else if r == '"' {
+               // ignore closing quote
+               l.ignore()
+       } else {
+               // handled by next state
+               l.backup()
+       }
+
+       return lexInsideShortcode
+}
+
+// lexIdentifierInShortcode scans the shortcode name, a run of runes accepted by
+// isAlphaNumeric, and emits it as tScName.
+// If a closing slash was seen just before (l.closingState > 0), the name must be
+// present in l.openShortcodes, otherwise an error item is emitted. After a valid
+// closing name only the end of the shortcode is accepted (lexEndOfShortcode).
+func lexIdentifierInShortcode(l *pagelexer) stateFunc {
+       lookForEnd := false
+Loop:
+       for {
+               switch r := l.next(); {
+               case isAlphaNumeric(r):
+                       // still inside the name, keep consuming
+               default:
+                       // first rune after the name: un-read it and process the name
+                       l.backup()
+                       word := l.input[l.start:l.pos]
+                       if l.closingState > 0 && !l.openShortcodes[word] {
+                               return l.errorf("closing tag for shortcode '%s' does not match start tag", word)
+                       } else if l.closingState > 0 {
+                               l.openShortcodes[word] = false
+                               lookForEnd = true
+                       }
+
+                       l.closingState = 0
+                       l.currShortcodeName = word
+                       // NOTE(review): this assignment also runs for closing tags, so the
+                       // false stored above is overwritten immediately and a name stays
+                       // marked as open once it has been seen. Confirm this is intended
+                       // (it does keep nested shortcodes of the same name working).
+                       l.openShortcodes[word] = true
+                       l.elementStepNum++
+                       l.emit(tScName)
+                       break Loop
+               }
+       }
+
+       if lookForEnd {
+               return lexEndOfShortcode
+       }
+       return lexInsideShortcode
+}
+
+func lexEndOfShortcode(l *pagelexer) stateFunc {
+       if strings.HasPrefix(l.input[l.pos:], l.currentRightShortcodeDelim()) {
+               return lexShortcodeRightDelim
+       }
+       switch r := l.next(); {
+       case isSpace(r):
+               l.ignore()
+       default:
+               return l.errorf("unclosed shortcode")
+       }
+       return lexEndOfShortcode
+}
+
+// lexInsideShortcode scans the elements inside shortcode tags and dispatches to
+// the state that handles the element starting at the current position: the
+// right delimiter, a closing slash, the shortcode name, a parameter or a
+// parameter value. Whitespace and line breaks between elements are skipped.
+// The order of the switch cases is significant: once the name has been read
+// (l.elementStepNum > 0) a word is a parameter, before that it is the name.
+func lexInsideShortcode(l *pagelexer) stateFunc {
+       if strings.HasPrefix(l.input[l.pos:], l.currentRightShortcodeDelim()) {
+               return lexShortcodeRightDelim
+       }
+       switch r := l.next(); {
+       case r == eof:
+               // eol is allowed inside shortcodes; this may go to end of document before it fails
+               return l.errorf("unclosed shortcode action")
+       case isSpace(r), isEndOfLine(r):
+               l.ignore()
+       case r == '=':
+               // start of a named parameter's value; the '=' itself is dropped.
+               // A value opened with an escaped quote may not contain escaped quotes.
+               l.ignore()
+               return lexShortcodeQuotedParamVal(l, l.peek() != '\\', tScParamVal)
+       case r == '/':
+               // closing tag, only valid if some shortcode name has been seen before
+               if l.currShortcodeName == "" {
+                       return l.errorf("got closing shortcode, but none is open")
+               }
+               l.closingState++
+               l.emit(tScClose)
+       case r == '\\':
+               // the backslash is dropped; \" starts an escaped-quote positional param
+               l.ignore()
+               if l.peek() == '"' {
+                       return lexShortcodeParam(l, true)
+               }
+       case l.elementStepNum > 0 && (isValidParamRune(r) || r == '"'): // positional params can have quotes
+               l.backup()
+               return lexShortcodeParam(l, false)
+       case isAlphaNumeric(r):
+               l.backup()
+               return lexIdentifierInShortcode
+       default:
+               return l.errorf("unrecognized character in shortcode action: %#U. Note: Parameters with non-alphanumeric args must be quoted", r)
+       }
+       return lexInsideShortcode
+}
+
+// state helpers
+
+// currentLeftShortcodeDelimItem returns the item type of the left delimiter of
+// the shortcode currently being lexed.
+func (l *pagelexer) currentLeftShortcodeDelimItem() itemType {
+       return l.currLeftDelimItem
+}
+
+// currentRightShortcodeDelimItem returns the item type of the right delimiter
+// expected for the shortcode currently being lexed.
+func (l *pagelexer) currentRightShortcodeDelimItem() itemType {
+       return l.currRightDelimItem
+}
+
+func (l *pagelexer) currentLeftShortcodeDelim() string {
+       if l.currLeftDelimItem == tLeftDelimScWithMarkup {
+               return leftDelimScWithMarkup
+       }
+       return leftDelimScNoMarkup
+
+}
+
+func (l *pagelexer) currentRightShortcodeDelim() string {
+       if l.currRightDelimItem == tRightDelimScWithMarkup {
+               return rightDelimScWithMarkup
+       }
+       return rightDelimScNoMarkup
+}
+
+// helper functions
+
+func isSpace(r rune) bool {
+       return r == ' ' || r == '\t'
+}
+
+func isValidParamRune(r rune) bool {
+       // let unquoted YouTube ids as positional params slip through (they contain hyphens)
+       return isAlphaNumeric(r) || r == '-'
+}
+
+func isEndOfLine(r rune) bool {
+       return r == '\r' || r == '\n'
+}
+
+func isAlphaNumeric(r rune) bool {
+       return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r)
+}
diff --git a/hugolib/shortcodeparser_test.go b/hugolib/shortcodeparser_test.go
new file mode 100644 (file)
index 0000000..8fa0244
--- /dev/null
@@ -0,0 +1,162 @@
+package hugolib
+
+import (
+       "testing"
+)
+
+// shortCodeLexerTest is one table-driven case for the shortcode lexer.
+type shortCodeLexerTest struct {
+       name  string // case name; shown in failure output and passed to newShortcodeLexer
+       input string // raw content handed to the lexer
+       items []item // expected item sequence, up to and including the tEOF or tError item
+}
+
+// Reusable expected items for the lexer test table. The position field is left
+// at 0 everywhere, as the comparison in equal only looks at type and value.
+var (
+       tstEOF       = item{tEOF, 0, ""}
+       tstLeftNoMD  = item{tLeftDelimScNoMarkup, 0, "{{<"}
+       tstRightNoMD = item{tRightDelimScNoMarkup, 0, ">}}"}
+       tstLeftMD    = item{tLeftDelimScWithMarkup, 0, "{{%"}
+       tstRightMD   = item{tRightDelimScWithMarkup, 0, "%}}"}
+       tstSCClose   = item{tScClose, 0, "/"}
+       tstSC1       = item{tScName, 0, "sc1"}
+       tstSC2       = item{tScName, 0, "sc2"}
+       tstSC3       = item{tScName, 0, "sc3"}
+       tstParam1    = item{tScParam, 0, "param1"}
+       tstParam2    = item{tScParam, 0, "param2"}
+       tstVal       = item{tScParamVal, 0, "Hello World"}
+)
+
+// shortCodeLexerTests lists the lexer inputs together with the exact item
+// sequence each one must produce. Cases that end in a tError item have no tEOF,
+// because collect stops at the first error.
+var shortCodeLexerTests = []shortCodeLexerTest{
+       {"empty", "", []item{tstEOF}},
+       {"spaces", " \t\n", []item{{tText, 0, " \t\n"}, tstEOF}},
+       {"text", `to be or not`, []item{{tText, 0, "to be or not"}, tstEOF}},
+       {"no markup", `{{< sc1 >}}`, []item{tstLeftNoMD, tstSC1, tstRightNoMD, tstEOF}},
+       {"with EOL", "{{< sc1 \n >}}", []item{tstLeftNoMD, tstSC1, tstRightNoMD, tstEOF}},
+
+       {"simple with markup", `{{% sc1 %}}`, []item{tstLeftMD, tstSC1, tstRightMD, tstEOF}},
+       {"with spaces", `{{<     sc1     >}}`, []item{tstLeftNoMD, tstSC1, tstRightNoMD, tstEOF}},
+       {"mismatched rightDelim", `{{< sc1 %}}`, []item{tstLeftNoMD, tstSC1,
+               {tError, 0, "unrecognized character in shortcode action: U+0025 '%'. Note: Parameters with non-alphanumeric args must be quoted"}}},
+       {"inner, markup", `{{% sc1 %}} inner {{% /sc1 %}}`, []item{
+               tstLeftMD,
+               tstSC1,
+               tstRightMD,
+               {tText, 0, " inner "},
+               tstLeftMD,
+               tstSCClose,
+               tstSC1,
+               tstRightMD,
+               tstEOF,
+       }},
+       {"close, but no open", `{{< /sc1 >}}`, []item{
+               tstLeftNoMD, {tError, 0, "got closing shortcode, but none is open"}}},
+       {"close wrong", `{{< sc1 >}}{{< /another >}}`, []item{
+               tstLeftNoMD, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose,
+               {tError, 0, "closing tag for shortcode 'another' does not match start tag"}}},
+       {"close, but no open, more", `{{< sc1 >}}{{< /sc1 >}}{{< /another >}}`, []item{
+               tstLeftNoMD, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose,
+               {tError, 0, "closing tag for shortcode 'another' does not match start tag"}}},
+       {"close with extra keyword", `{{< sc1 >}}{{< /sc1 keyword>}}`, []item{
+               tstLeftNoMD, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose, tstSC1,
+               {tError, 0, "unclosed shortcode"}}},
+       {"Youtube id", `{{< sc1 -ziL-Q_456igdO-4 >}}`, []item{
+               tstLeftNoMD, tstSC1, item{tScParam, 0, "-ziL-Q_456igdO-4"}, tstRightNoMD, tstEOF}},
+       {"non-alphanumerics param quoted", `{{< sc1 "-ziL-.%QigdO-4" >}}`, []item{
+               tstLeftNoMD, tstSC1, item{tScParam, 0, "-ziL-.%QigdO-4"}, tstRightNoMD, tstEOF}},
+
+       {"two params", `{{< sc1 param1   param2 >}}`, []item{
+               tstLeftNoMD, tstSC1, tstParam1, tstParam2, tstRightNoMD, tstEOF}},
+       {"nested simple", `{{< sc1 >}}{{< sc2 >}}{{< /sc1 >}}`, []item{
+               tstLeftNoMD, tstSC1, tstRightNoMD,
+               tstLeftNoMD, tstSC2, tstRightNoMD,
+               tstLeftNoMD, tstSCClose, tstSC1, tstRightNoMD, tstEOF}},
+       {"nested complex", `{{< sc1 >}}ab{{% sc2 param1 %}}cd{{< sc3 >}}ef{{< /sc3 >}}gh{{% /sc2 %}}ij{{< /sc1 >}}kl`, []item{
+               tstLeftNoMD, tstSC1, tstRightNoMD,
+               item{tText, 0, "ab"},
+               tstLeftMD, tstSC2, tstParam1, tstRightMD,
+               item{tText, 0, "cd"},
+               tstLeftNoMD, tstSC3, tstRightNoMD,
+               item{tText, 0, "ef"},
+               tstLeftNoMD, tstSCClose, tstSC3, tstRightNoMD,
+               item{tText, 0, "gh"},
+               tstLeftMD, tstSCClose, tstSC2, tstRightMD,
+               item{tText, 0, "ij"},
+               tstLeftNoMD, tstSCClose, tstSC1, tstRightNoMD,
+               item{tText, 0, "kl"}, tstEOF,
+       }},
+
+       {"two quoted params", `{{< sc1 "param nr. 1" "param nr. 2" >}}`, []item{
+               tstLeftNoMD, tstSC1, item{tScParam, 0, "param nr. 1"}, item{tScParam, 0, "param nr. 2"}, tstRightNoMD, tstEOF}},
+       {"two named params", `{{< sc1 param1="Hello World" param2="p2Val">}}`, []item{
+               tstLeftNoMD, tstSC1, tstParam1, tstVal, tstParam2, {tScParamVal, 0, "p2Val"}, tstRightNoMD, tstEOF}},
+       {"escaped quotes", `{{< sc1 param1=\"Hello World\"  >}}`, []item{
+               tstLeftNoMD, tstSC1, tstParam1, tstVal, tstRightNoMD, tstEOF}},
+       {"escaped quotes, positional param", `{{< sc1 \"param1\"  >}}`, []item{
+               tstLeftNoMD, tstSC1, tstParam1, tstRightNoMD, tstEOF}},
+       {"escaped quotes inside escaped quotes", `{{< sc1 param1=\"Hello \"escaped\" World\"  >}}`, []item{
+               tstLeftNoMD, tstSC1, tstParam1,
+               item{tScParamVal, 0, `Hello `}, {tError, 0, `got positional parameter 'escaped'. Cannot mix named and positional parameters`}}},
+       {"escaped quotes inside nonescaped quotes",
+               `{{< sc1 param1="Hello \"escaped\" World"  >}}`, []item{
+                       tstLeftNoMD, tstSC1, tstParam1, item{tScParamVal, 0, `Hello "escaped" World`}, tstRightNoMD, tstEOF}},
+       {"escaped quotes inside nonescaped quotes in positional param",
+               `{{< sc1 "Hello \"escaped\" World"  >}}`, []item{
+                       tstLeftNoMD, tstSC1, item{tScParam, 0, `Hello "escaped" World`}, tstRightNoMD, tstEOF}},
+       {"unterminated quote", `{{< sc1 param2="Hello World>}}`, []item{
+               tstLeftNoMD, tstSC1, tstParam2, {tError, 0, "unterminated quoted string in shortcode parameter-argument: 'Hello World>}}'"}}},
+       {"one named param, one not", `{{< sc1 param1="Hello World" p2 >}}`, []item{
+               tstLeftNoMD, tstSC1, tstParam1, tstVal,
+               {tError, 0, "got positional parameter 'p2'. Cannot mix named and positional parameters"}}},
+       {"one named param, one quoted positional param", `{{< sc1 param1="Hello World" "And Universe" >}}`, []item{
+               tstLeftNoMD, tstSC1, tstParam1, tstVal,
+               {tError, 0, "got quoted positional parameter. Cannot mix named and positional parameters"}}},
+       {"one quoted positional param, one named param", `{{< sc1 "param1" param2="And Universe" >}}`, []item{
+               tstLeftNoMD, tstSC1, tstParam1,
+               {tError, 0, "got named parameter 'param2'. Cannot mix named and positional parameters"}}},
+       {"ono positional param, one not", `{{< sc1 param1 param2="Hello World">}}`, []item{
+               tstLeftNoMD, tstSC1, tstParam1,
+               {tError, 0, "got named parameter 'param2'. Cannot mix named and positional parameters"}}},
+       // commented-out shortcodes come back as plain text, minus the comment markers
+       {"commented out", `{{</* sc1 */>}}`, []item{
+               item{tText, 0, "{{<"}, item{tText, 0, " sc1 "}, item{tText, 0, ">}}"}, tstEOF}},
+       {"commented out, missing close", `{{</* sc1 >}}`, []item{
+               {tError, 0, "comment must be closed"}}},
+       {"commented out, misplaced close", `{{</* sc1 >}}*/`, []item{
+               item{tText, 0, "{{<"}, item{tText, 0, " sc1 >}}"}, {tError, 0, "comment ends before the right shortcode delimiter"}}},
+}
+
+func TestPagelexer(t *testing.T) {
+       for _, test := range shortCodeLexerTests {
+
+               items := collect(&test)
+               if !equal(items, test.items) {
+                       t.Errorf("%s: got\n\t%v\nexpected\n\t%v", test.name, items, test.items)
+               }
+       }
+}
+
+func collect(t *shortCodeLexerTest) (items []item) {
+       l := newShortcodeLexer(t.name, t.input, 0)
+       for {
+               item := l.nextItem()
+               items = append(items, item)
+               if item.typ == tEOF || item.typ == tError {
+                       break
+               }
+       }
+       return
+}
+
+// no positional checking, for now ...
+func equal(i1, i2 []item) bool {
+       if len(i1) != len(i2) {
+               return false
+       }
+       for k := range i1 {
+               if i1[k].typ != i2[k].typ {
+                       return false
+               }
+               if i1[k].val != i2[k].val {
+                       return false
+               }
+       }
+       return true
+}
index 9509d8ea294dae3543e31ca37810abef5397dc7d..59221093f3f95c0a32ccbcee40dcfa52a5a6e7f7 100644 (file)
@@ -324,12 +324,6 @@ func Highlight(in interface{}, lang string) template.HTML {
                str = av.String()
        }
 
-       if strings.HasPrefix(strings.TrimSpace(str), "<pre><code>") {
-               str = str[strings.Index(str, "<pre><code>")+11:]
-       }
-       if strings.HasSuffix(strings.TrimSpace(str), "</code></pre>") {
-               str = str[:strings.LastIndex(str, "</code></pre>")]
-       }
        return template.HTML(helpers.Highlight(html.UnescapeString(str), lang))
 }