tpl: Fix countwords to handle special chars
author Julien Midedji <Julien.Midedji@gmail.com>
Mon, 3 May 2021 07:10:06 +0000 (09:10 +0200)
committer GitHub <noreply@github.com>
Mon, 3 May 2021 07:10:06 +0000 (09:10 +0200)
Fixes #8479

tpl/strings/strings.go
tpl/strings/strings_test.go

index b2f02d8f5263c8411791b07e848617b3165ada59..ac2defed50d6b7e74578ff4124f5f2d0865156c1 100644 (file)
@@ -17,6 +17,7 @@ package strings
 import (
        "errors"
        "html/template"
+       "regexp"
        "strings"
        "unicode/utf8"
 
@@ -75,6 +76,15 @@ func (ns *Namespace) CountWords(s interface{}) (int, error) {
                return 0, _errors.Wrap(err, "Failed to convert content to string")
        }
 
+       isCJKLanguage, err := regexp.MatchString(`\p{Han}|\p{Hangul}|\p{Hiragana}|\p{Katakana}`, ss)
+       if err != nil {
+               return 0, _errors.Wrap(err, "Failed to match regex pattern against string")
+       }
+
+       if !isCJKLanguage {
+               return len(strings.Fields(helpers.StripHTML((ss)))), nil
+       }
+
        counter := 0
        for _, word := range strings.Fields(helpers.StripHTML(ss)) {
                runeCount := utf8.RuneCountInString(word)
index f3bb82c63a1969f53755ee40491bd4d4419cf0ed..6e14a408c5af3bfcaa695a56acee895016a40ba9 100644 (file)
@@ -210,6 +210,9 @@ func TestCountWords(t *testing.T) {
                {"Do Be Do Be Do", 5},
                {"旁边", 2},
                {`<div class="test">旁边</div>`, 2},
+               {"Here's to you...", 3},
+               {"Here’s to you...", 3},
+               {"Here’s to you…", 3},
                // errors
                {tstNoStringer{}, false},
        } {