From 11ca84f8cb19f01b7977b549e63e0ce0d126054e Mon Sep 17 00:00:00 2001
From: Anton Ageev <antage@gmail.com>
Date: Sun, 2 Feb 2014 18:18:01 +0400
Subject: [PATCH] Add Unicode support for aliases, indexes, and the urlize
 template filter. Aliases and indexes are no longer restricted to ASCII
 letters and can include any Unicode letter.

---
 helpers/templates.go      | 31 +++++++++++++++++++++++++--
 helpers/templates_test.go | 45 +++++++++++++++++++++++++++++++++++++++
 hugolib/index.go          |  2 +-
 target/alias_test.go      |  1 +
 target/htmlredirect.go    |  2 +-
 5 files changed, 77 insertions(+), 4 deletions(-)
 create mode 100644 helpers/templates_test.go

diff --git a/helpers/templates.go b/helpers/templates.go
index 793450b4..94d6993c 100644
--- a/helpers/templates.go
+++ b/helpers/templates.go
@@ -14,16 +14,43 @@
 package helpers
 
 import (
+	"net/url"
 	"regexp"
 	"strings"
+	"unicode"
 )
 
 var sanitizeRegexp = regexp.MustCompile("[^a-zA-Z0-9./_-]")
 
-func Urlize(url string) string {
-	return Sanitize(strings.ToLower(strings.Replace(strings.TrimSpace(url), " ", "-", -1)))
+func MakePath(s string) string {
+	return unicodeSanitize(strings.ToLower(strings.Replace(strings.TrimSpace(s), " ", "-", -1)))
+}
+
+func Urlize(uri string) string {
+	sanitized := MakePath(uri)
+
+	// percent-encode non-ASCII letters by round-tripping through net/url
+	parsedUri, err := url.Parse(sanitized)
+	if err != nil {
+		// If net/url cannot parse the result, MakePath's sanitizing is incorrect.
+		panic(err)
+	}
+	return parsedUri.String()
 }
 
 func Sanitize(s string) string {
 	return sanitizeRegexp.ReplaceAllString(s, "")
 }
+
+func unicodeSanitize(s string) string {
+	source := []rune(s)
+	target := make([]rune, 0, len(source))
+
+	for _, r := range source {
+		if unicode.IsLetter(r) || unicode.IsDigit(r) || r == '.' || r == '/' || r == '_' || r == '-' {
+			target = append(target, r)
+		}
+	}
+
+	return string(target)
+}
diff --git a/helpers/templates_test.go b/helpers/templates_test.go
new file mode 100644
index 00000000..7252c2d9
--- /dev/null
+++ b/helpers/templates_test.go
@@ -0,0 +1,45 @@
+package helpers
+
+import (
+	"testing"
+)
+
+func TestMakePath(t *testing.T) {
+	tests := []struct {
+		input    string
+		expected string
+	}{
+		{"  foo bar  ", "foo-bar"},
+		{"foo.bar/foo_bar-foo", "foo.bar/foo_bar-foo"},
+		{"foo,bar:foo%bar", "foobarfoobar"},
+		{"foo/bar.html", "foo/bar.html"},
+		{"трям/трям", "трям/трям"},
+	}
+
+	for _, test := range tests {
+		output := MakePath(test.input)
+		if output != test.expected {
+			t.Errorf("Expected %#v, got %#v\n", test.expected, output)
+		}
+	}
+}
+
+func TestUrlize(t *testing.T) {
+	tests := []struct {
+		input    string
+		expected string
+	}{
+		{"  foo bar  ", "foo-bar"},
+		{"foo.bar/foo_bar-foo", "foo.bar/foo_bar-foo"},
+		{"foo,bar:foo%bar", "foobarfoobar"},
+		{"foo/bar.html", "foo/bar.html"},
+		{"трям/трям", "%D1%82%D1%80%D1%8F%D0%BC/%D1%82%D1%80%D1%8F%D0%BC"},
+	}
+
+	for _, test := range tests {
+		output := Urlize(test.input)
+		if output != test.expected {
+			t.Errorf("Expected %#v, got %#v\n", test.expected, output)
+		}
+	}
+}
diff --git a/hugolib/index.go b/hugolib/index.go
index 0189eccb..30cb8f94 100644
--- a/hugolib/index.go
+++ b/hugolib/index.go
@@ -59,7 +59,7 @@ type OrderedIndexEntry struct {
 
 // KeyPrep... Indexes should be case insensitive. Can make it easily conditional later.
 func kp(in string) string {
-	return helpers.Urlize(in)
+	return helpers.MakePath(in)
 }
 
 func (i Index) Get(key string) WeightedPages { return i[kp(key)] }
diff --git a/target/alias_test.go b/target/alias_test.go
index 7f5db79a..d19349f2 100644
--- a/target/alias_test.go
+++ b/target/alias_test.go
@@ -20,6 +20,7 @@ func TestHTMLRedirectAlias(t *testing.T) {
 		{"alias 3.html", "alias-3.html"},
 		{"alias4.html", "alias4.html"},
 		{"/alias 5.html", "/alias-5.html"},
+		{"/трям.html", "/трям.html"},
 	}
 
 	for _, test := range tests {
diff --git a/target/htmlredirect.go b/target/htmlredirect.go
index 53e900f9..55f4896e 100644
--- a/target/htmlredirect.go
+++ b/target/htmlredirect.go
@@ -39,7 +39,7 @@ func (h *HTMLRedirectAlias) Translate(alias string) (aliasPath string, err error
 	} else if !strings.HasSuffix(alias, ".html") {
 		alias = alias + "/index.html"
 	}
-	return path.Join(h.PublishDir, helpers.Urlize(alias)), nil
+	return path.Join(h.PublishDir, helpers.MakePath(alias)), nil
 }
 
 type AliasNode struct {
-- 
2.30.2