Handle scheme-less URLs (e.g. //host/path) when applying absurl
author: Noah Campbell <noahcampbell@gmail.com>
Sat, 9 Nov 2013 14:35:09 +0000 (14:35 +0000)
committer: Noah Campbell <noahcampbell@gmail.com>
Sat, 9 Nov 2013 14:35:09 +0000 (14:35 +0000)
Fixes #114

transform/absurl.go
transform/posttrans_test.go

index 80716f1b7ffa61267a7d2e83891ff94d901146b8..8bdd36515363757651112c14289c26405703c620 100644 (file)
@@ -23,15 +23,22 @@ func AbsURL(absURL string) (trs []link, err error) {
                hrefsq = []byte(" href='" + base + "/")
        )
        trs = append(trs, func(content []byte) []byte {
-               content = bytes.Replace(content, []byte(" src=\"/"), srcdq, -1)
-               content = bytes.Replace(content, []byte(" src='/"), srcsq, -1)
-               content = bytes.Replace(content, []byte(" href=\"/"), hrefdq, -1)
-               content = bytes.Replace(content, []byte(" href='/"), hrefsq, -1)
+               content = guardReplace(content, []byte(" src=\"//"), []byte(" src=\"/"), srcdq)
+               content = guardReplace(content, []byte(" src='//"), []byte(" src='/"), srcsq)
+               content = guardReplace(content, []byte(" href=\"//"), []byte(" href=\"/"), hrefdq)
+               content = guardReplace(content, []byte(" href='//"), []byte(" href='/"), hrefsq)
                return content
        })
        return
 }
 
+func guardReplace(content, guard, match, replace []byte) []byte {
+               if !bytes.Contains(content, guard) {
+                       content = bytes.Replace(content, match, replace, -1)
+               }
+               return content
+}
+
 type elattr struct {
        tag, attr string
 }
index d7d06fee7894d462265c981549900ac5dbf6fdfd..297b4f66e3268834a38fb21dac0fbd64d092d245 100644 (file)
@@ -12,8 +12,7 @@ const H5_JS_CONTENT_SINGLE_QUOTE = "<!DOCTYPE html><html><head><script src='foob
 
 const H5_JS_CONTENT_ABS_URL = "<!DOCTYPE html><html><head><script src=\"http://user@host:10234/foobar.js\"></script></head><body><nav><h1>title</h1></nav><article>content <a href=\"https://host/foobar\">foobar</a>. Follow up</article></body></html>"
 
-// URL doesn't recognize authorities.  BUG?
-//const H5_JS_CONTENT_ABS_URL = "<!DOCTYPE html><html><head><script src=\"//host/foobar.js\"></script></head><body><nav><h1>title</h1></nav><article>content <a href=\"https://host/foobar\">foobar</a>. Follow up</article></body></html>"
+const H5_JS_CONTENT_ABS_URL_SCHEMALESS = "<!DOCTYPE html><html><head><script src=\"//host/foobar.js\"></script><script src='//host2/barfoo.js'></head><body><nav><h1>title</h1></nav><article>content <a href=\"//host/foobar\">foobar</a>. <a href='//host2/foobar'>Follow up</a></article></body></html>"
 
 const CORRECT_OUTPUT_SRC_HREF_DQ = "<!DOCTYPE html><html><head><script src=\"foobar.js\"></script><script src=\"http://base/barfoo.js\"></script></head><body><nav><h1>title</h1></nav><article>content <a href=\"foobar\">foobar</a>. <a href=\"http://base/foobar\">Follow up</a></article></body></html>"
 
@@ -34,6 +33,7 @@ var abs_url_tests = []test{
        {H5_JS_CONTENT_DOUBLE_QUOTE, CORRECT_OUTPUT_SRC_HREF_DQ},
        {H5_JS_CONTENT_SINGLE_QUOTE, CORRECT_OUTPUT_SRC_HREF_SQ},
        {H5_JS_CONTENT_ABS_URL, H5_JS_CONTENT_ABS_URL},
+       {H5_JS_CONTENT_ABS_URL_SCHEMALESS, H5_JS_CONTENT_ABS_URL_SCHEMALESS},
 }
 
 type errorf func(string, ...interface{})