From: Bjørn Erik Pedersen Date: Sun, 5 Aug 2018 09:13:49 +0000 (+0200) Subject: Add support for minification of final output X-Git-Tag: v0.47~37 X-Git-Url: http://git.maquefel.me/?a=commitdiff_plain;h=789ef8c639e4621abd36da530bcb5942ac9297da;p=brevno-suite%2Fhugo Add support for minification of final output Hugo Pipes added minification support for resources fetched via `resources.Get` and similar. This also adds support for minification of the final output for supported output formats: HTML, XML, SVG, CSS, JavaScript, JSON. To enable, run Hugo with the `--minify` flag: ```bash hugo --minify ``` This commit is also a major spring cleaning of the `transform` package to allow the new minification step to fit into that processing chain. Fixes #1251 --- diff --git a/commands/commands.go b/commands/commands.go index 88939e60..17c9e15c 100644 --- a/commands/commands.go +++ b/commands/commands.go @@ -177,6 +177,7 @@ Complete documentation is available at http://gohugo.io/.`, cc.cmd.Flags().BoolVarP(&cc.buildWatch, "watch", "w", false, "watch filesystem for changes and recreate as needed") cc.cmd.Flags().Bool("renderToMemory", false, "render to memory (only useful for benchmark testing)") + cc.cmd.Flags().Bool("minify", false, "minify any supported output format (HTML, XML etc.)") // Set bash-completion _ = cc.cmd.PersistentFlags().SetAnnotation("logFile", cobra.BashCompFilenameExt, []string{}) diff --git a/commands/hugo.go b/commands/hugo.go index 31276749..1381bf21 100644 --- a/commands/hugo.go +++ b/commands/hugo.go @@ -209,16 +209,25 @@ func initializeFlags(cmd *cobra.Command, cfg config.Provider) { "verboseLog", } + // Will set a value even if it is the default. 
+ flagKeysForced := []string{ + "minify", + } + for _, key := range persFlagKeys { - setValueFromFlag(cmd.PersistentFlags(), key, cfg, "") + setValueFromFlag(cmd.PersistentFlags(), key, cfg, "", false) } for _, key := range flagKeys { - setValueFromFlag(cmd.Flags(), key, cfg, "") + setValueFromFlag(cmd.Flags(), key, cfg, "", false) + } + + for _, key := range flagKeysForced { + setValueFromFlag(cmd.Flags(), key, cfg, "", true) } // Set some "config aliases" - setValueFromFlag(cmd.Flags(), "destination", cfg, "publishDir") - setValueFromFlag(cmd.Flags(), "i18n-warnings", cfg, "logI18nWarnings") + setValueFromFlag(cmd.Flags(), "destination", cfg, "publishDir", false) + setValueFromFlag(cmd.Flags(), "i18n-warnings", cfg, "logI18nWarnings", false) } @@ -229,9 +238,9 @@ var deprecatedFlags = map[string]bool{ strings.ToLower("canonifyURLs"): true, } -func setValueFromFlag(flags *flag.FlagSet, key string, cfg config.Provider, targetKey string) { +func setValueFromFlag(flags *flag.FlagSet, key string, cfg config.Provider, targetKey string, force bool) { key = strings.TrimSpace(key) - if flags.Changed(key) { + if (force && flags.Lookup(key) != nil) || flags.Changed(key) { if _, deprecated := deprecatedFlags[strings.ToLower(key)]; deprecated { msg := fmt.Sprintf(`Set "%s = true" in your config.toml. If you need to set this configuration value from the command line, set it via an OS environment variable: "HUGO_%s=true hugo"`, key, strings.ToUpper(key)) diff --git a/deps/deps.go b/deps/deps.go index a0904483..3eecffca 100644 --- a/deps/deps.go +++ b/deps/deps.go @@ -162,7 +162,7 @@ func New(cfg DepsCfg) (*Deps, error) { return nil, err } - resourceSpec, err := resource.NewSpec(ps, logger, cfg.MediaTypes) + resourceSpec, err := resource.NewSpec(ps, logger, cfg.OutputFormats, cfg.MediaTypes) if err != nil { return nil, err } @@ -224,7 +224,7 @@ func (d Deps) ForLanguage(cfg DepsCfg) (*Deps, error) { // The resource cache is global so reuse. // TODO(bep) clean up these inits. 
resourceCache := d.ResourceSpec.ResourceCache - d.ResourceSpec, err = resource.NewSpec(d.PathSpec, d.Log, cfg.MediaTypes) + d.ResourceSpec, err = resource.NewSpec(d.PathSpec, d.Log, cfg.OutputFormats, cfg.MediaTypes) if err != nil { return nil, err } @@ -267,6 +267,9 @@ type DepsCfg struct { // The media types configured. MediaTypes media.Types + // The output formats configured. + OutputFormats output.Formats + // Template handling. TemplateProvider ResourceProvider WithTemplate func(templ tpl.TemplateHandler) error diff --git a/helpers/path.go b/helpers/path.go index 92ce4079..134a2052 100644 --- a/helpers/path.go +++ b/helpers/path.go @@ -515,6 +515,25 @@ func WriteToDisk(inpath string, r io.Reader, fs afero.Fs) (err error) { return afero.WriteReader(fs, inpath, r) } +// OpenFileForWriting opens or creates the given file. If the target directory +// does not exist, it gets created. +func OpenFileForWriting(fs afero.Fs, filename string) (afero.File, error) { + filename = filepath.Clean(filename) + // Create will truncate if file already exists. + f, err := fs.Create(filename) + if err != nil { + if !os.IsNotExist(err) { + return nil, err + } + if err = fs.MkdirAll(filepath.Dir(filename), 0755); err != nil { + return nil, err + } + f, err = fs.Create(filename) + } + + return f, err +} + // GetTempDir returns a temporary directory with the given sub path. 
func GetTempDir(subPath string, fs afero.Fs) string { return afero.GetTempDir(fs, subPath) diff --git a/hugolib/alias.go b/hugolib/alias.go index 3b053130..b2b29614 100644 --- a/hugolib/alias.go +++ b/hugolib/alias.go @@ -22,6 +22,8 @@ import ( "runtime" "strings" + "github.com/gohugoio/hugo/output" + "github.com/gohugoio/hugo/publisher" "github.com/gohugoio/hugo/tpl" jww "github.com/spf13/jwalterweatherman" @@ -89,11 +91,11 @@ func (a aliasHandler) renderAlias(isXHTML bool, permalink string, page *Page) (i return buffer, nil } -func (s *Site) writeDestAlias(path, permalink string, p *Page) (err error) { - return s.publishDestAlias(false, path, permalink, p) +func (s *Site) writeDestAlias(path, permalink string, outputFormat output.Format, p *Page) (err error) { + return s.publishDestAlias(false, path, permalink, outputFormat, p) } -func (s *Site) publishDestAlias(allowRoot bool, path, permalink string, p *Page) (err error) { +func (s *Site) publishDestAlias(allowRoot bool, path, permalink string, outputFormat output.Format, p *Page) (err error) { handler := newAliasHandler(s.Tmpl, s.Log, allowRoot) isXHTML := strings.HasSuffix(path, ".xhtml") @@ -110,7 +112,14 @@ func (s *Site) publishDestAlias(allowRoot bool, path, permalink string, p *Page) return err } - return s.publish(&s.PathSpec.ProcessingStats.Aliases, targetPath, aliasContent) + pd := publisher.Descriptor{ + Src: aliasContent, + TargetPath: targetPath, + StatCounter: &s.PathSpec.ProcessingStats.Aliases, + OutputFormat: outputFormat, + } + + return s.publisher.Publish(pd) } diff --git a/hugolib/hugo_sites.go b/hugolib/hugo_sites.go index 859e0da7..6ce6657f 100644 --- a/hugolib/hugo_sites.go +++ b/hugolib/hugo_sites.go @@ -24,6 +24,7 @@ import ( "github.com/gohugoio/hugo/deps" "github.com/gohugoio/hugo/helpers" "github.com/gohugoio/hugo/langs" + "github.com/gohugoio/hugo/publisher" "github.com/gohugoio/hugo/i18n" "github.com/gohugoio/hugo/tpl" @@ -182,6 +183,7 @@ func applyDeps(cfg deps.DepsCfg, sites 
...*Site) error { cfg.Language = s.Language cfg.MediaTypes = s.mediaTypesConfig + cfg.OutputFormats = s.outputFormatsConfig if d == nil { cfg.WithTemplate = s.withSiteTemplates(cfg.WithTemplate) @@ -208,6 +210,9 @@ func applyDeps(cfg deps.DepsCfg, sites ...*Site) error { s.Deps = d } + // Set up the main publishing chain. + s.publisher = publisher.NewDestinationPublisher(d.PathSpec.BaseFs.PublishFs, s.outputFormatsConfig, s.mediaTypesConfig, cfg.Cfg.GetBool("minify")) + if err := s.initializeSiteInfo(); err != nil { return err } diff --git a/hugolib/minify_publisher_test.go b/hugolib/minify_publisher_test.go new file mode 100644 index 00000000..ce183343 --- /dev/null +++ b/hugolib/minify_publisher_test.go @@ -0,0 +1,71 @@ +// Copyright 2018 The Hugo Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package hugolib + +import ( + "testing" + + "github.com/spf13/viper" + + "github.com/stretchr/testify/require" +) + +func TestMinifyPublisher(t *testing.T) { + t.Parallel() + assert := require.New(t) + + v := viper.New() + v.Set("minify", true) + v.Set("baseURL", "https://example.org/") + + htmlTemplate := ` + + + + + HTML5 boilerplate – all you really need… + + + + + + +

{{ .Page.Title }}

+ + + +` + + b := newTestSitesBuilder(t) + b.WithViper(v).WithContent("page.md", pageWithAlias) + b.WithTemplates("_default/list.html", htmlTemplate, "_default/single.html", htmlTemplate, "alias.html", htmlTemplate) + b.CreateSites().Build(BuildCfg{}) + + assert.Equal(1, len(b.H.Sites)) + require.Len(t, b.H.Sites[0].RegularPages, 1) + + // Check minification + // HTML + b.AssertFileContent("public/page/index.html", "HTML5 boilerplate – all you really need…

Has Alias

") + // HTML alias. Note the custom template which does no redirect. + b.AssertFileContent("public/foo/bar/index.html", "HTML5 boilerplate ") + + // RSS + b.AssertFileContent("public/index.xml", "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?><rss version=\"2.0\" xmlns:atom=\"http://www.w3.org/2005/Atom\"><channel><title/><link>https://example.org/</link>") + + // Sitemap + b.AssertFileContent("public/sitemap.xml", "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?><urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\" xmlns:xhtml=\"http://www.w3.org/1999/xhtml\"><url><loc>https://example.org/</loc><priority>0</priority></url><url>") +} diff --git a/hugolib/resource_chain_test.go b/hugolib/resource_chain_test.go index 3e199d31..0a4c3bd1 100644 --- a/hugolib/resource_chain_test.go +++ b/hugolib/resource_chain_test.go @@ -223,7 +223,7 @@ Min HTML: {{ ( resources.Get "mydata/html1.html" | resources.Minify ).Content | b.AssertFileContent("public/index.html", `Min XML: <hello><world>Hugo Rocks!</<world></hello>`) b.AssertFileContent("public/index.html", `Min SVG: <svg height="100" width="100"><path d="M5 10 20 40z"/></svg>`) b.AssertFileContent("public/index.html", `Min SVG again: <svg height="100" width="100"><path d="M5 10 20 40z"/></svg>`) - b.AssertFileContent("public/index.html", `Min HTML: <a href=#>Cool</a>`) + b.AssertFileContent("public/index.html", `Min HTML: <html><a href=#>Cool</a></html>`) }}, {"concat", func() bool { return true }, func(b *sitesBuilder) { diff --git a/hugolib/site.go b/hugolib/site.go index 4cca648f..b55b6040 100644 --- a/hugolib/site.go +++ b/hugolib/site.go @@ -29,6 +29,7 @@ import ( "time" "github.com/gohugoio/hugo/common/maps" + "github.com/gohugoio/hugo/publisher" "github.com/gohugoio/hugo/resource" "github.com/gohugoio/hugo/langs" @@ -54,15 +55,12 @@ import ( "github.com/gohugoio/hugo/related" "github.com/gohugoio/hugo/source" "github.com/gohugoio/hugo/tpl" - "github.com/gohugoio/hugo/transform" 
"github.com/spf13/afero" "github.com/spf13/cast" "github.com/spf13/nitro" "github.com/spf13/viper" ) -var _ = transform.AbsURL - // used to indicate if run as a test. var testMode bool @@ -151,6 +149,8 @@ type Site struct { relatedDocsHandler *relatedDocsHandler siteRefLinker + + publisher publisher.Publisher } type siteRenderingContext struct { @@ -195,6 +195,7 @@ func (s *Site) reset() *Site { mediaTypesConfig: s.mediaTypesConfig, Language: s.Language, owner: s.owner, + publisher: s.publisher, siteConfig: s.siteConfig, PageCollections: newPageCollections()} @@ -759,8 +760,9 @@ func (s *Site) processPartial(events []fsnotify.Event) (whatChanged, error) { site := sites[i] var err error depsCfg := deps.DepsCfg{ - Language: site.Language, - MediaTypes: site.mediaTypesConfig, + Language: site.Language, + MediaTypes: site.mediaTypesConfig, + OutputFormats: site.outputFormatsConfig, } site.Deps, err = first.Deps.ForLanguage(depsCfg) if err != nil { @@ -1637,8 +1639,8 @@ func (s *Site) permalink(link string) string { } -func (s *Site) renderAndWriteXML(statCounter *uint64, name string, dest string, d interface{}, layouts ...string) error { - s.Log.DEBUG.Printf("Render XML for %q to %q", name, dest) +func (s *Site) renderAndWriteXML(statCounter *uint64, name string, targetPath string, d interface{}, layouts ...string) error { + s.Log.DEBUG.Printf("Render XML for %q to %q", name, targetPath) renderBuffer := bp.GetBuffer() defer bp.PutBuffer(renderBuffer) renderBuffer.WriteString("<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\" ?>\n") @@ -1648,30 +1650,32 @@ func (s *Site) renderAndWriteXML(statCounter *uint64, name string, dest string, return nil } - outBuffer := bp.GetBuffer() - defer bp.PutBuffer(outBuffer) - - var path []byte + var path string if s.Info.relativeURLs { - path = []byte(helpers.GetDottedRelativePath(dest)) + path = helpers.GetDottedRelativePath(targetPath) } else { s := s.PathSpec.BaseURL.String() if !strings.HasSuffix(s, "/") { s += "/" } - 
path = []byte(s) + path = s } - transformer := transform.NewChain(transform.AbsURLInXML) - if err := transformer.Apply(outBuffer, renderBuffer, path); err != nil { - s.DistinctErrorLog.Println(err) - return nil + + pd := publisher.Descriptor{ + Src: renderBuffer, + TargetPath: targetPath, + StatCounter: statCounter, + // For the minification part of XML, + // we currently only use the MIME type. + OutputFormat: output.RSSFormat, + AbsURLPath: path, } - return s.publish(statCounter, dest, outBuffer) + return s.publisher.Publish(pd) } -func (s *Site) renderAndWritePage(statCounter *uint64, name string, dest string, p *PageOutput, layouts ...string) error { +func (s *Site) renderAndWritePage(statCounter *uint64, name string, targetPath string, p *PageOutput, layouts ...string) error { renderBuffer := bp.GetBuffer() defer bp.PutBuffer(renderBuffer) @@ -1684,49 +1688,44 @@ func (s *Site) renderAndWritePage(statCounter *uint64, name string, dest string, return nil } - outBuffer := bp.GetBuffer() - defer bp.PutBuffer(outBuffer) + isHTML := p.outputFormat.IsHTML - transformLinks := transform.NewEmptyTransforms() + var path string - isHTML := p.outputFormat.IsHTML + if s.Info.relativeURLs { + path = helpers.GetDottedRelativePath(targetPath) + } else if s.Info.canonifyURLs { + url := s.PathSpec.BaseURL.String() + if !strings.HasSuffix(url, "/") { + url += "/" + } + path = url + } + + pd := publisher.Descriptor{ + Src: renderBuffer, + TargetPath: targetPath, + StatCounter: statCounter, + OutputFormat: p.outputFormat, + } if isHTML { if s.Info.relativeURLs || s.Info.canonifyURLs { - transformLinks = append(transformLinks, transform.AbsURL) + pd.AbsURLPath = path } if s.running() && s.Cfg.GetBool("watch") && !s.Cfg.GetBool("disableLiveReload") { - transformLinks = append(transformLinks, transform.LiveReloadInject(s.Cfg.GetInt("liveReloadPort"))) + pd.LiveReloadPort = s.Cfg.GetInt("liveReloadPort") } // For performance reasons we only inject the Hugo generator tag on the home 
page. if p.IsHome() { - if !s.Cfg.GetBool("disableHugoGeneratorInject") { - transformLinks = append(transformLinks, transform.HugoGeneratorInject) - } - } - } - - var path []byte - - if s.Info.relativeURLs { - path = []byte(helpers.GetDottedRelativePath(dest)) - } else if s.Info.canonifyURLs { - url := s.PathSpec.BaseURL.String() - if !strings.HasSuffix(url, "/") { - url += "/" + pd.AddHugoGeneratorTag = !s.Cfg.GetBool("disableHugoGeneratorInject") } - path = []byte(url) - } - transformer := transform.NewChain(transformLinks...) - if err := transformer.Apply(outBuffer, renderBuffer, path); err != nil { - s.DistinctErrorLog.Println(err) - return nil } - return s.publish(statCounter, dest, outBuffer) + return s.publisher.Publish(pd) } func (s *Site) renderForLayouts(name string, d interface{}, w io.Writer, layouts ...string) (err error) { diff --git a/hugolib/site_render.go b/hugolib/site_render.go index 2da4064b..a0d6506e 100644 --- a/hugolib/site_render.go +++ b/hugolib/site_render.go @@ -195,7 +195,7 @@ func (s *Site) renderPaginator(p *PageOutput) error { // TODO(bep) do better link := newOutputFormat(p.Page, p.outputFormat).Permalink() - if err := s.writeDestAlias(target, link, nil); err != nil { + if err := s.writeDestAlias(target, link, p.outputFormat, nil); err != nil { return err } @@ -417,7 +417,7 @@ func (s *Site) renderAliases() error { a = path.Join(lang, a) } - if err := s.writeDestAlias(a, plink, p); err != nil { + if err := s.writeDestAlias(a, plink, f, p); err != nil { return err } } @@ -425,18 +425,21 @@ func (s *Site) renderAliases() error { } if s.owner.multilingual.enabled() && !s.owner.IsMultihost() { - mainLang := s.owner.multilingual.DefaultLang - if s.Info.defaultContentLanguageInSubdir { - mainLangURL := s.PathSpec.AbsURL(mainLang.Lang, false) - s.Log.DEBUG.Printf("Write redirect to main language %s: %s", mainLang, mainLangURL) - if err := s.publishDestAlias(true, "/", mainLangURL, nil); err != nil { - return err - } - } else { - mainLangURL 
:= s.PathSpec.AbsURL("", false) - s.Log.DEBUG.Printf("Write redirect to main language %s: %s", mainLang, mainLangURL) - if err := s.publishDestAlias(true, mainLang.Lang, mainLangURL, nil); err != nil { - return err + html, found := s.outputFormatsConfig.GetByName("HTML") + if found { + mainLang := s.owner.multilingual.DefaultLang + if s.Info.defaultContentLanguageInSubdir { + mainLangURL := s.PathSpec.AbsURL(mainLang.Lang, false) + s.Log.DEBUG.Printf("Write redirect to main language %s: %s", mainLang, mainLangURL) + if err := s.publishDestAlias(true, "/", mainLangURL, html, nil); err != nil { + return err + } + } else { + mainLangURL := s.PathSpec.AbsURL("", false) + s.Log.DEBUG.Printf("Write redirect to main language %s: %s", mainLang, mainLangURL) + if err := s.publishDestAlias(true, mainLang.Lang, mainLangURL, html, nil); err != nil { + return err + } } } } diff --git a/hugolib/testhelpers_test.go b/hugolib/testhelpers_test.go index cee1f8c4..4ba95144 100644 --- a/hugolib/testhelpers_test.go +++ b/hugolib/testhelpers_test.go @@ -130,6 +130,7 @@ func (s *sitesBuilder) WithConfigTemplate(data interface{}, format, configTempla func (s *sitesBuilder) WithViper(v *viper.Viper) *sitesBuilder { loadDefaultSettingsFor(v) s.Cfg = v + return s } diff --git a/minifiers/minifiers.go b/minifiers/minifiers.go new file mode 100644 index 00000000..70d42843 --- /dev/null +++ b/minifiers/minifiers.go @@ -0,0 +1,126 @@ +// Copyright 2018 The Hugo Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +// Package minifiers contains minifiers mapped to MIME types. This package is used +// in both the resource transformation, i.e. resources.Minify, and in the publishing +// chain. +package minifiers + +import ( + "io" + + "github.com/gohugoio/hugo/output" + "github.com/gohugoio/hugo/transform" + + "github.com/gohugoio/hugo/media" + "github.com/tdewolff/minify" + "github.com/tdewolff/minify/css" + "github.com/tdewolff/minify/html" + "github.com/tdewolff/minify/js" + "github.com/tdewolff/minify/json" + "github.com/tdewolff/minify/svg" + "github.com/tdewolff/minify/xml" +) + +// Client wraps a minifier. +type Client struct { + m *minify.M +} + +// Transformer returns a func that can be used in the transformer publishing chain. +// TODO(bep) minify config etc +func (m Client) Transformer(mediatype media.Type) transform.Transformer { + _, params, min := m.m.Match(mediatype.Type()) + if min == nil { + // No minifier for this MIME type + return nil + } + + return func(ft transform.FromTo) error { + // Note that the source io.Reader will already be buffered, but it implements + // the Bytes() method, which is recognized by the Minify library. + return min.Minify(m.m, ft.To(), ft.From(), params) + } +} + +// Minify tries to minify the src into dst given a MIME type. +func (m Client) Minify(mediatype media.Type, dst io.Writer, src io.Reader) error { + return m.m.Minify(mediatype.Type(), dst, src) +} + +// New creates a new Client with the provided MIME types as the mapping foundation. +// The HTML minifier is also registered for additional HTML types (AMP etc.) in the +// provided list of output formats. +func New(mediaTypes media.Types, outputFormats output.Formats) Client { + m := minify.New() + htmlMin := &html.Minifier{ + KeepDocumentTags: true, + } + + // We use the Type definition of the media types defined in the site if found. 
+ addMinifierFunc(m, mediaTypes, "text/css", "css", css.Minify) + addMinifierFunc(m, mediaTypes, "application/javascript", "js", js.Minify) + addMinifierFunc(m, mediaTypes, "application/json", "json", json.Minify) + addMinifierFunc(m, mediaTypes, "image/svg+xml", "svg", svg.Minify) + addMinifierFunc(m, mediaTypes, "application/xml", "xml", xml.Minify) + addMinifierFunc(m, mediaTypes, "application/rss", "xml", xml.Minify) + + // HTML + addMinifier(m, mediaTypes, "text/html", "html", htmlMin) + for _, of := range outputFormats { + if of.IsHTML { + addMinifier(m, mediaTypes, of.MediaType.Type(), "html", htmlMin) + } + } + return Client{m: m} + +} + +func addMinifier(m *minify.M, mt media.Types, typeString, suffix string, min minify.Minifier) { + resolvedTypeStr := resolveMediaTypeString(mt, typeString, suffix) + m.Add(resolvedTypeStr, min) + if resolvedTypeStr != typeString { + m.Add(typeString, min) + } +} + +func addMinifierFunc(m *minify.M, mt media.Types, typeString, suffix string, fn minify.MinifierFunc) { + resolvedTypeStr := resolveMediaTypeString(mt, typeString, suffix) + m.AddFunc(resolvedTypeStr, fn) + if resolvedTypeStr != typeString { + m.AddFunc(typeString, fn) + } +} + +func resolveMediaTypeString(types media.Types, typeStr, suffix string) string { + if m, found := resolveMediaType(types, typeStr, suffix); found { + return m.Type() + } + // Fall back to the default. + return typeStr +} + +// Make sure we match the matching pattern with what the user have actually defined +// in his or hers media types configuration. 
+func resolveMediaType(types media.Types, typeStr, suffix string) (media.Type, bool) { + if m, found := types.GetByType(typeStr); found { + return m, true + } + + if m, found := types.GetFirstBySuffix(suffix); found { + return m, true + } + + return media.Type{}, false + +} diff --git a/minifiers/minifiers_test.go b/minifiers/minifiers_test.go new file mode 100644 index 00000000..6d72dc44 --- /dev/null +++ b/minifiers/minifiers_test.go @@ -0,0 +1,35 @@ +// Copyright 2018 The Hugo Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package minifiers + +import ( + "bytes" + "strings" + "testing" + + "github.com/gohugoio/hugo/media" + + "github.com/gohugoio/hugo/output" + "github.com/stretchr/testify/require" +) + +func TestNew(t *testing.T) { + assert := require.New(t) + m := New(media.DefaultTypes, output.DefaultFormats) + + var b bytes.Buffer + + assert.NoError(m.Minify(media.CSSType, &b, strings.NewReader("body { color: blue; }"))) + assert.Equal("body{color:blue}", b.String()) +} diff --git a/publisher/publisher.go b/publisher/publisher.go new file mode 100644 index 00000000..be447279 --- /dev/null +++ b/publisher/publisher.go @@ -0,0 +1,160 @@ +// Copyright 2018 The Hugo Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package publisher + +import ( + "errors" + "io" + "sync/atomic" + + "github.com/gohugoio/hugo/media" + + "github.com/gohugoio/hugo/minifiers" + + bp "github.com/gohugoio/hugo/bufferpool" + "github.com/gohugoio/hugo/helpers" + + "github.com/spf13/afero" + + "github.com/gohugoio/hugo/output" + "github.com/gohugoio/hugo/transform" + "github.com/gohugoio/hugo/transform/livereloadinject" + "github.com/gohugoio/hugo/transform/metainject" + "github.com/gohugoio/hugo/transform/urlreplacers" +) + +// Descriptor describes the needed publishing chain for an item. +type Descriptor struct { + // The content to publish. + Src io.Reader + + // The OutputFormat of the this content. + OutputFormat output.Format + + // Where to publish this content. This is a filesystem-relative path. + TargetPath string + + // Counter for the end build summary. + StatCounter *uint64 + + // Configuration that trigger pre-processing. + // LiveReload script will be injected if this is > 0 + LiveReloadPort int + + // Enable to inject the Hugo generated tag in the header. Is currently only + // injected on the home page for HTML type of output formats. + AddHugoGeneratorTag bool + + // If set, will replace all relative URLs with this one. + AbsURLPath string + + // Enable to minify the output using the OutputFormat defined above to + // pick the correct minifier configuration. + Minify bool +} + +// DestinationPublisher is the default and currently only publisher in Hugo. This +// publisher prepares and publishes an item to the defined destination, e.g. /public. 
+type DestinationPublisher struct { + fs afero.Fs + minify bool + min minifiers.Client +} + +func NewDestinationPublisher(fs afero.Fs, outputFormats output.Formats, mediaTypes media.Types, minify bool) DestinationPublisher { + pub := DestinationPublisher{fs: fs} + if minify { + pub.min = minifiers.New(mediaTypes, outputFormats) + pub.minify = true + } + return pub +} + +// Publish applies any relevant transformations and writes the file +// to its destination, e.g. /public. +func (p DestinationPublisher) Publish(d Descriptor) error { + if d.TargetPath == "" { + return errors.New("must provide a TargetPath") + } + + src := d.Src + + transformers := p.createTransformerChain(d) + + if len(transformers) != 0 { + b := bp.GetBuffer() + defer bp.PutBuffer(b) + + if err := transformers.Apply(b, d.Src); err != nil { + return err + } + + // This is now what we write to disk. + src = b + } + + f, err := helpers.OpenFileForWriting(p.fs, d.TargetPath) + if err != nil { + return err + } + + _, err = io.Copy(f, src) + if err == nil && d.StatCounter != nil { + atomic.AddUint64(d.StatCounter, uint64(1)) + } + return err +} + +// Publisher publishes a result file. +type Publisher interface { + Publish(d Descriptor) error +} + +// XML transformer := transform.New(urlreplacers.NewAbsURLInXMLTransformer(path)) +func (p DestinationPublisher) createTransformerChain(f Descriptor) transform.Chain { + transformers := transform.NewEmpty() + + isHTML := f.OutputFormat.IsHTML + + if f.AbsURLPath != "" { + if isHTML { + transformers = append(transformers, urlreplacers.NewAbsURLTransformer(f.AbsURLPath)) + } else { + // Assume XML. + transformers = append(transformers, urlreplacers.NewAbsURLInXMLTransformer(f.AbsURLPath)) + } + } + + if isHTML { + if f.LiveReloadPort > 0 { + transformers = append(transformers, livereloadinject.New(f.LiveReloadPort)) + } + + // This is only injected on the home page. 
+ if f.AddHugoGeneratorTag { + transformers = append(transformers, metainject.HugoGenerator) + } + + } + + if p.minify { + minifyTransformer := p.min.Transformer(f.OutputFormat.MediaType) + if minifyTransformer != nil { + transformers = append(transformers, minifyTransformer) + } + } + + return transformers + +} diff --git a/publisher/publisher_test.go b/publisher/publisher_test.go new file mode 100644 index 00000000..200accc8 --- /dev/null +++ b/publisher/publisher_test.go @@ -0,0 +1,14 @@ +// Copyright 2018 The Hugo Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package publisher diff --git a/resource/image.go b/resource/image.go index 6aa38233..57da4f93 100644 --- a/resource/image.go +++ b/resource/image.go @@ -464,7 +464,7 @@ func (i *Image) copyToDestination(src string) error { } defer in.Close() - out, err := openFileForWriting(i.spec.BaseFs.PublishFs, target) + out, err := helpers.OpenFileForWriting(i.spec.BaseFs.PublishFs, target) if err != nil { res = err @@ -487,7 +487,7 @@ func (i *Image) copyToDestination(src string) error { func (i *Image) encodeToDestinations(img image.Image, conf imageConfig, resourceCacheFilename, targetFilename string) error { - file1, err := openFileForWriting(i.spec.BaseFs.PublishFs, targetFilename) + file1, err := helpers.OpenFileForWriting(i.spec.BaseFs.PublishFs, targetFilename) if err != nil { return err } @@ -498,7 +498,7 @@ func (i *Image) encodeToDestinations(img image.Image, conf imageConfig, resource if resourceCacheFilename != "" { // Also save it to the image resource cache for later reuse. - file2, err := openFileForWriting(i.spec.BaseFs.Resources.Fs, resourceCacheFilename) + file2, err := helpers.OpenFileForWriting(i.spec.BaseFs.Resources.Fs, resourceCacheFilename) if err != nil { return err } diff --git a/resource/minifier/minify.go b/resource/minifier/minify.go new file mode 100644 index 00000000..cef22efc --- /dev/null +++ b/resource/minifier/minify.go @@ -0,0 +1,58 @@ +// Copyright 2018 The Hugo Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package minifier + +import ( + "github.com/gohugoio/hugo/minifiers" + "github.com/gohugoio/hugo/resource" +) + +// Client for minification of Resource objects. Supported minfiers are: +// css, html, js, json, svg and xml. +type Client struct { + rs *resource.Spec + m minifiers.Client +} + +// New creates a new Client given a specification. Note that it is the media types +// configured for the site that is used to match files to the correct minifier. +func New(rs *resource.Spec) *Client { + return &Client{rs: rs, m: minifiers.New(rs.MediaTypes, rs.OutputFormats)} +} + +type minifyTransformation struct { + rs *resource.Spec + m minifiers.Client +} + +func (t *minifyTransformation) Key() resource.ResourceTransformationKey { + return resource.NewResourceTransformationKey("minify") +} + +func (t *minifyTransformation) Transform(ctx *resource.ResourceTransformationCtx) error { + if err := t.m.Minify(ctx.InMediaType, ctx.To, ctx.From); err != nil { + return err + } + ctx.AddOutPathIdentifier(".min") + return nil +} + +func (c *Client) Minify(res resource.Resource) (resource.Resource, error) { + return c.rs.Transform( + res, + &minifyTransformation{ + rs: c.rs, + m: c.m}, + ) +} diff --git a/resource/minifiers/minify.go b/resource/minifiers/minify.go deleted file mode 100644 index 604ac6f8..00000000 --- a/resource/minifiers/minify.go +++ /dev/null @@ -1,115 +0,0 @@ -// Copyright 2018 The Hugo Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package minifiers - -import ( - "github.com/gohugoio/hugo/helpers" - "github.com/gohugoio/hugo/media" - - "github.com/gohugoio/hugo/resource" - "github.com/tdewolff/minify" - "github.com/tdewolff/minify/css" - "github.com/tdewolff/minify/html" - "github.com/tdewolff/minify/js" - "github.com/tdewolff/minify/json" - "github.com/tdewolff/minify/svg" - "github.com/tdewolff/minify/xml" -) - -// Client for minification of Resource objects. Supported minfiers are: -// css, html, js, json, svg and xml. -type Client struct { - rs *resource.Spec - m *minify.M -} - -// New creates a new Client given a specification. Note that it is the media types -// configured for the site that is used to match files to the correct minifier. -func New(rs *resource.Spec) *Client { - m := minify.New() - mt := rs.MediaTypes - - // We use the Type definition of the media types defined in the site if found. 
- addMinifierFunc(m, mt, "text/css", "css", css.Minify) - addMinifierFunc(m, mt, "text/html", "html", html.Minify) - addMinifierFunc(m, mt, "application/javascript", "js", js.Minify) - addMinifierFunc(m, mt, "application/json", "json", json.Minify) - addMinifierFunc(m, mt, "image/svg+xml", "svg", svg.Minify) - addMinifierFunc(m, mt, "application/xml", "xml", xml.Minify) - addMinifierFunc(m, mt, "application/rss", "xml", xml.Minify) - - return &Client{rs: rs, m: m} -} - -func addMinifierFunc(m *minify.M, mt media.Types, typeString, suffix string, fn minify.MinifierFunc) { - resolvedTypeStr := resolveMediaTypeString(mt, typeString, suffix) - m.AddFunc(resolvedTypeStr, fn) - if resolvedTypeStr != typeString { - m.AddFunc(typeString, fn) - } -} - -type minifyTransformation struct { - rs *resource.Spec - m *minify.M -} - -func (t *minifyTransformation) Key() resource.ResourceTransformationKey { - return resource.NewResourceTransformationKey("minify") -} - -func (t *minifyTransformation) Transform(ctx *resource.ResourceTransformationCtx) error { - mtype := resolveMediaTypeString( - t.rs.MediaTypes, - ctx.InMediaType.Type(), - helpers.ExtNoDelimiter(ctx.InPath), - ) - if err := t.m.Minify(mtype, ctx.To, ctx.From); err != nil { - return err - } - ctx.AddOutPathIdentifier(".min") - return nil -} - -func (c *Client) Minify(res resource.Resource) (resource.Resource, error) { - return c.rs.Transform( - res, - &minifyTransformation{ - rs: c.rs, - m: c.m}, - ) -} - -func resolveMediaTypeString(types media.Types, typeStr, suffix string) string { - if m, found := resolveMediaType(types, typeStr, suffix); found { - return m.Type() - } - // Fall back to the default. - return typeStr -} - -// Make sure we match the matching pattern with what the user have actually defined -// in his or hers media types configuration. 
-func resolveMediaType(types media.Types, typeStr, suffix string) (media.Type, bool) { - if m, found := types.GetByType(typeStr); found { - return m, true - } - - if m, found := types.GetFirstBySuffix(suffix); found { - return m, true - } - - return media.Type{}, false - -} diff --git a/resource/resource.go b/resource/resource.go index aedc7c22..a1e29c52 100644 --- a/resource/resource.go +++ b/resource/resource.go @@ -24,6 +24,7 @@ import ( "strings" "sync" + "github.com/gohugoio/hugo/output" "github.com/gohugoio/hugo/tpl" "github.com/gohugoio/hugo/common/loggers" @@ -259,7 +260,8 @@ func (r1 Resources) MergeByLanguageInterface(in interface{}) (interface{}, error type Spec struct { *helpers.PathSpec - MediaTypes media.Types + MediaTypes media.Types + OutputFormats output.Formats Logger *jww.Notepad @@ -275,7 +277,7 @@ type Spec struct { GenAssetsPath string } -func NewSpec(s *helpers.PathSpec, logger *jww.Notepad, mimeTypes media.Types) (*Spec, error) { +func NewSpec(s *helpers.PathSpec, logger *jww.Notepad, outputFormats output.Formats, mimeTypes media.Types) (*Spec, error) { imaging, err := decodeImaging(s.Cfg.GetStringMap("imaging")) if err != nil { @@ -296,6 +298,7 @@ func NewSpec(s *helpers.PathSpec, logger *jww.Notepad, mimeTypes media.Types) (* GenAssetsPath: genAssetsPath, imaging: &imaging, MediaTypes: mimeTypes, + OutputFormats: outputFormats, imageCache: newImageCache( s, // We're going to write a cache pruning routine later, so make it extremely diff --git a/resource/resource_cache.go b/resource/resource_cache.go index 28c3c23a..e0b86ec9 100644 --- a/resource/resource_cache.go +++ b/resource/resource_cache.go @@ -16,12 +16,12 @@ package resource import ( "encoding/json" "io/ioutil" - "os" "path" "path/filepath" "strings" "sync" + "github.com/gohugoio/hugo/helpers" "github.com/spf13/afero" "github.com/BurntSushi/locker" @@ -176,26 +176,7 @@ func (c *ResourceCache) writeMeta(key string, meta transformedResourceMetadata) } func (c *ResourceCache) 
openResourceFileForWriting(filename string) (afero.File, error) { - return openFileForWriting(c.rs.Resources.Fs, filename) -} - -// openFileForWriting opens or creates the given file. If the target directory -// does not exist, it gets created. -func openFileForWriting(fs afero.Fs, filename string) (afero.File, error) { - filename = filepath.Clean(filename) - // Create will truncate if file already exists. - f, err := fs.Create(filename) - if err != nil { - if !os.IsNotExist(err) { - return nil, err - } - if err = fs.MkdirAll(filepath.Dir(filename), 0755); err != nil { - return nil, err - } - f, err = fs.Create(filename) - } - - return f, err + return helpers.OpenFileForWriting(c.rs.Resources.Fs, filename) } func (c *ResourceCache) set(key string, r Resource) { diff --git a/resource/testhelpers_test.go b/resource/testhelpers_test.go index e78a536a..2a5d2b3c 100644 --- a/resource/testhelpers_test.go +++ b/resource/testhelpers_test.go @@ -16,6 +16,7 @@ import ( "github.com/gohugoio/hugo/helpers" "github.com/gohugoio/hugo/hugofs" "github.com/gohugoio/hugo/media" + "github.com/gohugoio/hugo/output" "github.com/spf13/afero" "github.com/spf13/viper" "github.com/stretchr/testify/require" @@ -51,7 +52,7 @@ func newTestResourceSpecForBaseURL(assert *require.Assertions, baseURL string) * assert.NoError(err) - spec, err := NewSpec(s, nil, media.DefaultTypes) + spec, err := NewSpec(s, nil, output.DefaultFormats, media.DefaultTypes) assert.NoError(err) return spec } @@ -85,7 +86,7 @@ func newTestResourceOsFs(assert *require.Assertions) *Spec { assert.NoError(err) - spec, err := NewSpec(s, nil, media.DefaultTypes) + spec, err := NewSpec(s, nil, output.DefaultFormats, media.DefaultTypes) assert.NoError(err) return spec @@ -110,7 +111,7 @@ func fetchResourceForSpec(spec *Spec, assert *require.Assertions, name string) C src, err := os.Open(filepath.FromSlash("testdata/" + name)) assert.NoError(err) - out, err := openFileForWriting(spec.BaseFs.Content.Fs, name) + out, err := 
helpers.OpenFileForWriting(spec.BaseFs.Content.Fs, name) assert.NoError(err) _, err = io.Copy(out, src) out.Close() diff --git a/resource/transform.go b/resource/transform.go index 5ba5ec82..c61a9771 100644 --- a/resource/transform.go +++ b/resource/transform.go @@ -267,7 +267,7 @@ func (r *transformedResource) initContent() error { func (r *transformedResource) transform(setContent bool) (err error) { openPublishFileForWriting := func(relTargetPath string) (io.WriteCloser, error) { - return openFileForWriting(r.cache.rs.PublishFs, r.linker.relTargetPathFor(relTargetPath)) + return helpers.OpenFileForWriting(r.cache.rs.PublishFs, r.linker.relTargetPathFor(relTargetPath)) } // This can be the last resource in a chain. diff --git a/tpl/resources/resources.go b/tpl/resources/resources.go index c9d3275e..5f375a06 100644 --- a/tpl/resources/resources.go +++ b/tpl/resources/resources.go @@ -23,7 +23,7 @@ import ( "github.com/gohugoio/hugo/resource/bundler" "github.com/gohugoio/hugo/resource/create" "github.com/gohugoio/hugo/resource/integrity" - "github.com/gohugoio/hugo/resource/minifiers" + "github.com/gohugoio/hugo/resource/minifier" "github.com/gohugoio/hugo/resource/postcss" "github.com/gohugoio/hugo/resource/templates" "github.com/gohugoio/hugo/resource/tocss/scss" @@ -42,7 +42,7 @@ func New(deps *deps.Deps) (*Namespace, error) { createClient: create.New(deps.ResourceSpec), bundlerClient: bundler.New(deps.ResourceSpec), integrityClient: integrity.New(deps.ResourceSpec), - minifyClient: minifiers.New(deps.ResourceSpec), + minifyClient: minifier.New(deps.ResourceSpec), postcssClient: postcss.New(deps.ResourceSpec), templatesClient: templates.New(deps.ResourceSpec, deps.TextTmpl), }, nil @@ -56,7 +56,7 @@ type Namespace struct { bundlerClient *bundler.Client scssClient *scss.Client integrityClient *integrity.Client - minifyClient *minifiers.Client + minifyClient *minifier.Client postcssClient *postcss.Client templatesClient *templates.Client } diff --git 
a/transform/absurl.go b/transform/absurl.go deleted file mode 100644 index 255ac33b..00000000 --- a/transform/absurl.go +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright 2015 The Hugo Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package transform - -var ar = newAbsURLReplacer() - -// AbsURL replaces relative URLs with absolute ones -// in HTML files, using the baseURL setting. -var AbsURL = func(ct contentTransformer) { - ar.replaceInHTML(ct) -} - -// AbsURLInXML replaces relative URLs with absolute ones -// in XML files, using the baseURL setting. -var AbsURLInXML = func(ct contentTransformer) { - ar.replaceInXML(ct) -} diff --git a/transform/absurlreplacer.go b/transform/absurlreplacer.go deleted file mode 100644 index c659a94e..00000000 --- a/transform/absurlreplacer.go +++ /dev/null @@ -1,312 +0,0 @@ -// Copyright 2015 The Hugo Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package transform - -import ( - "bytes" - "io" - "unicode/utf8" -) - -type matchState int - -const ( - matchStateNone matchState = iota - matchStateWhitespace - matchStatePartial - matchStateFull -) - -type absurllexer struct { - // the source to absurlify - content []byte - // the target for the new absurlified content - w io.Writer - - // path may be set to a "." relative path - path []byte - - pos int // input position - start int // item start position - width int // width of last element - - matchers []absURLMatcher - - ms matchState - matches [3]bool // track matches of the 3 prefixes - idx int // last index in matches checked - -} - -type stateFunc func(*absurllexer) stateFunc - -// prefix is how to identify and which func to handle the replacement. -type prefix struct { - r []rune - f func(l *absurllexer) -} - -// new prefixes can be added below, but note: -// - the matches array above must be expanded. -// - the prefix must with the current logic end with '=' -var prefixes = []*prefix{ - {r: []rune{'s', 'r', 'c', '='}, f: checkCandidateBase}, - {r: []rune{'h', 'r', 'e', 'f', '='}, f: checkCandidateBase}, - {r: []rune{'s', 'r', 'c', 's', 'e', 't', '='}, f: checkCandidateSrcset}, -} - -type absURLMatcher struct { - match []byte - quote []byte -} - -// match check rune inside word. Will be != ' '. -func (l *absurllexer) match(r rune) { - - var found bool - - // note, the prefixes can start off on the same foot, i.e. - // src and srcset. - if l.ms == matchStateWhitespace { - l.idx = 0 - for j, p := range prefixes { - if r == p.r[l.idx] { - l.matches[j] = true - found = true - // checkMatchState will only return true when r=='=', so - // we can safely ignore the return value here. - l.checkMatchState(r, j) - } - } - - if !found { - l.ms = matchStateNone - } - - return - } - - l.idx++ - for j, m := range l.matches { - // still a match? 
- if m { - if prefixes[j].r[l.idx] == r { - found = true - if l.checkMatchState(r, j) { - return - } - } else { - l.matches[j] = false - } - } - } - - if !found { - l.ms = matchStateNone - } -} - -func (l *absurllexer) checkMatchState(r rune, idx int) bool { - if r == '=' { - l.ms = matchStateFull - for k := range l.matches { - if k != idx { - l.matches[k] = false - } - } - return true - } - - l.ms = matchStatePartial - - return false -} - -func (l *absurllexer) emit() { - l.w.Write(l.content[l.start:l.pos]) - l.start = l.pos -} - -// handle URLs in src and href. -func checkCandidateBase(l *absurllexer) { - for _, m := range l.matchers { - if !bytes.HasPrefix(l.content[l.pos:], m.match) { - continue - } - // check for schemaless URLs - posAfter := l.pos + len(m.match) - if posAfter >= len(l.content) { - return - } - r, _ := utf8.DecodeRune(l.content[posAfter:]) - if r == '/' { - // schemaless: skip - return - } - if l.pos > l.start { - l.emit() - } - l.pos += len(m.match) - l.w.Write(m.quote) - l.w.Write(l.path) - l.start = l.pos - } -} - -// handle URLs in srcset. 
-func checkCandidateSrcset(l *absurllexer) { - // special case, not frequent (me think) - for _, m := range l.matchers { - if !bytes.HasPrefix(l.content[l.pos:], m.match) { - continue - } - - // check for schemaless URLs - posAfter := l.pos + len(m.match) - if posAfter >= len(l.content) { - return - } - r, _ := utf8.DecodeRune(l.content[posAfter:]) - if r == '/' { - // schemaless: skip - continue - } - - posLastQuote := bytes.Index(l.content[l.pos+1:], m.quote) - - // safe guard - if posLastQuote < 0 || posLastQuote > 2000 { - return - } - - if l.pos > l.start { - l.emit() - } - - section := l.content[l.pos+len(m.quote) : l.pos+posLastQuote+1] - - fields := bytes.Fields(section) - l.w.Write(m.quote) - for i, f := range fields { - if f[0] == '/' { - l.w.Write(l.path) - l.w.Write(f[1:]) - - } else { - l.w.Write(f) - } - - if i < len(fields)-1 { - l.w.Write([]byte(" ")) - } - } - - l.w.Write(m.quote) - l.pos += len(section) + (len(m.quote) * 2) - l.start = l.pos - } -} - -// main loop -func (l *absurllexer) replace() { - contentLength := len(l.content) - var r rune - - for { - if l.pos >= contentLength { - l.width = 0 - break - } - - var width = 1 - r = rune(l.content[l.pos]) - if r >= utf8.RuneSelf { - r, width = utf8.DecodeRune(l.content[l.pos:]) - } - l.width = width - l.pos += l.width - if r == ' ' { - l.ms = matchStateWhitespace - } else if l.ms != matchStateNone { - l.match(r) - if l.ms == matchStateFull { - var p *prefix - for i, m := range l.matches { - if m { - p = prefixes[i] - l.matches[i] = false - } - } - l.ms = matchStateNone - p.f(l) - } - } - } - - // Done! 
- if l.pos > l.start { - l.emit() - } -} - -func doReplace(ct contentTransformer, matchers []absURLMatcher) { - - lexer := &absurllexer{ - content: ct.Content(), - w: ct, - path: ct.Path(), - matchers: matchers} - - lexer.replace() -} - -type absURLReplacer struct { - htmlMatchers []absURLMatcher - xmlMatchers []absURLMatcher -} - -func newAbsURLReplacer() *absURLReplacer { - - // HTML - dqHTMLMatch := []byte("\"/") - sqHTMLMatch := []byte("'/") - - // XML - dqXMLMatch := []byte(""/") - sqXMLMatch := []byte("'/") - - dqHTML := []byte("\"") - sqHTML := []byte("'") - - dqXML := []byte(""") - sqXML := []byte("'") - - return &absURLReplacer{ - htmlMatchers: []absURLMatcher{ - {dqHTMLMatch, dqHTML}, - {sqHTMLMatch, sqHTML}, - }, - xmlMatchers: []absURLMatcher{ - {dqXMLMatch, dqXML}, - {sqXMLMatch, sqXML}, - }} -} - -func (au *absURLReplacer) replaceInHTML(ct contentTransformer) { - doReplace(ct, au.htmlMatchers) -} - -func (au *absURLReplacer) replaceInXML(ct contentTransformer) { - doReplace(ct, au.xmlMatchers) -} diff --git a/transform/chain.go b/transform/chain.go index f71de94c..74217dc7 100644 --- a/transform/chain.go +++ b/transform/chain.go @@ -1,4 +1,4 @@ -// Copyright 2015 The Hugo Authors. All rights reserved. +// Copyright 2018 The Hugo Authors. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -20,67 +20,74 @@ import ( bp "github.com/gohugoio/hugo/bufferpool" ) -type trans func(rw contentTransformer) +// Transformer is the func that needs to be implemented by a transformation step. +type Transformer func(ft FromTo) error -type link trans +// BytesReader wraps the Bytes method, usually implemented by bytes.Buffer, and an +// io.Reader. +type BytesReader interface { + // The slice given by Bytes is valid for use only until the next buffer modification. 
+ // That is, if you want to use this value outside of the current transformer step, + // you need to take a copy. + Bytes() []byte -type chain []link + io.Reader +} -// NewChain creates a chained content transformer given the provided transforms. -func NewChain(trs ...link) chain { - return trs +// FromTo is sent to each transformation step in the chain. +type FromTo interface { + From() BytesReader + To() io.Writer } -// NewEmptyTransforms creates a new slice of transforms with a capacity of 20. -func NewEmptyTransforms() []link { - return make([]link, 0, 20) +// Chain is an ordered processing chain. The next transform operation will +// receive the output from the previous. +type Chain []Transformer + +// New creates a content transformer chain given the provided transform funcs. +func New(trs ...Transformer) Chain { + return trs } -// contentTransformer is an interface that enables rotation of pooled buffers -// in the transformer chain. -type contentTransformer interface { - Path() []byte - Content() []byte - io.Writer +// NewEmpty creates a new slice of transformers with a capacity of 20. +func NewEmpty() Chain { + return make(Chain, 0, 20) } // Implements contentTransformer // Content is read from the from-buffer and rewritten to to the to-buffer. type fromToBuffer struct { - path []byte from *bytes.Buffer to *bytes.Buffer } -func (ft fromToBuffer) Path() []byte { - return ft.path +func (ft fromToBuffer) From() BytesReader { + return ft.from } -func (ft fromToBuffer) Write(p []byte) (n int, err error) { - return ft.to.Write(p) +func (ft fromToBuffer) To() io.Writer { + return ft.to } -func (ft fromToBuffer) Content() []byte { - return ft.from.Bytes() -} - -func (c *chain) Apply(w io.Writer, r io.Reader, p []byte) error { +// Apply passes the given from io.Reader through the transformation chain. +// The result is written to to. 
+func (c *Chain) Apply(to io.Writer, from io.Reader) error { if len(*c) == 0 { - _, err := io.Copy(w, r) + _, err := io.Copy(to, from) return err } b1 := bp.GetBuffer() defer bp.PutBuffer(b1) - if _, err := b1.ReadFrom(r); err != nil { + if _, err := b1.ReadFrom(from); err != nil { return err } b2 := bp.GetBuffer() defer bp.PutBuffer(b2) - fb := &fromToBuffer{path: p, from: b1, to: b2} + fb := &fromToBuffer{from: b1, to: b2} for i, tr := range *c { if i > 0 { @@ -95,9 +102,11 @@ func (c *chain) Apply(w io.Writer, r io.Reader, p []byte) error { } } - tr(fb) + if err := tr(fb); err != nil { + return err + } } - _, err := fb.to.WriteTo(w) + _, err := fb.to.WriteTo(to) return err } diff --git a/transform/chain_test.go b/transform/chain_test.go index ae5f06a2..e3402429 100644 --- a/transform/chain_test.go +++ b/transform/chain_test.go @@ -1,4 +1,4 @@ -// Copyright 2015 The Hugo Authors. All rights reserved. +// Copyright 2018 The Hugo Authors. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -15,142 +15,44 @@ package transform import ( "bytes" - "path/filepath" "strings" "testing" - bp "github.com/gohugoio/hugo/bufferpool" - "github.com/gohugoio/hugo/helpers" "github.com/stretchr/testify/assert" ) -const ( - h5JsContentDoubleQuote = "<!DOCTYPE html><html><head><script src=\"foobar.js\"></script><script src=\"/barfoo.js\"></script></head><body><nav><h1>title</h1></nav><article>content <a href=\"foobar\">foobar</a>. <a href=\"/foobar\">Follow up</a></article></body></html>" - h5JsContentSingleQuote = "<!DOCTYPE html><html><head><script src='foobar.js'></script><script src='/barfoo.js'></script></head><body><nav><h1>title</h1></nav><article>content <a href='foobar'>foobar</a>. 
<a href='/foobar'>Follow up</a></article></body></html>" - h5JsContentAbsURL = "<!DOCTYPE html><html><head><script src=\"http://user@host:10234/foobar.js\"></script></head><body><nav><h1>title</h1></nav><article>content <a href=\"https://host/foobar\">foobar</a>. Follow up</article></body></html>" - h5JsContentAbsURLSchemaless = "<!DOCTYPE html><html><head><script src=\"//host/foobar.js\"></script><script src='//host2/barfoo.js'></head><body><nav><h1>title</h1></nav><article>content <a href=\"//host/foobar\">foobar</a>. <a href='//host2/foobar'>Follow up</a></article></body></html>" - corectOutputSrcHrefDq = "<!DOCTYPE html><html><head><script src=\"foobar.js\"></script><script src=\"http://base/barfoo.js\"></script></head><body><nav><h1>title</h1></nav><article>content <a href=\"foobar\">foobar</a>. <a href=\"http://base/foobar\">Follow up</a></article></body></html>" - corectOutputSrcHrefSq = "<!DOCTYPE html><html><head><script src='foobar.js'></script><script src='http://base/barfoo.js'></script></head><body><nav><h1>title</h1></nav><article>content <a href='foobar'>foobar</a>. 
<a href='http://base/foobar'>Follow up</a></article></body></html>" - - h5XMLXontentAbsURL = "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\" ?><feed xmlns=\"http://www.w3.org/2005/Atom\"><entry><content type=\"html\"><p><a href="/foobar">foobar</a></p> <p>A video: <iframe src='/foo'></iframe></p></content></entry></feed>" - correctOutputSrcHrefInXML = "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\" ?><feed xmlns=\"http://www.w3.org/2005/Atom\"><entry><content type=\"html\"><p><a href="http://base/foobar">foobar</a></p> <p>A video: <iframe src='http://base/foo'></iframe></p></content></entry></feed>" - h5XMLContentGuarded = "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\" ?><feed xmlns=\"http://www.w3.org/2005/Atom\"><entry><content type=\"html\"><p><a href="//foobar">foobar</a></p> <p>A video: <iframe src='//foo'></iframe></p></content></entry></feed>" -) - -const ( - // additional sanity tests for replacements testing - replace1 = "No replacements." - replace2 = "ᚠᛇᚻ ᛒᛦᚦ ᚠᚱᚩᚠᚢᚱ\nᚠᛁᚱᚪ ᚷᛖᚻᚹᛦᛚᚳᚢᛗ" - replace3 = `End of file: src="/` - replace4 = `End of file: srcset="/` - replace5 = `Srcsett with no closing quote: srcset="/img/small.jpg do be do be do.` - - // Issue: 816, schemaless links combined with others - replaceSchemalessHTML = `Pre. src='//schemaless' src='/normal' <a href="//schemaless">Schemaless</a>. <a href="/normal">normal</a>. Post.` - replaceSchemalessHTMLCorrect = `Pre. src='//schemaless' src='http://base/normal' <a href="//schemaless">Schemaless</a>. <a href="http://base/normal">normal</a>. Post.` - replaceSchemalessXML = `Pre. src='//schemaless' src='/normal' <a href='//schemaless'>Schemaless</a>. <a href='/normal'>normal</a>. Post.` - replaceSchemalessXMLCorrect = `Pre. src='//schemaless' src='http://base/normal' <a href='//schemaless'>Schemaless</a>. <a href='http://base/normal'>normal</a>. Post.` -) - -const ( - // srcset= - srcsetBasic = `Pre. 
<img srcset="/img/small.jpg 200w, /img/medium.jpg 300w, /img/big.jpg 700w" alt="text" src="/img/foo.jpg">` - srcsetBasicCorrect = `Pre. <img srcset="http://base/img/small.jpg 200w, http://base/img/medium.jpg 300w, http://base/img/big.jpg 700w" alt="text" src="http://base/img/foo.jpg">` - srcsetSingleQuote = `Pre. <img srcset='/img/small.jpg 200w, /img/big.jpg 700w' alt="text" src="/img/foo.jpg"> POST.` - srcsetSingleQuoteCorrect = `Pre. <img srcset='http://base/img/small.jpg 200w, http://base/img/big.jpg 700w' alt="text" src="http://base/img/foo.jpg"> POST.` - srcsetXMLBasic = `Pre. <img srcset="/img/small.jpg 200w, /img/big.jpg 700w" alt="text" src="/img/foo.jpg">` - srcsetXMLBasicCorrect = `Pre. <img srcset="http://base/img/small.jpg 200w, http://base/img/big.jpg 700w" alt="text" src="http://base/img/foo.jpg">` - srcsetXMLSingleQuote = `Pre. <img srcset="/img/small.jpg 200w, /img/big.jpg 700w" alt="text" src="/img/foo.jpg">` - srcsetXMLSingleQuoteCorrect = `Pre. <img srcset="http://base/img/small.jpg 200w, http://base/img/big.jpg 700w" alt="text" src="http://base/img/foo.jpg">` - srcsetVariations = `Pre. -Missing start quote: <img srcset=/img/small.jpg 200w, /img/big.jpg 700w" alt="text"> src='/img/foo.jpg'> FOO. -<img srcset='/img.jpg'> -schemaless: <img srcset='//img.jpg' src='//basic.jpg'> -schemaless2: <img srcset="//img.jpg" src="//basic.jpg2> POST -` -) - -const ( - srcsetVariationsCorrect = `Pre. -Missing start quote: <img srcset=/img/small.jpg 200w, /img/big.jpg 700w" alt="text"> src='http://base/img/foo.jpg'> FOO. -<img srcset='http://base/img.jpg'> -schemaless: <img srcset='//img.jpg' src='//basic.jpg'> -schemaless2: <img srcset="//img.jpg" src="//basic.jpg2> POST -` - srcsetXMLVariations = `Pre. -Missing start quote: <img srcset=/img/small.jpg 200w /img/big.jpg 700w" alt="text"> src='/img/foo.jpg'> FOO. 
-<img srcset='/img.jpg'> -schemaless: <img srcset='//img.jpg' src='//basic.jpg'> -schemaless2: <img srcset="//img.jpg" src="//basic.jpg2> POST -` - srcsetXMLVariationsCorrect = `Pre. -Missing start quote: <img srcset=/img/small.jpg 200w /img/big.jpg 700w" alt="text"> src='http://base/img/foo.jpg'> FOO. -<img srcset='http://base/img.jpg'> -schemaless: <img srcset='//img.jpg' src='//basic.jpg'> -schemaless2: <img srcset="//img.jpg" src="//basic.jpg2> POST -` - - relPathVariations = `PRE. a href="/img/small.jpg" POST.` - relPathVariationsCorrect = `PRE. a href="../../img/small.jpg" POST.` - - testBaseURL = "http://base/" -) - -var ( - absURLlBenchTests = []test{ - {h5JsContentDoubleQuote, corectOutputSrcHrefDq}, - {h5JsContentSingleQuote, corectOutputSrcHrefSq}, - {h5JsContentAbsURL, h5JsContentAbsURL}, - {h5JsContentAbsURLSchemaless, h5JsContentAbsURLSchemaless}, - } - - xmlAbsURLBenchTests = []test{ - {h5XMLXontentAbsURL, correctOutputSrcHrefInXML}, - {h5XMLContentGuarded, h5XMLContentGuarded}, - } - - sanityTests = []test{{replace1, replace1}, {replace2, replace2}, {replace3, replace3}, {replace3, replace3}, {replace5, replace5}} - extraTestsHTML = []test{{replaceSchemalessHTML, replaceSchemalessHTMLCorrect}} - absURLTests = append(absURLlBenchTests, append(sanityTests, extraTestsHTML...)...) - extraTestsXML = []test{{replaceSchemalessXML, replaceSchemalessXMLCorrect}} - xmlAbsURLTests = append(xmlAbsURLBenchTests, append(sanityTests, extraTestsXML...)...) 
- srcsetTests = []test{{srcsetBasic, srcsetBasicCorrect}, {srcsetSingleQuote, srcsetSingleQuoteCorrect}, {srcsetVariations, srcsetVariationsCorrect}} - srcsetXMLTests = []test{ - {srcsetXMLBasic, srcsetXMLBasicCorrect}, - {srcsetXMLSingleQuote, srcsetXMLSingleQuoteCorrect}, - {srcsetXMLVariations, srcsetXMLVariationsCorrect}} - - relurlTests = []test{{relPathVariations, relPathVariationsCorrect}} -) - func TestChainZeroTransformers(t *testing.T) { - tr := NewChain() + tr := New() in := new(bytes.Buffer) out := new(bytes.Buffer) - if err := tr.Apply(in, out, []byte("")); err != nil { + if err := tr.Apply(in, out); err != nil { t.Errorf("A zero transformer chain returned an error.") } } func TestChaingMultipleTransformers(t *testing.T) { - f1 := func(ct contentTransformer) { - ct.Write(bytes.Replace(ct.Content(), []byte("f1"), []byte("f1r"), -1)) + f1 := func(ct FromTo) error { + _, err := ct.To().Write(bytes.Replace(ct.From().Bytes(), []byte("f1"), []byte("f1r"), -1)) + return err } - f2 := func(ct contentTransformer) { - ct.Write(bytes.Replace(ct.Content(), []byte("f2"), []byte("f2r"), -1)) + f2 := func(ct FromTo) error { + _, err := ct.To().Write(bytes.Replace(ct.From().Bytes(), []byte("f2"), []byte("f2r"), -1)) + return err } - f3 := func(ct contentTransformer) { - ct.Write(bytes.Replace(ct.Content(), []byte("f3"), []byte("f3r"), -1)) + f3 := func(ct FromTo) error { + _, err := ct.To().Write(bytes.Replace(ct.From().Bytes(), []byte("f3"), []byte("f3r"), -1)) + return err } - f4 := func(ct contentTransformer) { - ct.Write(bytes.Replace(ct.Content(), []byte("f4"), []byte("f4r"), -1)) + f4 := func(ct FromTo) error { + _, err := ct.To().Write(bytes.Replace(ct.From().Bytes(), []byte("f4"), []byte("f4r"), -1)) + return err } - tr := NewChain(f1, f2, f3, f4) + tr := New(f1, f2, f3, f4) out := new(bytes.Buffer) - if err := tr.Apply(out, strings.NewReader("Test: f4 f3 f1 f2 f1 The End."), []byte("")); err != nil { + if err := tr.Apply(out, strings.NewReader("Test: f4 f3 f1 
f2 f1 The End.")); err != nil { t.Errorf("Multi transformer chain returned an error: %s", err) } @@ -161,107 +63,7 @@ func TestChaingMultipleTransformers(t *testing.T) { } } -func BenchmarkAbsURL(b *testing.B) { - tr := NewChain(AbsURL) - - b.ResetTimer() - for i := 0; i < b.N; i++ { - apply(b.Errorf, tr, absURLlBenchTests) - } -} - -func BenchmarkAbsURLSrcset(b *testing.B) { - tr := NewChain(AbsURL) - - b.ResetTimer() - for i := 0; i < b.N; i++ { - apply(b.Errorf, tr, srcsetTests) - } -} - -func BenchmarkXMLAbsURLSrcset(b *testing.B) { - tr := NewChain(AbsURLInXML) - - b.ResetTimer() - for i := 0; i < b.N; i++ { - apply(b.Errorf, tr, srcsetXMLTests) - } -} - -func TestAbsURL(t *testing.T) { - tr := NewChain(AbsURL) - - apply(t.Errorf, tr, absURLTests) - -} - -func TestRelativeURL(t *testing.T) { - tr := NewChain(AbsURL) - - applyWithPath(t.Errorf, tr, relurlTests, helpers.GetDottedRelativePath(filepath.FromSlash("/post/sub/"))) - -} - -func TestAbsURLSrcSet(t *testing.T) { - tr := NewChain(AbsURL) - - apply(t.Errorf, tr, srcsetTests) -} - -func TestAbsXMLURLSrcSet(t *testing.T) { - tr := NewChain(AbsURLInXML) - - apply(t.Errorf, tr, srcsetXMLTests) -} - -func BenchmarkXMLAbsURL(b *testing.B) { - tr := NewChain(AbsURLInXML) - - b.ResetTimer() - for i := 0; i < b.N; i++ { - apply(b.Errorf, tr, xmlAbsURLBenchTests) - } -} - -func TestXMLAbsURL(t *testing.T) { - tr := NewChain(AbsURLInXML) - apply(t.Errorf, tr, xmlAbsURLTests) -} - func TestNewEmptyTransforms(t *testing.T) { - transforms := NewEmptyTransforms() + transforms := NewEmpty() assert.Equal(t, 20, cap(transforms)) } - -type errorf func(string, ...interface{}) - -func applyWithPath(ef errorf, tr chain, tests []test, path string) { - out := bp.GetBuffer() - defer bp.PutBuffer(out) - - in := bp.GetBuffer() - defer bp.PutBuffer(in) - - for _, test := range tests { - var err error - in.WriteString(test.content) - err = tr.Apply(out, in, []byte(path)) - if err != nil { - ef("Unexpected error: %s", err) - } - if 
test.expected != out.String() { - ef("Expected:\n%s\nGot:\n%s", test.expected, out.String()) - } - out.Reset() - in.Reset() - } -} - -func apply(ef errorf, tr chain, tests []test) { - applyWithPath(ef, tr, tests, testBaseURL) -} - -type test struct { - content string - expected string -} diff --git a/transform/hugogeneratorinject.go b/transform/hugogeneratorinject.go deleted file mode 100644 index 87405308..00000000 --- a/transform/hugogeneratorinject.go +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright 2016 The Hugo Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package transform - -import ( - "bytes" - "fmt" - "regexp" - - "github.com/gohugoio/hugo/helpers" -) - -var metaTagsCheck = regexp.MustCompile(`(?i)<meta\s+name=['|"]?generator['|"]?`) -var hugoGeneratorTag = fmt.Sprintf(`<meta name="generator" content="Hugo %s" />`, helpers.CurrentHugoVersion) - -// HugoGeneratorInject injects a meta generator tag for Hugo if none present. 
-func HugoGeneratorInject(ct contentTransformer) { - if metaTagsCheck.Match(ct.Content()) { - if _, err := ct.Write(ct.Content()); err != nil { - helpers.DistinctWarnLog.Println("Failed to inject Hugo generator tag:", err) - } - return - } - - head := "<head>" - replace := []byte(fmt.Sprintf("%s\n\t%s", head, hugoGeneratorTag)) - newcontent := bytes.Replace(ct.Content(), []byte(head), replace, 1) - - if len(newcontent) == len(ct.Content()) { - head := "<HEAD>" - replace := []byte(fmt.Sprintf("%s\n\t%s", head, hugoGeneratorTag)) - newcontent = bytes.Replace(ct.Content(), []byte(head), replace, 1) - } - - if _, err := ct.Write(newcontent); err != nil { - helpers.DistinctWarnLog.Println("Failed to inject Hugo generator tag:", err) - } - -} diff --git a/transform/hugogeneratorinject_test.go b/transform/hugogeneratorinject_test.go deleted file mode 100644 index d37fea24..00000000 --- a/transform/hugogeneratorinject_test.go +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright 2016 The Hugo Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package transform - -import ( - "bytes" - "strings" - "testing" -) - -func TestHugoGeneratorInject(t *testing.T) { - hugoGeneratorTag = "META" - for i, this := range []struct { - in string - expect string - }{ - {`<head> - <foo /> -</head>`, `<head> - META - <foo /> -</head>`}, - {`<HEAD> - <foo /> -</HEAD>`, `<HEAD> - META - <foo /> -</HEAD>`}, - {`<head><meta name="generator" content="Jekyll" /></head>`, `<head><meta name="generator" content="Jekyll" /></head>`}, - {`<head><meta name='generator' content='Jekyll' /></head>`, `<head><meta name='generator' content='Jekyll' /></head>`}, - {`<head><meta name=generator content=Jekyll /></head>`, `<head><meta name=generator content=Jekyll /></head>`}, - {`<head><META NAME="GENERATOR" content="Jekyll" /></head>`, `<head><META NAME="GENERATOR" content="Jekyll" /></head>`}, - {"", ""}, - {"</head>", "</head>"}, - {"<head>", "<head>\n\tMETA"}, - } { - in := strings.NewReader(this.in) - out := new(bytes.Buffer) - - tr := NewChain(HugoGeneratorInject) - tr.Apply(out, in, []byte("")) - - if out.String() != this.expect { - t.Errorf("[%d] Expected \n%q got \n%q", i, this.expect, out.String()) - } - } - -} diff --git a/transform/livereloadinject.go b/transform/livereloadinject.go deleted file mode 100644 index 4efd0151..00000000 --- a/transform/livereloadinject.go +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright 2015 The Hugo Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package transform - -import ( - "bytes" - "fmt" -) - -// LiveReloadInject returns a function that can be used -// to inject a script tag for the livereload JavaScript in a HTML document. -func LiveReloadInject(port int) func(ct contentTransformer) { - return func(ct contentTransformer) { - endBodyTag := "</body>" - match := []byte(endBodyTag) - replaceTemplate := `<script data-no-instant>document.write('<script src="/livereload.js?port=%d&mindelay=10"></' + 'script>')</script>%s` - replace := []byte(fmt.Sprintf(replaceTemplate, port, endBodyTag)) - - newcontent := bytes.Replace(ct.Content(), match, replace, 1) - if len(newcontent) == len(ct.Content()) { - endBodyTag = "</BODY>" - replace := []byte(fmt.Sprintf(replaceTemplate, port, endBodyTag)) - match := []byte(endBodyTag) - newcontent = bytes.Replace(ct.Content(), match, replace, 1) - } - - ct.Write(newcontent) - } -} diff --git a/transform/livereloadinject/livereloadinject.go b/transform/livereloadinject/livereloadinject.go new file mode 100644 index 00000000..e04b977f --- /dev/null +++ b/transform/livereloadinject/livereloadinject.go @@ -0,0 +1,47 @@ +// Copyright 2018 The Hugo Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package livereloadinject + +import ( + "bytes" + "fmt" + + "github.com/gohugoio/hugo/helpers" + "github.com/gohugoio/hugo/transform" +) + +// New creates a function that can be used +// to inject a script tag for the livereload JavaScript in a HTML document. +func New(port int) transform.Transformer { + return func(ft transform.FromTo) error { + b := ft.From().Bytes() + endBodyTag := "</body>" + match := []byte(endBodyTag) + replaceTemplate := `<script data-no-instant>document.write('<script src="/livereload.js?port=%d&mindelay=10"></' + 'script>')</script>%s` + replace := []byte(fmt.Sprintf(replaceTemplate, port, endBodyTag)) + + newcontent := bytes.Replace(b, match, replace, 1) + if len(newcontent) == len(b) { + endBodyTag = "</BODY>" + replace := []byte(fmt.Sprintf(replaceTemplate, port, endBodyTag)) + match := []byte(endBodyTag) + newcontent = bytes.Replace(b, match, replace, 1) + } + + if _, err := ft.To().Write(newcontent); err != nil { + helpers.DistinctWarnLog.Println("Failed to inject LiveReload script:", err) + } + return nil + } +} diff --git a/transform/livereloadinject/livereloadinject_test.go b/transform/livereloadinject/livereloadinject_test.go new file mode 100644 index 00000000..0e0f708d --- /dev/null +++ b/transform/livereloadinject/livereloadinject_test.go @@ -0,0 +1,41 @@ +// Copyright 2018 The Hugo Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package livereloadinject + +import ( + "bytes" + "fmt" + "strings" + "testing" + + "github.com/gohugoio/hugo/transform" +) + +func TestLiveReloadInject(t *testing.T) { + doTestLiveReloadInject(t, "</body>") + doTestLiveReloadInject(t, "</BODY>") +} + +func doTestLiveReloadInject(t *testing.T, bodyEndTag string) { + out := new(bytes.Buffer) + in := strings.NewReader(bodyEndTag) + + tr := transform.New(New(1313)) + tr.Apply(out, in) + + expected := fmt.Sprintf(`<script data-no-instant>document.write('<script src="/livereload.js?port=1313&mindelay=10"></' + 'script>')</script>%s`, bodyEndTag) + if string(out.Bytes()) != expected { + t.Errorf("Expected %s got %s", expected, string(out.Bytes())) + } +} diff --git a/transform/livereloadinject_test.go b/transform/livereloadinject_test.go deleted file mode 100644 index 3337243b..00000000 --- a/transform/livereloadinject_test.go +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright 2015 The Hugo Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package transform - -import ( - "bytes" - "fmt" - "strings" - "testing" -) - -func TestLiveReloadInject(t *testing.T) { - doTestLiveReloadInject(t, "</body>") - doTestLiveReloadInject(t, "</BODY>") -} - -func doTestLiveReloadInject(t *testing.T, bodyEndTag string) { - out := new(bytes.Buffer) - in := strings.NewReader(bodyEndTag) - - tr := NewChain(LiveReloadInject(1313)) - tr.Apply(out, in, []byte("path")) - - expected := fmt.Sprintf(`<script data-no-instant>document.write('<script src="/livereload.js?port=1313&mindelay=10"></' + 'script>')</script>%s`, bodyEndTag) - if string(out.Bytes()) != expected { - t.Errorf("Expected %s got %s", expected, string(out.Bytes())) - } -} diff --git a/transform/metainject/hugogenerator.go b/transform/metainject/hugogenerator.go new file mode 100644 index 00000000..513b2122 --- /dev/null +++ b/transform/metainject/hugogenerator.go @@ -0,0 +1,54 @@ +// Copyright 2018 The Hugo Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package metainject + +import ( + "bytes" + "fmt" + "regexp" + + "github.com/gohugoio/hugo/helpers" + "github.com/gohugoio/hugo/transform" +) + +var metaTagsCheck = regexp.MustCompile(`(?i)<meta\s+name=['|"]?generator['|"]?`) +var hugoGeneratorTag = fmt.Sprintf(`<meta name="generator" content="Hugo %s" />`, helpers.CurrentHugoVersion) + +// HugoGenerator injects a meta generator tag for Hugo if none present. 
+func HugoGenerator(ft transform.FromTo) error { + b := ft.From().Bytes() + if metaTagsCheck.Match(b) { + if _, err := ft.To().Write(b); err != nil { + helpers.DistinctWarnLog.Println("Failed to inject Hugo generator tag:", err) + } + return nil + } + + head := "<head>" + replace := []byte(fmt.Sprintf("%s\n\t%s", head, hugoGeneratorTag)) + newcontent := bytes.Replace(b, []byte(head), replace, 1) + + if len(newcontent) == len(b) { + head := "<HEAD>" + replace := []byte(fmt.Sprintf("%s\n\t%s", head, hugoGeneratorTag)) + newcontent = bytes.Replace(b, []byte(head), replace, 1) + } + + if _, err := ft.To().Write(newcontent); err != nil { + helpers.DistinctWarnLog.Println("Failed to inject Hugo generator tag:", err) + } + + return nil + +} diff --git a/transform/metainject/hugogenerator_test.go b/transform/metainject/hugogenerator_test.go new file mode 100644 index 00000000..ffb4c142 --- /dev/null +++ b/transform/metainject/hugogenerator_test.go @@ -0,0 +1,61 @@ +// Copyright 2018 The Hugo Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package metainject + +import ( + "bytes" + "strings" + "testing" + + "github.com/gohugoio/hugo/transform" +) + +func TestHugoGeneratorInject(t *testing.T) { + hugoGeneratorTag = "META" + for i, this := range []struct { + in string + expect string + }{ + {`<head> + <foo /> +</head>`, `<head> + META + <foo /> +</head>`}, + {`<HEAD> + <foo /> +</HEAD>`, `<HEAD> + META + <foo /> +</HEAD>`}, + {`<head><meta name="generator" content="Jekyll" /></head>`, `<head><meta name="generator" content="Jekyll" /></head>`}, + {`<head><meta name='generator' content='Jekyll' /></head>`, `<head><meta name='generator' content='Jekyll' /></head>`}, + {`<head><meta name=generator content=Jekyll /></head>`, `<head><meta name=generator content=Jekyll /></head>`}, + {`<head><META NAME="GENERATOR" content="Jekyll" /></head>`, `<head><META NAME="GENERATOR" content="Jekyll" /></head>`}, + {"", ""}, + {"</head>", "</head>"}, + {"<head>", "<head>\n\tMETA"}, + } { + in := strings.NewReader(this.in) + out := new(bytes.Buffer) + + tr := transform.New(HugoGenerator) + tr.Apply(out, in) + + if out.String() != this.expect { + t.Errorf("[%d] Expected \n%q got \n%q", i, this.expect, out.String()) + } + } + +} diff --git a/transform/urlreplacers/absurl.go b/transform/urlreplacers/absurl.go new file mode 100644 index 00000000..029d94da --- /dev/null +++ b/transform/urlreplacers/absurl.go @@ -0,0 +1,36 @@ +// Copyright 2018 The Hugo Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package urlreplacers + +import "github.com/gohugoio/hugo/transform" + +var ar = newAbsURLReplacer() + +// NewAbsURLTransformer replaces relative URLs with absolute ones +// in HTML files, using the baseURL setting. +func NewAbsURLTransformer(path string) transform.Transformer { + return func(ft transform.FromTo) error { + ar.replaceInHTML(path, ft) + return nil + } +} + +// NewAbsURLInXMLTransformer replaces relative URLs with absolute ones +// in XML files, using the baseURL setting. +func NewAbsURLInXMLTransformer(path string) transform.Transformer { + return func(ft transform.FromTo) error { + ar.replaceInXML(path, ft) + return nil + } +} diff --git a/transform/urlreplacers/absurlreplacer.go b/transform/urlreplacers/absurlreplacer.go new file mode 100644 index 00000000..1de6b0ca --- /dev/null +++ b/transform/urlreplacers/absurlreplacer.go @@ -0,0 +1,314 @@ +// Copyright 2018 The Hugo Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package urlreplacers + +import ( + "bytes" + "io" + "unicode/utf8" + + "github.com/gohugoio/hugo/transform" +) + +type matchState int + +const ( + matchStateNone matchState = iota + matchStateWhitespace + matchStatePartial + matchStateFull +) + +type absurllexer struct { + // the source to absurlify + content []byte + // the target for the new absurlified content + w io.Writer + + // path may be set to a "." relative path + path []byte + + pos int // input position + start int // item start position + width int // width of last element + + matchers []absURLMatcher + + ms matchState + matches [3]bool // track matches of the 3 prefixes + idx int // last index in matches checked + +} + +type stateFunc func(*absurllexer) stateFunc + +// prefix is how to identify and which func to handle the replacement. +type prefix struct { + r []rune + f func(l *absurllexer) +} + +// new prefixes can be added below, but note: +// - the matches array above must be expanded. +// - the prefix must with the current logic end with '=' +var prefixes = []*prefix{ + {r: []rune{'s', 'r', 'c', '='}, f: checkCandidateBase}, + {r: []rune{'h', 'r', 'e', 'f', '='}, f: checkCandidateBase}, + {r: []rune{'s', 'r', 'c', 's', 'e', 't', '='}, f: checkCandidateSrcset}, +} + +type absURLMatcher struct { + match []byte + quote []byte +} + +// match check rune inside word. Will be != ' '. +func (l *absurllexer) match(r rune) { + + var found bool + + // note, the prefixes can start off on the same foot, i.e. + // src and srcset. + if l.ms == matchStateWhitespace { + l.idx = 0 + for j, p := range prefixes { + if r == p.r[l.idx] { + l.matches[j] = true + found = true + // checkMatchState will only return true when r=='=', so + // we can safely ignore the return value here. + l.checkMatchState(r, j) + } + } + + if !found { + l.ms = matchStateNone + } + + return + } + + l.idx++ + for j, m := range l.matches { + // still a match? 
+ if m { + if prefixes[j].r[l.idx] == r { + found = true + if l.checkMatchState(r, j) { + return + } + } else { + l.matches[j] = false + } + } + } + + if !found { + l.ms = matchStateNone + } +} + +func (l *absurllexer) checkMatchState(r rune, idx int) bool { + if r == '=' { + l.ms = matchStateFull + for k := range l.matches { + if k != idx { + l.matches[k] = false + } + } + return true + } + + l.ms = matchStatePartial + + return false +} + +func (l *absurllexer) emit() { + l.w.Write(l.content[l.start:l.pos]) + l.start = l.pos +} + +// handle URLs in src and href. +func checkCandidateBase(l *absurllexer) { + for _, m := range l.matchers { + if !bytes.HasPrefix(l.content[l.pos:], m.match) { + continue + } + // check for schemaless URLs + posAfter := l.pos + len(m.match) + if posAfter >= len(l.content) { + return + } + r, _ := utf8.DecodeRune(l.content[posAfter:]) + if r == '/' { + // schemaless: skip + return + } + if l.pos > l.start { + l.emit() + } + l.pos += len(m.match) + l.w.Write(m.quote) + l.w.Write(l.path) + l.start = l.pos + } +} + +// handle URLs in srcset. 
+func checkCandidateSrcset(l *absurllexer) { + // special case, not frequent (me think) + for _, m := range l.matchers { + if !bytes.HasPrefix(l.content[l.pos:], m.match) { + continue + } + + // check for schemaless URLs + posAfter := l.pos + len(m.match) + if posAfter >= len(l.content) { + return + } + r, _ := utf8.DecodeRune(l.content[posAfter:]) + if r == '/' { + // schemaless: skip + continue + } + + posLastQuote := bytes.Index(l.content[l.pos+1:], m.quote) + + // safe guard + if posLastQuote < 0 || posLastQuote > 2000 { + return + } + + if l.pos > l.start { + l.emit() + } + + section := l.content[l.pos+len(m.quote) : l.pos+posLastQuote+1] + + fields := bytes.Fields(section) + l.w.Write(m.quote) + for i, f := range fields { + if f[0] == '/' { + l.w.Write(l.path) + l.w.Write(f[1:]) + + } else { + l.w.Write(f) + } + + if i < len(fields)-1 { + l.w.Write([]byte(" ")) + } + } + + l.w.Write(m.quote) + l.pos += len(section) + (len(m.quote) * 2) + l.start = l.pos + } +} + +// main loop +func (l *absurllexer) replace() { + contentLength := len(l.content) + var r rune + + for { + if l.pos >= contentLength { + l.width = 0 + break + } + + var width = 1 + r = rune(l.content[l.pos]) + if r >= utf8.RuneSelf { + r, width = utf8.DecodeRune(l.content[l.pos:]) + } + l.width = width + l.pos += l.width + if r == ' ' { + l.ms = matchStateWhitespace + } else if l.ms != matchStateNone { + l.match(r) + if l.ms == matchStateFull { + var p *prefix + for i, m := range l.matches { + if m { + p = prefixes[i] + l.matches[i] = false + } + } + l.ms = matchStateNone + p.f(l) + } + } + } + + // Done! 
+ if l.pos > l.start { + l.emit() + } +} + +func doReplace(path string, ct transform.FromTo, matchers []absURLMatcher) { + + lexer := &absurllexer{ + content: ct.From().Bytes(), + w: ct.To(), + path: []byte(path), + matchers: matchers} + + lexer.replace() +} + +type absURLReplacer struct { + htmlMatchers []absURLMatcher + xmlMatchers []absURLMatcher +} + +func newAbsURLReplacer() *absURLReplacer { + + // HTML + dqHTMLMatch := []byte("\"/") + sqHTMLMatch := []byte("'/") + + // XML + dqXMLMatch := []byte("&#34;/") + sqXMLMatch := []byte("&#39;/") + + dqHTML := []byte("\"") + sqHTML := []byte("'") + + dqXML := []byte("&#34;") + sqXML := []byte("&#39;") + + return &absURLReplacer{ + htmlMatchers: []absURLMatcher{ + {dqHTMLMatch, dqHTML}, + {sqHTMLMatch, sqHTML}, + }, + xmlMatchers: []absURLMatcher{ + {dqXMLMatch, dqXML}, + {sqXMLMatch, sqXML}, + }} +} + +func (au *absURLReplacer) replaceInHTML(path string, ct transform.FromTo) { + doReplace(path, ct, au.htmlMatchers) +} + +func (au *absURLReplacer) replaceInXML(path string, ct transform.FromTo) { + doReplace(path, ct, au.xmlMatchers) +} diff --git a/transform/urlreplacers/absurlreplacer_test.go b/transform/urlreplacers/absurlreplacer_test.go new file mode 100644 index 00000000..7a530862 --- /dev/null +++ b/transform/urlreplacers/absurlreplacer_test.go @@ -0,0 +1,223 @@ +// Copyright 2018 The Hugo Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package urlreplacers + +import ( + "path/filepath" + "testing" + + bp "github.com/gohugoio/hugo/bufferpool" + + "github.com/gohugoio/hugo/helpers" + "github.com/gohugoio/hugo/transform" +) + +const ( + h5JsContentDoubleQuote = "<!DOCTYPE html><html><head><script src=\"foobar.js\"></script><script src=\"/barfoo.js\"></script></head><body><nav><h1>title</h1></nav><article>content <a href=\"foobar\">foobar</a>. <a href=\"/foobar\">Follow up</a></article></body></html>" + h5JsContentSingleQuote = "<!DOCTYPE html><html><head><script src='foobar.js'></script><script src='/barfoo.js'></script></head><body><nav><h1>title</h1></nav><article>content <a href='foobar'>foobar</a>. <a href='/foobar'>Follow up</a></article></body></html>" + h5JsContentAbsURL = "<!DOCTYPE html><html><head><script src=\"http://user@host:10234/foobar.js\"></script></head><body><nav><h1>title</h1></nav><article>content <a href=\"https://host/foobar\">foobar</a>. Follow up</article></body></html>" + h5JsContentAbsURLSchemaless = "<!DOCTYPE html><html><head><script src=\"//host/foobar.js\"></script><script src='//host2/barfoo.js'></head><body><nav><h1>title</h1></nav><article>content <a href=\"//host/foobar\">foobar</a>. <a href='//host2/foobar'>Follow up</a></article></body></html>" + corectOutputSrcHrefDq = "<!DOCTYPE html><html><head><script src=\"foobar.js\"></script><script src=\"http://base/barfoo.js\"></script></head><body><nav><h1>title</h1></nav><article>content <a href=\"foobar\">foobar</a>. <a href=\"http://base/foobar\">Follow up</a></article></body></html>" + corectOutputSrcHrefSq = "<!DOCTYPE html><html><head><script src='foobar.js'></script><script src='http://base/barfoo.js'></script></head><body><nav><h1>title</h1></nav><article>content <a href='foobar'>foobar</a>. 
<a href='http://base/foobar'>Follow up</a></article></body></html>" + + h5XMLXontentAbsURL = "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\" ?><feed xmlns=\"http://www.w3.org/2005/Atom\"><entry><content type=\"html\"><p><a href=&#34;/foobar&#34;>foobar</a></p> <p>A video: <iframe src=&#39;/foo&#39;></iframe></p></content></entry></feed>" + correctOutputSrcHrefInXML = "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\" ?><feed xmlns=\"http://www.w3.org/2005/Atom\"><entry><content type=\"html\"><p><a href=&#34;http://base/foobar&#34;>foobar</a></p> <p>A video: <iframe src=&#39;http://base/foo&#39;></iframe></p></content></entry></feed>" + h5XMLContentGuarded = "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\" ?><feed xmlns=\"http://www.w3.org/2005/Atom\"><entry><content type=\"html\"><p><a href=&#34;//foobar&#34;>foobar</a></p> <p>A video: <iframe src=&#39;//foo&#39;></iframe></p></content></entry></feed>" +) + +const ( + // additional sanity tests for replacements testing + replace1 = "No replacements." + replace2 = "ᚠᛇᚻ ᛒᛦᚦ ᚠᚱᚩᚠᚢᚱ\nᚠᛁᚱᚪ ᚷᛖᚻᚹᛦᛚᚳᚢᛗ" + replace3 = `End of file: src="/` + replace4 = `End of file: srcset="/` + replace5 = `Srcsett with no closing quote: srcset="/img/small.jpg do be do be do.` + + // Issue: 816, schemaless links combined with others + replaceSchemalessHTML = `Pre. src='//schemaless' src='/normal' <a href="//schemaless">Schemaless</a>. <a href="/normal">normal</a>. Post.` + replaceSchemalessHTMLCorrect = `Pre. src='//schemaless' src='http://base/normal' <a href="//schemaless">Schemaless</a>. <a href="http://base/normal">normal</a>. Post.` + replaceSchemalessXML = `Pre. src=&#39;//schemaless&#39; src=&#39;/normal&#39; <a href=&#39;//schemaless&#39;>Schemaless</a>. <a href=&#39;/normal&#39;>normal</a>. Post.` + replaceSchemalessXMLCorrect = `Pre. src=&#39;//schemaless&#39; src=&#39;http://base/normal&#39; <a href=&#39;//schemaless&#39;>Schemaless</a>. <a href=&#39;http://base/normal&#39;>normal</a>. Post.` +) + +const ( + // srcset= + srcsetBasic = `Pre. 
<img srcset="/img/small.jpg 200w, /img/medium.jpg 300w, /img/big.jpg 700w" alt="text" src="/img/foo.jpg">` + srcsetBasicCorrect = `Pre. <img srcset="http://base/img/small.jpg 200w, http://base/img/medium.jpg 300w, http://base/img/big.jpg 700w" alt="text" src="http://base/img/foo.jpg">` + srcsetSingleQuote = `Pre. <img srcset='/img/small.jpg 200w, /img/big.jpg 700w' alt="text" src="/img/foo.jpg"> POST.` + srcsetSingleQuoteCorrect = `Pre. <img srcset='http://base/img/small.jpg 200w, http://base/img/big.jpg 700w' alt="text" src="http://base/img/foo.jpg"> POST.` + srcsetXMLBasic = `Pre. <img srcset="/img/small.jpg 200w, /img/big.jpg 700w" alt="text" src="/img/foo.jpg">` + srcsetXMLBasicCorrect = `Pre. <img srcset="http://base/img/small.jpg 200w, http://base/img/big.jpg 700w" alt="text" src="http://base/img/foo.jpg">` + srcsetXMLSingleQuote = `Pre. <img srcset="/img/small.jpg 200w, /img/big.jpg 700w" alt="text" src="/img/foo.jpg">` + srcsetXMLSingleQuoteCorrect = `Pre. <img srcset="http://base/img/small.jpg 200w, http://base/img/big.jpg 700w" alt="text" src="http://base/img/foo.jpg">` + srcsetVariations = `Pre. +Missing start quote: <img srcset=/img/small.jpg 200w, /img/big.jpg 700w" alt="text"> src='/img/foo.jpg'> FOO. +<img srcset='/img.jpg'> +schemaless: <img srcset='//img.jpg' src='//basic.jpg'> +schemaless2: <img srcset="//img.jpg" src="//basic.jpg2> POST +` +) + +const ( + srcsetVariationsCorrect = `Pre. +Missing start quote: <img srcset=/img/small.jpg 200w, /img/big.jpg 700w" alt="text"> src='http://base/img/foo.jpg'> FOO. +<img srcset='http://base/img.jpg'> +schemaless: <img srcset='//img.jpg' src='//basic.jpg'> +schemaless2: <img srcset="//img.jpg" src="//basic.jpg2> POST +` + srcsetXMLVariations = `Pre. +Missing start quote: <img srcset=/img/small.jpg 200w /img/big.jpg 700w" alt="text"> src='/img/foo.jpg'> FOO. 
+<img srcset='/img.jpg'> +schemaless: <img srcset='//img.jpg' src='//basic.jpg'> +schemaless2: <img srcset="//img.jpg" src="//basic.jpg2> POST +` + srcsetXMLVariationsCorrect = `Pre. +Missing start quote: <img srcset=/img/small.jpg 200w /img/big.jpg 700w" alt="text"> src='http://base/img/foo.jpg'> FOO. +<img srcset='http://base/img.jpg'> +schemaless: <img srcset='//img.jpg' src='//basic.jpg'> +schemaless2: <img srcset="//img.jpg" src="//basic.jpg2> POST +` + + relPathVariations = `PRE. a href="/img/small.jpg" POST.` + relPathVariationsCorrect = `PRE. a href="../../img/small.jpg" POST.` + + testBaseURL = "http://base/" +) + +var ( + absURLlBenchTests = []test{ + {h5JsContentDoubleQuote, corectOutputSrcHrefDq}, + {h5JsContentSingleQuote, corectOutputSrcHrefSq}, + {h5JsContentAbsURL, h5JsContentAbsURL}, + {h5JsContentAbsURLSchemaless, h5JsContentAbsURLSchemaless}, + } + + xmlAbsURLBenchTests = []test{ + {h5XMLXontentAbsURL, correctOutputSrcHrefInXML}, + {h5XMLContentGuarded, h5XMLContentGuarded}, + } + + sanityTests = []test{{replace1, replace1}, {replace2, replace2}, {replace3, replace3}, {replace3, replace3}, {replace5, replace5}} + extraTestsHTML = []test{{replaceSchemalessHTML, replaceSchemalessHTMLCorrect}} + absURLTests = append(absURLlBenchTests, append(sanityTests, extraTestsHTML...)...) + extraTestsXML = []test{{replaceSchemalessXML, replaceSchemalessXMLCorrect}} + xmlAbsURLTests = append(xmlAbsURLBenchTests, append(sanityTests, extraTestsXML...)...) 
+ srcsetTests = []test{{srcsetBasic, srcsetBasicCorrect}, {srcsetSingleQuote, srcsetSingleQuoteCorrect}, {srcsetVariations, srcsetVariationsCorrect}} + srcsetXMLTests = []test{ + {srcsetXMLBasic, srcsetXMLBasicCorrect}, + {srcsetXMLSingleQuote, srcsetXMLSingleQuoteCorrect}, + {srcsetXMLVariations, srcsetXMLVariationsCorrect}} + + relurlTests = []test{{relPathVariations, relPathVariationsCorrect}} +) + +func BenchmarkAbsURL(b *testing.B) { + tr := transform.New(NewAbsURLTransformer(testBaseURL)) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + apply(b.Errorf, tr, absURLlBenchTests) + } +} + +func BenchmarkAbsURLSrcset(b *testing.B) { + tr := transform.New(NewAbsURLTransformer(testBaseURL)) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + apply(b.Errorf, tr, srcsetTests) + } +} + +func BenchmarkXMLAbsURLSrcset(b *testing.B) { + tr := transform.New(NewAbsURLInXMLTransformer(testBaseURL)) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + apply(b.Errorf, tr, srcsetXMLTests) + } +} + +func TestAbsURL(t *testing.T) { + tr := transform.New(NewAbsURLTransformer(testBaseURL)) + + apply(t.Errorf, tr, absURLTests) + +} + +func TestRelativeURL(t *testing.T) { + tr := transform.New(NewAbsURLTransformer(helpers.GetDottedRelativePath(filepath.FromSlash("/post/sub/")))) + + applyWithPath(t.Errorf, tr, relurlTests) + +} + +func TestAbsURLSrcSet(t *testing.T) { + tr := transform.New(NewAbsURLTransformer(testBaseURL)) + + apply(t.Errorf, tr, srcsetTests) +} + +func TestAbsXMLURLSrcSet(t *testing.T) { + tr := transform.New(NewAbsURLInXMLTransformer(testBaseURL)) + + apply(t.Errorf, tr, srcsetXMLTests) +} + +func BenchmarkXMLAbsURL(b *testing.B) { + tr := transform.New(NewAbsURLInXMLTransformer("")) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + apply(b.Errorf, tr, xmlAbsURLBenchTests) + } +} + +func TestXMLAbsURL(t *testing.T) { + tr := transform.New(NewAbsURLInXMLTransformer(testBaseURL)) + apply(t.Errorf, tr, xmlAbsURLTests) +} + +func apply(ef errorf, tr transform.Chain, tests 
[]test) { + applyWithPath(ef, tr, tests) +} + +func applyWithPath(ef errorf, tr transform.Chain, tests []test) { + out := bp.GetBuffer() + defer bp.PutBuffer(out) + + in := bp.GetBuffer() + defer bp.PutBuffer(in) + + for _, test := range tests { + var err error + in.WriteString(test.content) + err = tr.Apply(out, in) + if err != nil { + ef("Unexpected error: %s", err) + } + if test.expected != out.String() { + ef("Expected:\n%s\nGot:\n%s", test.expected, out.String()) + } + out.Reset() + in.Reset() + } +} + +type test struct { + content string + expected string +} + +type errorf func(string, ...interface{})