From a5744697971d296eb973e04e4259fe9e516b908f Mon Sep 17 00:00:00 2001 From: =?utf8?q?Bj=C3=B8rn=20Erik=20Pedersen?= Date: Sun, 23 Dec 2018 10:40:32 +0100 Subject: [PATCH] Add CSV support to transform.Unmarshal Fixes #5555 --- commands/convert.go | 2 +- commands/hugo.go | 2 +- commands/import_jekyll.go | 2 +- config/configLoader.go | 4 +- go.sum | 1 - hugolib/config.go | 2 +- hugolib/page_content.go | 2 +- hugolib/resource_chain_test.go | 10 +++- hugolib/site.go | 2 +- parser/metadecoders/decoder.go | 64 +++++++++++++++++--- parser/metadecoders/decoder_test.go | 10 +++- parser/metadecoders/format.go | 10 +++- parser/metadecoders/format_test.go | 3 +- tpl/transform/remarshal.go | 4 +- tpl/transform/unmarshal.go | 93 +++++++++++++++++++++++++++-- tpl/transform/unmarshal_test.go | 71 +++++++++++++++++----- 16 files changed, 238 insertions(+), 44 deletions(-) diff --git a/commands/convert.go b/commands/convert.go index 74925f83..c4f88a24 100644 --- a/commands/convert.go +++ b/commands/convert.go @@ -238,7 +238,7 @@ func parseContentFile(r io.Reader) (parsedFile, error) { iter.PeekWalk(walkFn) - metadata, err := metadecoders.UnmarshalToMap(pf.frontMatterSource, pf.frontMatterFormat) + metadata, err := metadecoders.Default.UnmarshalToMap(pf.frontMatterSource, pf.frontMatterFormat) if err != nil { return pf, err } diff --git a/commands/hugo.go b/commands/hugo.go index 74173fa8..b9435683 100644 --- a/commands/hugo.go +++ b/commands/hugo.go @@ -1045,7 +1045,7 @@ func (c *commandeer) isThemeVsHugoVersionMismatch(fs afero.Fs) (dir string, mism b, err := afero.ReadFile(fs, path) - tomlMeta, err := metadecoders.UnmarshalToMap(b, metadecoders.TOML) + tomlMeta, err := metadecoders.Default.UnmarshalToMap(b, metadecoders.TOML) if err != nil { continue diff --git a/commands/import_jekyll.go b/commands/import_jekyll.go index 6a708ac0..d3301b48 100644 --- a/commands/import_jekyll.go +++ b/commands/import_jekyll.go @@ -257,7 +257,7 @@ func (i *importCmd) loadJekyllConfig(fs afero.Fs, 
jekyllRoot string) map[string] return nil } - c, err := metadecoders.UnmarshalToMap(b, metadecoders.YAML) + c, err := metadecoders.Default.UnmarshalToMap(b, metadecoders.YAML) if err != nil { return nil diff --git a/config/configLoader.go b/config/configLoader.go index b60aa3fe..31e3e00e 100644 --- a/config/configLoader.go +++ b/config/configLoader.go @@ -57,7 +57,7 @@ func FromFileToMap(fs afero.Fs, filename string) (map[string]interface{}, error) } func readConfig(format metadecoders.Format, data []byte) (map[string]interface{}, error) { - m, err := metadecoders.UnmarshalToMap(data, format) + m, err := metadecoders.Default.UnmarshalToMap(data, format) if err != nil { return nil, err } @@ -69,7 +69,7 @@ func readConfig(format metadecoders.Format, data []byte) (map[string]interface{} } func loadConfigFromFile(fs afero.Fs, filename string) (map[string]interface{}, error) { - m, err := metadecoders.UnmarshalFileToMap(fs, filename) + m, err := metadecoders.Default.UnmarshalFileToMap(fs, filename) if err != nil { return nil, err } diff --git a/go.sum b/go.sum index ea33a1ab..f7cfa6da 100644 --- a/go.sum +++ b/go.sum @@ -72,7 +72,6 @@ github.com/magefile/mage v1.4.0 h1:RI7B1CgnPAuu2O9lWszwya61RLmfL0KCdo+QyyI/Bhk= github.com/magefile/mage v1.4.0/go.mod h1:IUDi13rsHje59lecXokTfGX0QIzO45uVPlXnJYsXepA= github.com/magiconair/properties v1.8.0 h1:LLgXmsheXeRoUOBOjtwPQCWIYqM/LU1ayDtDePerRcY= github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= -github.com/markbates/inflect v0.0.0-20171215194931-a12c3aec81a6 h1:LZhVjIISSbj8qLf2qDPP0D8z0uvOWAW5C85ly5mJW6c= github.com/markbates/inflect v0.0.0-20171215194931-a12c3aec81a6/go.mod h1:oTeZL2KHA7CUX6X+fovmK9OvIOFuqu0TwdQrZjLTh88= github.com/matryer/try v0.0.0-20161228173917-9ac251b645a2/go.mod h1:0KeJpeMD6o+O4hW7qJOT7vyQPKrWmj26uf5wMc/IiIs= github.com/mattn/go-isatty v0.0.4 h1:bnP0vzxcAdeI1zdubAl5PjU6zsERjGZb7raWodagDYs= diff --git a/hugolib/config.go b/hugolib/config.go index 
3a452d5f..f71881e2 100644 --- a/hugolib/config.go +++ b/hugolib/config.go @@ -285,7 +285,7 @@ func (l configLoader) loadConfigFromConfigDir(v *viper.Viper) ([]string, error) name := helpers.Filename(filepath.Base(path)) - item, err := metadecoders.UnmarshalFileToMap(sourceFs, path) + item, err := metadecoders.Default.UnmarshalFileToMap(sourceFs, path) if err != nil { return l.wrapFileError(err, path) } diff --git a/hugolib/page_content.go b/hugolib/page_content.go index af13d8a3..924400ae 100644 --- a/hugolib/page_content.go +++ b/hugolib/page_content.go @@ -91,7 +91,7 @@ Loop: result.Write(it.Val) case it.IsFrontMatter(): f := metadecoders.FormatFromFrontMatterType(it.Type) - m, err := metadecoders.UnmarshalToMap(it.Val, f) + m, err := metadecoders.Default.UnmarshalToMap(it.Val, f) if err != nil { if fe, ok := err.(herrors.FileError); ok { return herrors.ToFileErrorWithOffset(fe, iter.LineNumber()-1) diff --git a/hugolib/resource_chain_test.go b/hugolib/resource_chain_test.go index 74129dc1..e3123952 100644 --- a/hugolib/resource_chain_test.go +++ b/hugolib/resource_chain_test.go @@ -342,11 +342,19 @@ Publish 2: {{ $cssPublish2.Permalink }} {"unmarshal", func() bool { return true }, func(b *sitesBuilder) { b.WithTemplates("home.html", ` {{ $toml := "slogan = \"Hugo Rocks!\"" | resources.FromString "slogan.toml" | transform.Unmarshal }} +{{ $csv1 := "\"Hugo Rocks\",\"Hugo is Fast!\"" | resources.FromString "slogans.csv" | transform.Unmarshal }} +{{ $csv2 := "a;b;c" | resources.FromString "abc.csv" | transform.Unmarshal (dict "csvComma" ";") }} + Slogan: {{ $toml.slogan }} +CSV1: {{ $csv1 }} {{ len (index $csv1 0) }} +CSV2: {{ $csv2 }} `) }, func(b *sitesBuilder) { - b.AssertFileContent("public/index.html", `Slogan: Hugo Rocks!`) + b.AssertFileContent("public/index.html", + `Slogan: Hugo Rocks!`, + `[[Hugo Rocks Hugo is Fast!]] 2`, + ) }}, {"template", func() bool { return true }, func(b *sitesBuilder) {}, func(b *sitesBuilder) { diff --git a/hugolib/site.go 
b/hugolib/site.go index 2fd9f17e..c6d203d8 100644 --- a/hugolib/site.go +++ b/hugolib/site.go @@ -1014,7 +1014,7 @@ func (s *Site) readData(f source.ReadableFile) (interface{}, error) { content := helpers.ReaderToBytes(file) format := metadecoders.FormatFromString(f.Extension()) - return metadecoders.Unmarshal(content, format) + return metadecoders.Default.Unmarshal(content, format) } func (s *Site) readDataFromSourceFS() error { diff --git a/parser/metadecoders/decoder.go b/parser/metadecoders/decoder.go index 6da791c7..0ca8575f 100644 --- a/parser/metadecoders/decoder.go +++ b/parser/metadecoders/decoder.go @@ -14,6 +14,8 @@ package metadecoders import ( + "bytes" + "encoding/csv" "encoding/json" "fmt" @@ -27,22 +29,37 @@ import ( yaml "gopkg.in/yaml.v2" ) +// Decoder provides some configuration options for the decoders. +type Decoder struct { + // Comma is the field delimiter used in the CSV decoder. It defaults to ','. + Comma rune + + // Comment, if not 0, is the comment character used in the CSV decoder. Lines beginning with the + // Comment character without preceding whitespace are ignored. + Comment rune +} + +// Default is a Decoder in its default configuration. +var Default = Decoder{ + Comma: ',', +} + // UnmarshalToMap will unmarshall data in format f into a new map. This is // what's needed for Hugo's front matter decoding. -func UnmarshalToMap(data []byte, f Format) (map[string]interface{}, error) { +func (d Decoder) UnmarshalToMap(data []byte, f Format) (map[string]interface{}, error) { m := make(map[string]interface{}) if data == nil { return m, nil } - err := unmarshal(data, f, &m) + err := d.unmarshal(data, f, &m) return m, err } // UnmarshalFileToMap is the same as UnmarshalToMap, but reads the data from // the given filename. 
-func UnmarshalFileToMap(fs afero.Fs, filename string) (map[string]interface{}, error) { +func (d Decoder) UnmarshalFileToMap(fs afero.Fs, filename string) (map[string]interface{}, error) { format := FormatFromString(filename) if format == "" { return nil, errors.Errorf("%q is not a valid configuration format", filename) @@ -52,23 +69,29 @@ func UnmarshalFileToMap(fs afero.Fs, filename string) (map[string]interface{}, e if err != nil { return nil, err } - return UnmarshalToMap(data, format) + return d.UnmarshalToMap(data, format) } // Unmarshal will unmarshall data in format f into an interface{}. // This is what's needed for Hugo's /data handling. -func Unmarshal(data []byte, f Format) (interface{}, error) { +func (d Decoder) Unmarshal(data []byte, f Format) (interface{}, error) { if data == nil { - return make(map[string]interface{}), nil + switch f { + case CSV: + return make([][]string, 0), nil + default: + return make(map[string]interface{}), nil + } + } var v interface{} - err := unmarshal(data, f, &v) + err := d.unmarshal(data, f, &v) return v, err } // unmarshal unmarshals data in format f into v. 
-func unmarshal(data []byte, f Format, v interface{}) error { +func (d Decoder) unmarshal(data []byte, f Format, v interface{}) error { var err error @@ -116,6 +139,9 @@ func unmarshal(data []byte, f Format, v interface{}) error { *v.(*interface{}) = mm } } + case CSV: + return d.unmarshalCSV(data, v) + default: return errors.Errorf("unmarshal of format %q is not supported", f) } @@ -128,6 +154,28 @@ func unmarshal(data []byte, f Format, v interface{}) error { } +func (d Decoder) unmarshalCSV(data []byte, v interface{}) error { + r := csv.NewReader(bytes.NewReader(data)) + r.Comma = d.Comma + r.Comment = d.Comment + + records, err := r.ReadAll() + if err != nil { + return err + } + + switch v.(type) { + case *interface{}: + *v.(*interface{}) = records + default: + return errors.Errorf("CSV cannot be unmarshaled into %T", v) + + } + + return nil + +} + func toFileError(f Format, err error) error { return herrors.ToFileError(string(f), err) } diff --git a/parser/metadecoders/decoder_test.go b/parser/metadecoders/decoder_test.go index 94cfd5a9..38d002dd 100644 --- a/parser/metadecoders/decoder_test.go +++ b/parser/metadecoders/decoder_test.go @@ -26,6 +26,8 @@ func TestUnmarshalToMap(t *testing.T) { expect := map[string]interface{}{"a": "b"} + d := Default + for i, test := range []struct { data string format Format @@ -40,9 +42,10 @@ func TestUnmarshalToMap(t *testing.T) { {`#+a: b`, ORG, expect}, // errors {`a = b`, TOML, false}, + {`a,b,c`, CSV, false}, // Use Unmarshal for CSV } { msg := fmt.Sprintf("%d: %s", i, test.format) - m, err := UnmarshalToMap([]byte(test.data), test.format) + m, err := d.UnmarshalToMap([]byte(test.data), test.format) if b, ok := test.expect.(bool); ok && !b { assert.Error(err, msg) } else { @@ -57,6 +60,8 @@ func TestUnmarshalToInterface(t *testing.T) { expect := map[string]interface{}{"a": "b"} + d := Default + for i, test := range []struct { data string format Format @@ -67,12 +72,13 @@ func TestUnmarshalToInterface(t *testing.T) { 
{`#+a: b`, ORG, expect}, {`a = "b"`, TOML, expect}, {`a: "b"`, YAML, expect}, + {`a,b,c`, CSV, [][]string{[]string{"a", "b", "c"}}}, {"a: Easy!\nb:\n c: 2\n d: [3, 4]", YAML, map[string]interface{}{"a": "Easy!", "b": map[string]interface{}{"c": 2, "d": []interface{}{3, 4}}}}, // errors {`a = "`, TOML, false}, } { msg := fmt.Sprintf("%d: %s", i, test.format) - m, err := Unmarshal([]byte(test.data), test.format) + m, err := d.Unmarshal([]byte(test.data), test.format) if b, ok := test.expect.(bool); ok && !b { assert.Error(err, msg) } else { diff --git a/parser/metadecoders/format.go b/parser/metadecoders/format.go index 4a30898f..719fbf10 100644 --- a/parser/metadecoders/format.go +++ b/parser/metadecoders/format.go @@ -31,6 +31,7 @@ const ( JSON Format = "json" TOML Format = "toml" YAML Format = "yaml" + CSV Format = "csv" ) // FormatFromString turns formatStr, typically a file extension without any ".", @@ -51,6 +52,8 @@ func FormatFromString(formatStr string) Format { return TOML case "org": return ORG + case "csv": + return CSV } return "" @@ -88,11 +91,16 @@ func FormatFromFrontMatterType(typ pageparser.ItemType) Format { // FormatFromContentString tries to detect the format (JSON, YAML or TOML) // in the given string. // It return an empty string if no format could be detected. 
-func FormatFromContentString(data string) Format { +func (d Decoder) FormatFromContentString(data string) Format { + csvIdx := strings.IndexRune(data, d.Comma) jsonIdx := strings.Index(data, "{") yamlIdx := strings.Index(data, ":") tomlIdx := strings.Index(data, "=") + if isLowerIndexThan(csvIdx, jsonIdx, yamlIdx, tomlIdx) { + return CSV + } + if isLowerIndexThan(jsonIdx, yamlIdx, tomlIdx) { return JSON } diff --git a/parser/metadecoders/format_test.go b/parser/metadecoders/format_test.go index 6243b3f1..7794843b 100644 --- a/parser/metadecoders/format_test.go +++ b/parser/metadecoders/format_test.go @@ -88,12 +88,13 @@ func TestFormatFromContentString(t *testing.T) { {`foo: "bar"`, YAML}, {`foo:"bar"`, YAML}, {`{ "foo": "bar"`, JSON}, + {`a,b,c"`, CSV}, {`asdfasdf`, Format("")}, {``, Format("")}, } { errMsg := fmt.Sprintf("[%d] %s", i, test.data) - result := FormatFromContentString(test.data) + result := Default.FormatFromContentString(test.data) assert.Equal(test.expect, result, errMsg) } diff --git a/tpl/transform/remarshal.go b/tpl/transform/remarshal.go index 144964f0..62d826b4 100644 --- a/tpl/transform/remarshal.go +++ b/tpl/transform/remarshal.go @@ -35,12 +35,12 @@ func (ns *Namespace) Remarshal(format string, data interface{}) (string, error) return "", err } - fromFormat := metadecoders.FormatFromContentString(from) + fromFormat := metadecoders.Default.FormatFromContentString(from) if fromFormat == "" { return "", errors.New("failed to detect format from content") } - meta, err := metadecoders.UnmarshalToMap([]byte(from), fromFormat) + meta, err := metadecoders.Default.UnmarshalToMap([]byte(from), fromFormat) var result bytes.Buffer if err := parser.InterfaceToConfig(meta, mark, &result); err != nil { diff --git a/tpl/transform/unmarshal.go b/tpl/transform/unmarshal.go index bf7db892..d83cafd3 100644 --- a/tpl/transform/unmarshal.go +++ b/tpl/transform/unmarshal.go @@ -15,8 +15,10 @@ package transform import ( "io/ioutil" + "strings" 
"github.com/gohugoio/hugo/common/hugio" + "github.com/mitchellh/mapstructure" "github.com/gohugoio/hugo/helpers" "github.com/gohugoio/hugo/parser/metadecoders" @@ -27,8 +29,33 @@ import ( ) // Unmarshal unmarshals the data given, which can be either a string -// or a Resource. Supported formats are JSON, TOML and YAML. -func (ns *Namespace) Unmarshal(data interface{}) (interface{}, error) { +// or a Resource. Supported formats are JSON, TOML, YAML, and CSV. +// You can optionally provide an Options object as the first argument. +func (ns *Namespace) Unmarshal(args ...interface{}) (interface{}, error) { + if len(args) < 1 || len(args) > 2 { + return nil, errors.New("unmarshal takes 1 or 2 arguments") + } + + var data interface{} + var decoder = metadecoders.Default + + if len(args) == 1 { + data = args[0] + } else { + m, ok := args[0].(map[string]interface{}) + if !ok { + return nil, errors.New("first argument must be a map") + } + + var err error + + data = args[1] + decoder, err = decodeDecoder(m) + if err != nil { + return nil, errors.WithMessage(err, "failed to decode options") + } + + } // All the relevant Resource types implements ReadSeekCloserResource, // which should be the most effective way to get the content. 
@@ -75,7 +102,7 @@ func (ns *Namespace) Unmarshal(data interface{}) (interface{}, error) { return nil, err } - return metadecoders.Unmarshal(b, f) + return decoder.Unmarshal(b, f) }) } @@ -88,11 +115,67 @@ func (ns *Namespace) Unmarshal(data interface{}) (interface{}, error) { key := helpers.MD5String(dataStr) return ns.cache.GetOrCreate(key, func() (interface{}, error) { - f := metadecoders.FormatFromContentString(dataStr) + f := decoder.FormatFromContentString(dataStr) if f == "" { return nil, errors.New("unknown format") } - return metadecoders.Unmarshal([]byte(dataStr), f) + return decoder.Unmarshal([]byte(dataStr), f) }) } + +func decodeDecoder(m map[string]interface{}) (metadecoders.Decoder, error) { + opts := metadecoders.Default + + if m == nil { + return opts, nil + } + + // mapstructure does not support string to rune conversion, so do that manually. + // See https://github.com/mitchellh/mapstructure/issues/151 + for k, v := range m { + if strings.EqualFold(k, "Comma") { + r, err := stringToRune(v) + if err != nil { + return opts, err + } + opts.Comma = r + delete(m, k) + + } else if strings.EqualFold(k, "Comment") { + r, err := stringToRune(v) + if err != nil { + return opts, err + } + opts.Comment = r + delete(m, k) + } + } + + err := mapstructure.WeakDecode(m, &opts) + + return opts, err +} + +func stringToRune(v interface{}) (rune, error) { + s, err := cast.ToStringE(v) + if err != nil { + return 0, err + } + + if len(s) == 0 { + return 0, nil + } + + var r rune + + for i, rr := range s { + if i == 0 { + r = rr + } else { + return 0, errors.Errorf("invalid character: %q", v) + } + } + + return r, nil +} diff --git a/tpl/transform/unmarshal_test.go b/tpl/transform/unmarshal_test.go index 77e14eda..00424c69 100644 --- a/tpl/transform/unmarshal_test.go +++ b/tpl/transform/unmarshal_test.go @@ -89,38 +89,74 @@ func TestUnmarshal(t *testing.T) { } for i, test := range []struct { - data interface{} - expect interface{} + data interface{} + options 
interface{} + expect interface{} }{ - {`{ "slogan": "Hugo Rocks!" }`, func(m map[string]interface{}) { + {`{ "slogan": "Hugo Rocks!" }`, nil, func(m map[string]interface{}) { assertSlogan(m) }}, - {`slogan: "Hugo Rocks!"`, func(m map[string]interface{}) { + {`slogan: "Hugo Rocks!"`, nil, func(m map[string]interface{}) { assertSlogan(m) }}, - {`slogan = "Hugo Rocks!"`, func(m map[string]interface{}) { + {`slogan = "Hugo Rocks!"`, nil, func(m map[string]interface{}) { assertSlogan(m) }}, - {testContentResource{content: `slogan: "Hugo Rocks!"`, mime: media.YAMLType}, func(m map[string]interface{}) { + {testContentResource{content: `slogan: "Hugo Rocks!"`, mime: media.YAMLType}, nil, func(m map[string]interface{}) { assertSlogan(m) }}, - {testContentResource{content: `{ "slogan": "Hugo Rocks!" }`, mime: media.JSONType}, func(m map[string]interface{}) { + {testContentResource{content: `{ "slogan": "Hugo Rocks!" }`, mime: media.JSONType}, nil, func(m map[string]interface{}) { assertSlogan(m) }}, - {testContentResource{content: `slogan = "Hugo Rocks!"`, mime: media.TOMLType}, func(m map[string]interface{}) { + {testContentResource{content: `slogan = "Hugo Rocks!"`, mime: media.TOMLType}, nil, func(m map[string]interface{}) { assertSlogan(m) }}, + {testContentResource{content: `1997,Ford,E350,"ac, abs, moon",3000.00 +1999,Chevy,"Venture ""Extended Edition""","",4900.00`, mime: media.CSVType}, nil, func(r [][]string) { + assert.Equal(2, len(r)) + first := r[0] + assert.Equal(5, len(first)) + assert.Equal("Ford", first[1]) + }}, + {testContentResource{content: `a;b;c`, mime: media.CSVType}, map[string]interface{}{"comma": ";"}, func(r [][]string) { + assert.Equal(r, [][]string{[]string{"a", "b", "c"}}) + + }}, + {"a,b,c", nil, func(r [][]string) { + assert.Equal(r, [][]string{[]string{"a", "b", "c"}}) + + }}, + {"a;b;c", map[string]interface{}{"comma": ";"}, func(r [][]string) { + assert.Equal(r, [][]string{[]string{"a", "b", "c"}}) + + }}, + {testContentResource{content: ` 
+% This is a comment +a;b;c`, mime: media.CSVType}, map[string]interface{}{"CommA": ";", "Comment": "%"}, func(r [][]string) { + assert.Equal(r, [][]string{[]string{"a", "b", "c"}}) + + }}, // errors - {"thisisnotavaliddataformat", false}, - {testContentResource{content: `invalid&toml"`, mime: media.TOMLType}, false}, - {testContentResource{content: `unsupported: MIME"`, mime: media.CalendarType}, false}, - {"thisisnotavaliddataformat", false}, - {`{ notjson }`, false}, - {tstNoStringer{}, false}, + {"thisisnotavaliddataformat", nil, false}, + {testContentResource{content: `invalid&toml"`, mime: media.TOMLType}, nil, false}, + {testContentResource{content: `unsupported: MIME"`, mime: media.CalendarType}, nil, false}, + {"thisisnotavaliddataformat", nil, false}, + {`{ notjson }`, nil, false}, + {tstNoStringer{}, nil, false}, } { errMsg := fmt.Sprintf("[%d]", i) - result, err := ns.Unmarshal(test.data) + ns.cache.Clear() + + var args []interface{} + + if test.options != nil { + args = []interface{}{test.options, test.data} + } else { + args = []interface{}{test.data} + } + + result, err := ns.Unmarshal(args...) if b, ok := test.expect.(bool); ok && !b { assert.Error(err, errMsg) @@ -129,6 +165,11 @@ func TestUnmarshal(t *testing.T) { m, ok := result.(map[string]interface{}) assert.True(ok, errMsg) fn(m) + } else if fn, ok := test.expect.(func(r [][]string)); ok { + assert.NoError(err, errMsg) + r, ok := result.([][]string) + assert.True(ok, errMsg) + fn(r) } else { assert.NoError(err, errMsg) assert.Equal(test.expect, result, errMsg) -- 2.30.2