From c96643250c7294e7bd1e4f4a037f788e438306eb Mon Sep 17 00:00:00 2001
From: emilis
Date: Wed, 3 Aug 2022 15:20:03 +0100
Subject: [PATCH] wip chunker i gotta go faste bye

---
Review notes (ignored by git am): line structure restored and hunk/diffstat
counts updated for the edits to consts.go, chunk.go and parse.go. The blob
ids on the index lines of those three files are still the ones from the
original commit.

 pkg/jsonld/unmarshal.go                   |   1 -
 pkg/ld/internal/consts/consts.go          |   7 +
 pkg/ld/internal/parse/chunk/chunk.go      | 268 ++++++++++++++++++++++
 pkg/ld/internal/parse/chunk/chunk_test.go |  37 ++++
 pkg/ld/internal/parse/parse.go            |  88 +++++++
 pkg/{jsonld => ld}/jsonld.go              |   6 +-
 pkg/ld/unmarshal.go                       |   1 +
 pkg/{jsonld => ld}/unmarshal_test.go      |   2 +-
 8 files changed, 407 insertions(+), 3 deletions(-)
 delete mode 100644 pkg/jsonld/unmarshal.go
 create mode 100644 pkg/ld/internal/consts/consts.go
 create mode 100644 pkg/ld/internal/parse/chunk/chunk.go
 create mode 100644 pkg/ld/internal/parse/chunk/chunk_test.go
 create mode 100644 pkg/ld/internal/parse/parse.go
 rename pkg/{jsonld => ld}/jsonld.go (68%)
 create mode 100644 pkg/ld/unmarshal.go
 rename pkg/{jsonld => ld}/unmarshal_test.go (99%)

diff --git a/pkg/jsonld/unmarshal.go b/pkg/jsonld/unmarshal.go
deleted file mode 100644
index b78077e..0000000
--- a/pkg/jsonld/unmarshal.go
+++ /dev/null
@@ -1 +0,0 @@
-package asld
diff --git a/pkg/ld/internal/consts/consts.go b/pkg/ld/internal/consts/consts.go
new file mode 100644
index 0000000..392cd81
--- /dev/null
+++ b/pkg/ld/internal/consts/consts.go
@@ -0,0 +1,7 @@
+// Package consts holds constants shared across the ld packages.
+package consts
+
+const (
+	// PkgTag is the struct tag key that names a field for ld.
+	PkgTag = "ld"
+)
diff --git a/pkg/ld/internal/parse/chunk/chunk.go b/pkg/ld/internal/parse/chunk/chunk.go
new file mode 100644
index 0000000..f65a921
--- /dev/null
+++ b/pkg/ld/internal/parse/chunk/chunk.go
@@ -0,0 +1,268 @@
+// Package chunk provides a byte-window cursor used to pick apart JSON input.
+package chunk
+
+import (
+	"errors"
+	"fmt"
+	"unicode"
+	"unicode/utf8"
+
+	"sectorinf.com/emilis/flabk/pkg/coll"
+)
+
+// Sentinel errors returned (wrapped) by the chunk helpers.
+var (
+	ErrNoMatch            = errors.New("no matching")
+	ErrMapMemberNotString = errors.New("map member not a string")
+	ErrUnexpectedSymbol   = errors.New("unexpected symbol")
+)
+
+// Chunk is a window over a byte vector. posLeft and posRight are inclusive
+// indices into vector; every method takes the Chunk by value, so moving a
+// position yields a new Chunk and leaves the receiver untouched.
+type Chunk struct {
+	vector   coll.Vector[byte]
+	posLeft  int
+	posRight int
+	// todo global pos impl
+	globLeft  int
+	globRight int
+}
+
+// Match finds the matching closer to the symbol at the left position and sets the
+// right position to this index. Braces and brackets are searched for backwards
+// from the right position, quotes forwards from the left (escapes are not
+// handled). It panics when the left byte has no entry in matchers and returns
+// a wrapped ErrNoMatch when no closer is found.
+func (c Chunk) Match() (Chunk, error) {
+	s, ok := matchers[c.vector[c.posLeft]]
+	if !ok {
+		panic(fmt.Sprintf("Match called on %c with no matcher defined", c.vector[c.posLeft]))
+	}
+	if s.StartFromRight {
+		// Stop at the opener: a closer can never sit at or before it.
+		for c.posRight > c.posLeft {
+			if c.vector[c.posRight] == s.MatchByte {
+				return c, nil
+			}
+			c.posRight--
+			c.globRight--
+		}
+	} else {
+		for index := c.posLeft + 1; index < len(c.vector); index++ {
+			if c.vector[index] == s.MatchByte {
+				c.posRight = index
+				c.globRight += (c.posLeft + 1) - index
+				return c, nil
+			}
+		}
+	}
+
+	return c, fmt.Errorf("%w %c", ErrNoMatch, c.vector[c.posLeft])
+}
+
+// Copy returns a copy of the chunk; the underlying vector is shared.
+func (c Chunk) Copy() Chunk {
+	return c
+}
+
+// Left returns the byte at the left position.
+func (c Chunk) Left() byte {
+	return c.vector[c.posLeft]
+}
+
+// Sub returns an inclusive subchunk of [left;right]
+func (c Chunk) Sub() Chunk {
+	return New(c.vector[c.posLeft : c.posRight+1])
+}
+
+// CookieCutter returns a subchunk of (left;right)
+func (c Chunk) CookieCutter() Chunk {
+	return New(c.vector[c.posLeft+1 : c.posRight])
+}
+
+// AtSpace reports whether the rune at the left position is Unicode whitespace.
+func (c Chunk) AtSpace() bool {
+	r, _ := utf8.DecodeRune(c.vector[c.posLeft:])
+	return unicode.IsSpace(r)
+}
+
+// Seek advances the left position past any whitespace.
+func (c Chunk) Seek() Chunk {
+	for c.posLeft < len(c.vector) && c.AtSpace() {
+		c.posLeft++
+	}
+	return c
+}
+
+// ValueEnd returns the chunk delimiting the value that starts at the left
+// position. Strings, objects and arrays are delimited with Match; any other
+// value runs up to, but not including, the next ',' or, failing that, is the
+// receiver unchanged.
+func (c Chunk) ValueEnd() (Chunk, error) {
+	switch c.vector[c.posLeft] {
+	case '"', '{', '[':
+		return c.Match()
+	default:
+		for index := c.posLeft; index <= c.posRight; index++ {
+			if c.vector[index] == ',' {
+				return New(c.vector[c.posLeft:index]), nil
+			}
+		}
+		return c, nil
+	}
+}
+
+// Skip skips the current left position and then Seeks
+func (c Chunk) Skip() Chunk {
+	if c.posLeft+1 < len(c.vector) {
+		c.posLeft++
+	}
+	return c.Seek()
+}
+
+// MatchRule describes how to find the closer for an opening byte.
+type MatchRule struct {
+	MatchByte      byte
+	StartFromRight bool
+}
+
+var (
+	matchers = map[byte]MatchRule{
+		'{': {
+			MatchByte:      '}',
+			StartFromRight: true,
+		},
+		'[': {
+			MatchByte:      ']',
+			StartFromRight: true,
+		},
+		'"': {
+			MatchByte: '"',
+		},
+	}
+)
+
+// New wraps v in a Chunk spanning all of it. The bytes are not copied.
+func New(v []byte) Chunk {
+	posRight := len(v) - 1
+	if len(v) == 0 {
+		posRight = 0
+	}
+	return Chunk{
+		vector:   coll.Vector[byte](v),
+		posLeft:  0,
+		posRight: posRight,
+	}
+}
+
+// Child returns a chunk over vector[left:right] (right exclusive) whose
+// positions span the whole slice.
+func (c Chunk) Child(left, right int) Chunk {
+	// Build through New so posRight follows the same inclusive convention as
+	// every other chunk; len(sub) would point one past the last byte.
+	child := New(c.vector[left:right])
+	child.globLeft = (c.globLeft - c.posLeft) + left
+	child.globRight = (c.globRight - c.posRight) + right
+	return child
+}
+
+// ParseFunc fills in a value of type T and returns the result.
+type ParseFunc[T any] func(T) (T, error)
+
+// Parse picks the parser for the value at the left position. Only objects are
+// handled so far; any other leading byte panics.
+func Parse[T any](c Chunk) ParseFunc[T] {
+	switch c.vector[c.posLeft] {
+	case '{':
+		return ParseMap[T](c.CookieCutter())
+	default:
+		panic("not implemented")
+	}
+	// return can be:
+	// * {}
+	// * []
+	// * ""
+	// * 123
+	// * true
+	// * false
+	// * null
+}
+
+// Row is a single name/value member of a map.
+type Row struct {
+	Name  string
+	Value Chunk
+}
+
+// String returns the bytes in [left;right] as a string.
+func (c Chunk) String() string {
+	return string(c.vector[c.posLeft : c.posRight+1])
+}
+
+// Row reads one `"name": value` member starting at the left position. Name is
+// returned with its surrounding quotes, exactly as Match delimits it. Running
+// out of input mid-member yields ErrUnexpectedSymbol instead of a panic.
+func (c Chunk) Row() (Row, error) {
+	c = c.Seek()
+	if c.atEnd() {
+		return Row{}, fmt.Errorf("%w: end of input, expected '\"'", ErrUnexpectedSymbol)
+	}
+	if c.vector[c.posLeft] != '"' {
+		return Row{}, fmt.Errorf("%w: %c", ErrMapMemberNotString, c.vector[c.posLeft])
+	}
+	name, err := c.Match()
+	if err != nil {
+		return Row{}, fmt.Errorf("match: %w", err)
+	}
+	postName := c.Copy()
+	postName.posLeft = name.posRight
+	postName = postName.Skip()
+	if postName.atEnd() {
+		return Row{}, fmt.Errorf("%w: end of input, expected ':'", ErrUnexpectedSymbol)
+	}
+	// Next we must get a :
+	if postName.vector[postName.posLeft] != ':' {
+		return Row{}, fmt.Errorf("%w '%c', expected ':'", ErrUnexpectedSymbol, postName.vector[postName.posLeft])
+	}
+	valueStart := postName.Skip()
+	if valueStart.atEnd() {
+		return Row{}, fmt.Errorf("%w: end of input, expected a value", ErrUnexpectedSymbol)
+	}
+	value, err := valueStart.ValueEnd()
+	if err != nil {
+		return Row{}, fmt.Errorf("value: %w", err)
+	}
+
+	return Row{
+		Name:  name.String(),
+		Value: value,
+	}, nil
+}
+
+// atEnd reports whether the left position has moved past the last byte.
+func (c Chunk) atEnd() bool {
+	return c.posLeft >= len(c.vector)
+}
+
+// After returns c with its left position moved to v's left position.
+func (c Chunk) After(v Chunk) Chunk {
+	c.posLeft = v.posLeft
+	return c
+}
+
+// ParseMap returns a ParseFunc for the map body in c. It is a stub: the
+// returned function panics with "todo".
+func ParseMap[T any](c Chunk) ParseFunc[T] {
+	return func(t T) (T, error) {
+		// mapper := parse.GetMap(t)
+		// for {
+		// row, err := c.Row()
+		// if err != nil {
+		// return t, err
+		// }
+
+		// }
+		panic("todo")
+	}
+}
diff --git a/pkg/ld/internal/parse/chunk/chunk_test.go b/pkg/ld/internal/parse/chunk/chunk_test.go
new file mode 100644
index 0000000..7b8ed0d
--- /dev/null
+++ b/pkg/ld/internal/parse/chunk/chunk_test.go
@@ -0,0 +1,37 @@
+package chunk_test
+
+import (
+	"fmt"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+	"sectorinf.com/emilis/flabk/pkg/ld/internal/parse/chunk"
+)
+
+func TestSpace(t *testing.T) {
+	that := require.New(t)
+	ch := chunk.New([]byte(" hello world"))
+	that.True(ch.AtSpace())
+}
+
+func TestParseMap(t *testing.T) {
+	type Hello struct {
+		Hello string
+	}
+	ch := chunk.New([]byte(`"Hello": "world"`))
+	h, err := chunk.ParseMap[Hello](ch)(Hello{})
+	if err != nil {
+		panic(err)
+	}
+	fmt.Println(h)
+}
+
+func TestChild(t *testing.T) {
+	type Hello struct {
+		Hello string
+	}
+	ch := chunk.New([]byte(`"Hello": "world"`))
+	child := ch.Child(5, 10)
+
+	fmt.Println(child)
+}
diff --git a/pkg/ld/internal/parse/parse.go b/pkg/ld/internal/parse/parse.go
new file mode 100644
index 0000000..71d313e
--- /dev/null
+++ b/pkg/ld/internal/parse/parse.go
@@ -0,0 +1,88 @@
+// Package parse holds reflection helpers for decoding values into Go types.
+package parse
+
+import (
+	"fmt"
+	"reflect"
+	"strconv"
+	"strings"
+
+	"sectorinf.com/emilis/flabk/pkg/ld/internal/consts"
+)
+
+// LazyMapFunc is a setter taking a member name and its raw value.
+type LazyMapFunc func(name string, value string)
+
+// GetMap returns a LazyMapFunc for v, which must be a map or a struct; any
+// other kind panics. Both returned setters are still stubs that do nothing.
+func GetMap(v any) LazyMapFunc {
+	val := reflect.ValueOf(v)
+	// typ := reflect.TypeOf(v)
+	switch val.Kind() {
+	case reflect.Map:
+		return func(name string, value string) {
+			// val.SetMapIndex(reflect.ValueOf(name), value)
+		}
+	case reflect.Struct:
+		// fields := GetStructFields(val, typ)
+		return func(name, value string) {
+			// val, ok := fields[name]
+			// if ok {
+			// val.Set(value)
+			// }
+		}
+	default:
+		panic("wrong")
+	}
+}
+
+// ParseAsValue converts the raw text v into a value of kind valType.Kind().
+// Strings are returned without their first and last byte (the quotes) and
+// booleans go through strconv.ParseBool; every other kind panics with "todo".
+func ParseAsValue(v string, valType reflect.Type) (any, error) {
+	// Might not be necessary to trim
+	v = strings.TrimSpace(v)
+	switch valType.Kind() {
+	case reflect.String:
+		// Slicing off the quotes needs at least two bytes to be in range.
+		if len(v) < 2 {
+			return nil, fmt.Errorf("string: %q is too short to be quoted", v)
+		}
+		return v[1 : len(v)-1], nil
+	case reflect.Bool:
+		b, err := strconv.ParseBool(strings.ToLower(v))
+		if err != nil {
+			// Return the failure; dropping it would report false as a valid parse.
+			return nil, fmt.Errorf("boolean: %w", err)
+		}
+		return b, nil
+	case reflect.Struct, reflect.Map:
+		panic("todo")
+	default:
+		panic("todo")
+	}
+}
+
+// GetStructFields maps each field of the struct val to its name as given by
+// StructName, leaving out fields named "-". typ must be val's type.
+func GetStructFields(val reflect.Value, typ reflect.Type) map[string]reflect.Value {
+	total := val.NumField()
+	out := map[string]reflect.Value{}
+	for index := 0; index < total; index++ {
+		cName := StructName(typ.Field(index))
+		if cName != "-" {
+			out[cName] = val.Field(index)
+		}
+	}
+	return out
+}
+
+// StructName returns the field's `ld` tag, or the field name when it has none.
+func StructName(v reflect.StructField) string {
+	tag := v.Tag.Get(consts.PkgTag)
+	if tag != "" {
+		return tag
+	}
+	// Default to field name
+	return v.Name
+}
diff --git a/pkg/jsonld/jsonld.go b/pkg/ld/jsonld.go
similarity index 68%
rename from pkg/jsonld/jsonld.go
rename to pkg/ld/jsonld.go
index 60dc7fc..0341b7d 100644
--- a/pkg/jsonld/jsonld.go
+++ b/pkg/ld/jsonld.go
@@ -1,4 +1,8 @@
 // // Package asld handles JSON-LD for asflab
 // //
 // // This will not go well
-package asld
+package ld
+
+const (
+	pkgTag = "ld"
+)
diff --git a/pkg/ld/unmarshal.go b/pkg/ld/unmarshal.go
new file mode 100644
index 0000000..7330ae2
--- /dev/null
+++ b/pkg/ld/unmarshal.go
@@ -0,0 +1 @@
+package ld
diff --git a/pkg/jsonld/unmarshal_test.go b/pkg/ld/unmarshal_test.go
similarity index 99%
rename from pkg/jsonld/unmarshal_test.go
rename to pkg/ld/unmarshal_test.go
index 3d6ac9e..151d9d8 100644
--- a/pkg/jsonld/unmarshal_test.go
+++ b/pkg/ld/unmarshal_test.go
@@ -1,4 +1,4 @@
-package asld_test
+package ld_test
 
 // import (
 // 	"encoding/json"