From 02df70bb827baf0cdf6fac135ff98d72eabc254d Mon Sep 17 00:00:00 2001 From: puffaboo Date: Sun, 10 Jul 2022 21:23:00 +0100 Subject: [PATCH] checkpoint for it not working yet --- pkg/asld/asld.go | 274 +++++---------------------------- pkg/asld/unmarshal.go | 299 +++++++++++++++++++++++++++++++++++++ pkg/asld/unmarshal_test.go | 140 ++++++++++++++++- pkg/asld/walker.go | 170 ++++++++++++++++----- pkg/asld/walker_test.go | 6 +- 5 files changed, 612 insertions(+), 277 deletions(-) create mode 100644 pkg/asld/unmarshal.go diff --git a/pkg/asld/asld.go b/pkg/asld/asld.go index 429268d..fd55477 100644 --- a/pkg/asld/asld.go +++ b/pkg/asld/asld.go @@ -6,38 +6,41 @@ package asld import ( "bytes" "errors" - "fmt" - "reflect" - "strings" ) const ( - tagName = "asld" + tagName = "asld" + omitEmpty = "omitempty" + collapsible = "collapsible" + nullToken = "null" ) var ( - ErrNoMatching = errors.New("could not find matching") - ErrSyntaxError = errors.New("syntax error") - ErrEntryWithoutValue = errors.New("entry without value") + ErrNoMatching = errors.New("could not find matching") + ErrSyntaxError = errors.New("syntax error") + ErrEntryWithoutValue = errors.New("entry without value") + ErrMapNotStringIndexed = errors.New("map is not string indexed") ) // assigned by init var ( - byByte map[byte]Symbol - symbolsRaw []byte - iriPrefixes [][]byte - openSymbols []Symbol - openRaw []byte + stoppableByByte map[byte]SymbolInfo + stoppableRaw []byte + iriPrefixes [][]byte + openSymbols []SymbolInfo + openRaw []byte + null []byte ) func init() { - byByte = map[byte]Symbol{} - for _, symbol := range symbols { - byByte[symbol.self] = symbol + stoppableByByte = map[byte]SymbolInfo{} + for index, sym := range stoppable { + stoppable[index].enum = symbol(index) + stoppableByByte[sym.self] = sym } - symbolsRaw = make([]byte, len(symbols)) - for index, symbol := range symbols { - symbolsRaw[index] = symbol.self + stoppableRaw = make([]byte, len(stoppable)) + for index, symbol := range stoppable { + stoppableRaw[index] = symbol.self } iriPrefixesStrings := []string{ // Currently only doing https @@ -47,70 +50,25 @@ func init() { for index, prefix := range iriPrefixesStrings { iriPrefixes[index] = []byte(prefix) } - openSymbolEnums := []int{ + openSymbolEnums := []symbol{ symbolOpenParen, symbolOpenArray, symbolString, } - openSymbols = make([]Symbol, len(openSymbolEnums)) + openSymbols = make([]SymbolInfo, len(openSymbolEnums)) for index, enum := range openSymbolEnums { - openSymbols[index] = symbols[enum] + openSymbols[index] = stoppable[enum] } - openRaw = _map(openSymbols, func(s Symbol) byte { + openRaw = _map(openSymbols, func(s SymbolInfo) byte { return s.self }) + null = []byte(nullToken) } -type Symbol struct { +type SymbolInfo struct { self byte closer byte + enum symbol } -const ( - symbolOpenParen = iota - symbolClosedParen - symbolOpenArray - symbolClosedArray - symbolString - symbolColon -) - -const ( - statusOK = iota - statusError - statusWalkerNotAffected -) - -type status int -type walkerStatus struct { - walker - status -} - -var ( - symbols = []Symbol{ - symbolOpenParen: { - self: '{', - closer: '}', - }, - symbolClosedParen: { - self: '}', - }, - symbolOpenArray: { - self: '[', - closer: ']', - }, - symbolClosedArray: { - self: ']', - }, - symbolString: { - self: '"', - closer: '"', - }, - symbolColon: { - self: ':', - }, - } -) - func in[T comparable](this []T, has T) bool { for _, elem := range this { if elem == has { @@ -120,6 +78,15 @@ func in[T comparable](this []T, has T) bool { return false } +func firstIn[T comparable](this []T, has T) (T, bool) { + for _, elem := range this { + if elem == has { + return elem, true + } + } + return has, false +} + func _map[T any, V any](v []T, f func(T) V) []V { output := make([]V, len(v)) for index, elem := range v { @@ -136,170 +103,3 @@ func isIRI(v []byte) bool { } return false } - -func arrayMembers(w walker) ([]walker, error) { - var ( - members = []walker{} - final bool - ) - - elements, ok := w.SliceInner() - if !ok { - return nil, fmt.Errorf("%s %w", string(w.content[w.position]), ErrNoMatching) - } - - for !final { - elem, err := elements.CommaOrEnd() - if err != nil { - return nil, fmt.Errorf("comma or end %s: %w", string(elem.content), err) - } - final = elem.status == statusWalkerNotAffected - value := elem.walker.Until().Reset() - if !final { - // Incremenet elem.walker.position here so - // that it skips over the comma that we're - // on right now. as there's not really anything - // valid to stop on in arrays aside from them - // so we can't just call ToNext() later. - elem.walker.position++ - } else { - value = elem.walker - } - elements = elem.walker.Sub() - members = append(members, value) - } - - return members, nil -} - -func mapMembers(w walker) (map[string]walker, error) { - var ( - members = map[string]walker{} - lastLine bool - ) - elements, ok := w.SliceInner() - if !ok { - return nil, fmt.Errorf("%s %w", string(w.content[w.position]), ErrNoMatching) - } - - for !lastLine { - lineInfo, err := elements.CommaOrEnd() - if err != nil { - return nil, fmt.Errorf("comma or end: %s: %w", string(elements.content), err) - } - lastLine = lineInfo.status == statusWalkerNotAffected - line := lineInfo.walker - if !lastLine { - line = line.Until() - } - - name, ok := line.Reset().ToOrStay('"') - if !ok { - continue - } - nameString, ok := name.SliceInner() - if !ok { - // TODO: maybe these should have global position - return nil, fmt.Errorf("%s %w", string(name.Current()), ErrNoMatching) - } - - // We know this is OK because the above SliceInner called it - name, _ = name.To('"') - - wNext, ok := name.Next() - if !ok && wNext.Current() != ':' && isIRI(nameString.content) { - panic("IRI expansion not implemented") - } else if !ok || wNext.Current() != ':' { - return nil, fmt.Errorf("%s: %w", string(nameString.content), ErrEntryWithoutValue) - } - - value, ok := wNext.Next() - if !ok { - if value.position < value.len-1 { - value.position++ - value = value.Sub() - } else { - return nil, fmt.Errorf("non-IRI %s: %w", string(nameString.content), ErrEntryWithoutValue) - } - } - elements = lineInfo.walker.Sub() - members[string(nameString.content)] = value - } - - return members, nil -} - -func unmarshalMap(out reflect.Value, w walker) error { - members, err := mapMembers(w) - if err != nil { - return fmt.Errorf("getting members: %w", err) - } - - outType := out.Type() - // Deconstruct the struct fields - for index := 0; index < out.NumField(); index++ { - field := out.Field(index) - fType := outType.Field(index) - - tagInfo := fType.Tag.Get(tagName) - // TODO: support expandible/collapsible/whatever I name it - // and omitempty probably - tagParts := strings.Split(tagInfo, ",") - name := tagParts[0] - if tagInfo == "" || name == "" { - name = fType.Name - } - // mimic encoding/json behavior - if name == "-" && len(tagParts) == 1 { - continue - } - - wField, exists := members[name] - if !exists { - continue - } - setValue(fType, field, wField) - } - - return nil -} - -func setValue(fType reflect.StructField, field reflect.Value, w walker) error { - switch field.Kind() { - case reflect.String: - if w.content[0] != '"' { - return fmt.Errorf("%s is not a string", string(w.content)) - } - field.SetString(w.String()) - default: - panic("not implemented") - } - - return nil -} - -func unmarshal(out reflect.Value, w walker) error { - switch out.Kind() { - case reflect.Struct: - return unmarshalMap(out, w) - case reflect.Array, reflect.Slice: - // do array stuff here - default: - panic(out.Kind().String() + " not yet supported") - } - - return nil -} - -func (w walker) Reset() walker { - w.position = 0 - return w -} - -func Unmarshal[T any](data []byte) (T, error) { - tPtr := new(T) - tValue := reflect.Indirect(reflect.ValueOf(tPtr)) - w := newWalker(data) - - return *tPtr, unmarshal(tValue, w) -} diff --git a/pkg/asld/unmarshal.go b/pkg/asld/unmarshal.go new file mode 100644 index 0000000..64310df --- /dev/null +++ b/pkg/asld/unmarshal.go @@ -0,0 +1,299 @@ +package asld + +import ( + "bytes" + "fmt" + "reflect" + "strconv" + "strings" +) + +const ( + intSize = 32 << (^uint(0) >> 63) // unexported from "math" +) + +var ( + bitSize = map[reflect.Kind]int{ + reflect.Int: intSize, + reflect.Int8: 8, + reflect.Int16: 16, + reflect.Int32: 32, + reflect.Int64: 64, + reflect.Uint: intSize, + reflect.Uint8: 8, + reflect.Uint16: 16, + reflect.Uint32: 32, + reflect.Uint64: 64, + reflect.Float32: 32, + reflect.Float64: 64, + reflect.Complex64: 64, + reflect.Complex128: 128, + } + nullWalker = newWalker([]byte{'n', 'u', 'l', 'l'}) +) + +func arrayMembers(w walker) ([]walker, error) { + var ( + members = []walker{} + final bool + ) + + elements, ok := w.SliceInner() + if !ok { + return nil, fmt.Errorf("%s %w", string(w.content[w.position]), ErrNoMatching) + } + + for !final { + elem, err := elements.CommaOrEnd() + if err != nil { + return nil, fmt.Errorf("comma or end %s: %w", string(elem.content), err) + } + final = elem.status == statusWalkerNotAffected + value := elem.walker.Until().Reset() + if !final { + // Incremenet elem.walker.position here so + // that it skips over the comma that we're + // on right now. as there's not really anything + // valid to stop on in arrays aside from them + // so we can't just call ToNext() later. + elem.walker = elem.walker.Pos(elem.walker.position + 1) + } else { + value = elem.walker + } + elements = elem.walker.Sub() + members = append(members, value) + } + + return members, nil +} + +func mapMembers(w walker) (map[string]walker, error) { + var ( + members = map[string]walker{} + lastLine bool + //debug + prev string + ) + w, sym := w.StayOrNext() + if sym == symbolEOB { + w, sym = w.StayOrNext() + panic("idk") + } + // Because this gets called recursively with all kinds of + // inputs and spacing, we should do a Stay/Walk If Space situation + elements, ok := w.SliceInner() + if !ok { + return nil, fmt.Errorf("%s %w", string(w.content[w.position]), ErrNoMatching) + } + + w.Debug() + for !lastLine { + lineInfo, err := elements.CommaOrEnd() + if err != nil { + return nil, fmt.Errorf("comma or end: %s: %w", string(elements.content), err) + } + lastLine = lineInfo.status == statusWalkerNotAffected + line := lineInfo.walker + if !lastLine { + line = line.Until() + } + + name, ok := line.Reset().ToOrStay('"') + if !ok { + continue + } + nameString, ok := name.SliceInner() + if !ok { + // TODO: maybe these should have global position + return nil, fmt.Errorf("%s %w", string(name.Current()), ErrNoMatching) + } + + // We know this is OK because the above SliceInner called it + name, _ = name.To('"') + + wNext, sym := name.Next() + if sym != symbolEOB && wNext.Current() != ':' && isIRI(nameString.content) { + panic("IRI expansion not implemented") + } else if sym != symbolEOB && wNext.Current() != ':' { + return nil, fmt.Errorf("%s at pos %d: %w", string(nameString.content), wNext.globPos, ErrEntryWithoutValue) + } + + value, sym := wNext.Next() + if sym == symbolEOB { + if value.position < value.len-1 { + value = value.Pos(value.position + 1).Sub() + } else { + return nil, fmt.Errorf("non-IRI %s: %w", string(nameString.content), ErrEntryWithoutValue) + } + } else if sym == symbolNullStart { + value = nullWalker + } + // Walk to next viable token + value = value.WalkThroughSpaces().Sub() + elements = lineInfo.walker.Sub() + n := string(nameString.content) + print(n + "\n") + fmt.Println(value) + members[n] = value + prev = n + } + + fmt.Print(prev) + return members, nil +} + +func unmarshalStruct(out reflect.Value, w walker) error { + members, err := mapMembers(w) + if err != nil { + return fmt.Errorf("getting members: %w", err) + } + + outType := out.Type() + // Deconstruct the struct fields + for index := 0; index < out.NumField(); index++ { + field := out.Field(index) + fType := outType.Field(index) + + tagInfo := fType.Tag.Get(tagName) + // TODO: support expandible/collapsible/whatever I name it + // and omitempty probably + tagParts := strings.Split(tagInfo, ",") + name := tagParts[0] + if tagInfo == "" || name == "" { + name = fType.Name + } + // mimic encoding/json behavior + if name == "-" && len(tagParts) == 1 { + continue + } + + wField, exists := members[name] + if !exists { + continue + } + fmt.Println(wField, field) + // if err := setValue(field, wField, fType); err != nil { + // return fmt.Errorf("field %s set: %w", name, err) + // } + } + + return nil +} + +func setMap(field reflect.Value, w walker) error { + if field.IsNil() { + field.Set(reflect.MakeMap(field.Type())) + } + keyType := field.Type().Key() + if keyType.Kind() != reflect.String { + return ErrMapNotStringIndexed + } + members, err := mapMembers(w) + if err != nil { + return fmt.Errorf("getting members for map: %w", err) + } + + valueType := field.Type().Elem() + for key, member := range members { + def := reflect.Indirect(reflect.New(valueType)) + err = setValue(def, member) + if err != nil { + return fmt.Errorf( + "could not set %s to %s: %w", + string(w.content), + valueType.Kind().String(), + err, + ) + } + field.SetMapIndex(reflect.ValueOf(key), def) + } + + return nil +} + +func setValue(field reflect.Value, w walker, a ...reflect.StructField) error { + var ( + err error + setter func() + ) + if w.len >= 4 && bytes.Equal(null, w.content[:4]) { + // default value + return nil + } + + k := field.Kind() + switch k { + case reflect.String: + if w.content[0] != '"' { + err = fmt.Errorf("%s is not a string", string(w.content)) + } + setter = func() { field.SetString(w.String()) } + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + var i int64 + i, err = strconv.ParseInt(string(w.content), 10, bitSize[field.Kind()]) + setter = func() { field.SetInt(i) } + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: + var i uint64 + i, err = strconv.ParseUint(string(w.content), 10, bitSize[field.Kind()]) + setter = func() { field.SetUint(i) } + case reflect.Float32, reflect.Float64: + var f float64 + f, err = strconv.ParseFloat(string(w.content), bitSize[field.Kind()]) + setter = func() { field.SetFloat(f) } + case reflect.Complex64, reflect.Complex128: + var c complex128 + c, err = strconv.ParseComplex(string(w.content), bitSize[field.Kind()]) + setter = func() { field.SetComplex(c) } + case reflect.Pointer: + if field.IsNil() { + field.Set(reflect.New(field.Type().Elem())) + } + // prob oops if its nil + fDeref := reflect.Indirect(field) + return setValue(fDeref, w) + case reflect.Struct, reflect.Map, reflect.Array, reflect.Slice: + if k == reflect.Array || k == reflect.Slice && len(a) > 0 { + fmt.Println("$$$$$$$$$$$", a[0].Name) + } + return unmarshal(field, w) + case reflect.Bool: + var b bool + b, err = strconv.ParseBool(string(w.content)) + setter = func() { field.SetBool(b) } + case reflect.Chan, reflect.Func, reflect.UnsafePointer: + // Ignore + return nil + default: + panic("not implemented") + } + + if err != nil { + return fmt.Errorf("could not parse %s as %s: %w", string(w.content), field.Kind().String(), err) + } + setter() + + return nil +} + +func unmarshal(out reflect.Value, w walker) error { + switch out.Kind() { + case reflect.Struct: + return unmarshalStruct(out, w) + case reflect.Map: + return setMap(out, w) + case reflect.Array, reflect.Slice: + // do array stuff here + panic("todo") + default: + panic(out.Kind().String() + " not yet supported") + } +} + +func Unmarshal[T any](data []byte) (T, error) { + tPtr := new(T) + tValue := reflect.Indirect(reflect.ValueOf(tPtr)) + w := newWalker(data) + + // Might be array/map/interface at top level, should check + return *tPtr, unmarshal(tValue, w) +} diff --git a/pkg/asld/unmarshal_test.go b/pkg/asld/unmarshal_test.go index 21c8b17..6595b86 100644 --- a/pkg/asld/unmarshal_test.go +++ b/pkg/asld/unmarshal_test.go @@ -2,7 +2,10 @@ package asld_test import ( "encoding/json" + "fmt" + "math" "testing" + "time" "git.sectorinf.com/emilis/asflab/pkg/asld" "git.sectorinf.com/emilis/asflab/pkg/epk" @@ -10,7 +13,38 @@ import ( ) type testObj struct { - String string `asld:"string" json:"string"` + String string `asld:"string" json:"string"` + NoTag string + Int int + Int8 int8 + Int16 int16 + Int32 int32 + Int64 int64 + Uint uint + Uint8 uint8 + Uint16 uint16 + Uint32 uint32 + Uint64 uint64 + Float32 float32 + Float64 float64 + // Complex64 complex64 + // Complex128 complex128 + IntPtr *int + Int8Ptr *int8 + Int16Ptr *int16 + Int32Ptr *int32 + Int64Ptr *int64 + UintPtr *uint + Uint8Ptr *uint8 + Uint16Ptr *uint16 + Uint32Ptr *uint32 + Uint64Ptr *uint64 + Float32Ptr *float32 + Float64Ptr *float64 + // Complex64Ptr *complex64 + // Complex128Ptr *complex128 + TestPtr *testObj + TestArray []testObj } func TestUnmarshal(t *testing.T) { @@ -20,7 +54,40 @@ func TestUnmarshal(t *testing.T) { }{ "string children": { obj: testObj{ - "hello", + String: "hello", + NoTag: "no_tag", + Int: math.MaxInt, + Int8: math.MaxInt8, + Int16: math.MaxInt16, + Int32: math.MaxInt32, + Int64: math.MaxInt64, + Uint: math.MaxUint, + Uint8: math.MaxUint8, + Uint16: math.MaxUint16, + Uint32: math.MaxUint32, + Uint64: math.MaxUint64, + Float32: math.MaxFloat32, + Float64: math.MaxFloat64, + TestPtr: &testObj{ + String: "hello2", + }, + TestArray: []testObj{ + { + String: "hello3", + }, + { + String: "hello4", + TestPtr: &testObj{ + TestArray: []testObj{ + { + String: "hello5", + }, + }, + }, + }, + }, + // Complex64: complex(math.MaxFloat32, math.MaxFloat32), + // Complex128: complex(math.MaxFloat64, math.MaxFloat64), }, }, } @@ -29,7 +96,7 @@ func TestUnmarshal(t *testing.T) { test := test t.Run(name, func(tt *testing.T) { that := require.New(tt) - objJSON, err := json.Marshal(test.obj) + objJSON, err := json.MarshalIndent(test.obj, "", " ") that.NoError(err) result, err := asld.Unmarshal[testObj](objJSON) that.NoError(err) @@ -37,3 +104,70 @@ func TestUnmarshal(t *testing.T) { }) } } + +func TestBench(t *testing.T) { + obj := testObj{ + String: "hello", + NoTag: "no_tag", + Int: math.MaxInt, + Int8: math.MaxInt8, + Int16: math.MaxInt16, + Int32: math.MaxInt32, + Int64: math.MaxInt64, + Uint: math.MaxUint, + Uint8: math.MaxUint8, + Uint16: math.MaxUint16, + Uint32: math.MaxUint32, + Uint64: math.MaxUint64, + Float32: math.MaxFloat32, + Float64: math.MaxFloat64, + // Complex64: complex(math.MaxFloat32, math.MaxFloat32), + // Complex128: complex(math.MaxFloat64, math.MaxFloat64), + } + that := require.New(t) + asldTotal := int64(0) + jsonTotal := int64(0) + + jsonMax := int64(0) + jsonMin := math.MaxInt64 + + asldMax := int64(0) + asldMin := math.MaxInt64 + + count := int64(1 << 20) + for index := int64(0); index < count; index++ { + objJSON, err := json.Marshal(obj) + that.NoError(err) + + asldStart := time.Now() + _, err = asld.Unmarshal[testObj](objJSON) + asldDur := time.Since(asldStart) + asldTotal += int64(asldDur) + if asldDur < time.Duration(asldMin) { + asldMin = int(asldDur) + } + if asldDur > time.Duration(asldMax) { + asldMax = int64(asldDur) + } + + that.NoError(err) + a := testObj{} + + jsonStart := time.Now() + err = json.Unmarshal(objJSON, &a) + jsonDur := time.Since(jsonStart) + jsonTotal += int64(jsonDur) + + if jsonDur < time.Duration(jsonMin) { + jsonMin = int(jsonDur) + } + if jsonDur > time.Duration(jsonMax) { + jsonMax = int64(jsonDur) + } + + that.NoError(err) + } + fmt.Println(count, "runs") + fmt.Printf("json avg (%s), min (%s), max (%s)\n", time.Duration(jsonTotal/count), time.Duration(jsonMin), time.Duration(jsonMax)) + fmt.Printf("asld avg (%s), min (%s), max (%s)\n", time.Duration(asldTotal/count), time.Duration(asldMin), time.Duration(asldMax)) +} diff --git a/pkg/asld/walker.go b/pkg/asld/walker.go index 0aefbc9..a1bb490 100644 --- a/pkg/asld/walker.go +++ b/pkg/asld/walker.go @@ -1,26 +1,87 @@ package asld -import "fmt" +import ( + "bytes" + "fmt" +) // Walker.... texas ranger. // Except he's a cringe conservative. // // This is also cringe but not for cringe reasons. -func (w walker) Debug_PRINT() { +type symbol byte + +const ( + symbolOpenParen symbol = iota + symbolClosedParen + symbolOpenArray + symbolClosedArray + symbolString + symbolColon + symbolNullStart + symbolEOB // End-Of-Buffer +) + +const ( + statusOK status = iota + statusError + statusWalkerNotAffected +) + +var ( + stoppable = []SymbolInfo{ + symbolOpenParen: { + self: '{', + closer: '}', + }, + symbolClosedParen: { + self: '}', + }, + symbolOpenArray: { + self: '[', + closer: ']', + }, + symbolClosedArray: { + self: ']', + }, + symbolString: { + self: '"', + closer: '"', + }, + symbolColon: { + self: ':', + }, + } +) + +type status int +type walkerStatus struct { + walker + status +} + +func (w walker) Reset() walker { + w.position = 0 + return w +} + +func (w walker) Debug() { for index, b := range w.content { out := string(b) if w.position == index { - out = "<[>" + out + "<]>" + out = "<[_" + out + "_]>" } print(out) } print("\n") + print("globPos", w.globPos) + print("\n") } func (w walker) SliceInner() (walker, bool) { // the !ok scenario here is only if the code is bad - s, ok := byByte[w.Current()] + s, ok := stoppableByByte[w.Current()] // Debug if !ok { panic(w) @@ -34,7 +95,7 @@ func (w walker) SliceInner() (walker, bool) { } if curr == s.closer { if height == 0 { - return newWalker(w.content[w.position+1 : pos]), ok + return w.Between(w.position+1, pos), ok } height-- } @@ -42,6 +103,31 @@ func (w walker) SliceInner() (walker, bool) { return w, false } +type walker struct { + content []byte + len int + position int + globPos int +} + +func newWalker(data []byte) walker { + return walker{ + content: data, + len: len(data), + } +} + +func (w walker) Between(lower, upper int) walker { + // As lower, and upper, are offsets from the beginning + // of this walker's buffer, we can diff lower and w.position + // for an offset to set global position at + w.content = w.content[lower:upper] + offset := lower - w.position + w.position = 0 + w.globPos += offset + return w +} + // Sub returns a subwalker from the current position func (w walker) Sub() walker { w.content = w.content[w.position:] @@ -50,19 +136,6 @@ func (w walker) Sub() walker { return w } -type walker struct { - content []byte - len int - position int -} - -func newWalker(data []byte) walker { - return walker{ - content: data, - len: len(data), - } -} - // Until returns a subwalker from position 0 to the current position func (w walker) Until() walker { w.content = w.content[:w.position] @@ -70,6 +143,13 @@ func (w walker) Until() walker { return w } +func (w walker) Pos(v int) walker { + offset := v - w.position + w.position = v + w.globPos += offset + return w +} + // ToOrStay will stay at where the walker is if b is the // same as the current walker position. // @@ -84,26 +164,49 @@ func (w walker) ToOrStay(b byte) (walker, bool) { func (w walker) To(b byte) (walker, bool) { for pos := w.position + 1; pos < w.len; pos++ { if w.content[pos] == b { - w.position = pos - return w, true + return w.Pos(pos), true } } return w, false } -func (w walker) Next() (walker, bool) { - w, ok := w.ToNext() - return w.Sub(), ok +func (w walker) WalkThroughSpaces() walker { + for pos := w.position; pos < w.len; pos++ { + b := w.content[pos] + if _, ok := stoppableByByte[b]; ok || (b >= '0' && b <= '9') || b == '-' || w.IsNullAt(pos) { + return w.Pos(pos) + } + } + return w } -func (w walker) ToNext() (walker, bool) { +func (w walker) StayOrNext() (walker, symbol) { + if sym, ok := stoppableByByte[w.content[w.position]]; ok { + return w, sym.enum + } + + return w.Next() +} + +func (w walker) Next() (walker, symbol) { + w, s := w.ToNext() + return w.Sub(), s +} + +func (w walker) IsNullAt(pos int) bool { + return w.content[pos] == 'n' && w.len-pos >= 4 && bytes.Equal(w.content[pos:pos+4], null) +} + +func (w walker) ToNext() (walker, symbol) { for pos := w.position + 1; pos < w.len; pos++ { - if in(symbolsRaw, w.content[pos]) { - w.position = pos - return w, true + if w.IsNullAt(pos) { + return w.Pos(pos), symbolNullStart + } + if s, ok := stoppableByByte[w.content[pos]]; ok { + return w.Pos(pos), s.enum } } - return w, false + return w, symbolEOB } // CommaOrEnd walks to the next comma, or, if none @@ -111,11 +214,11 @@ func (w walker) ToNext() (walker, bool) { // itself with status statusWalkerNotAffected. func (w walker) CommaOrEnd() (walkerStatus, error) { if w.Current() == ',' { - w.position++ + w = w.Pos(w.position + 1) } for pos := w.position; pos < w.len; pos++ { if in(openRaw, w.content[pos]) { - sb, ok := byByte[w.content[pos]] + sb, ok := stoppableByByte[w.content[pos]] if !ok { panic("ok someone fucked up somewhere") } @@ -130,10 +233,9 @@ func (w walker) CommaOrEnd() (walkerStatus, error) { continue } if w.content[pos] == ',' { - w.position = pos return walkerStatus{ status: statusOK, - walker: w, + walker: w.Pos(pos), }, nil } } @@ -148,8 +250,8 @@ func (w walker) Current() byte { return w.content[w.position] } -func (w walker) CurrentSymbol() (Symbol, bool) { - b, ok := byByte[w.Current()] +func (w walker) CurrentSymbol() (SymbolInfo, bool) { + b, ok := stoppableByByte[w.Current()] return b, ok } diff --git a/pkg/asld/walker_test.go b/pkg/asld/walker_test.go index 36cceb9..11f9a63 100644 --- a/pkg/asld/walker_test.go +++ b/pkg/asld/walker_test.go @@ -73,8 +73,8 @@ func TestNexts(t *testing.T) { w := newWalker([]byte(test.contents)) w, ok := w.To('"') that.True(ok) - w, ok = w.ToNext() - that.True(ok) + w, sym := w.ToNext() + that.Equal(symbolEOB, sym) that.Equal(test.expPos, w.position, "positions are not the same") }) } @@ -116,7 +116,7 @@ func TestMapMembers(t *testing.T) { that.NoError(err) walker, exists := members[key] that.True(exists) - that.Equal(valJson, walker.content) + that.Equal(valJson, walker.content, key) } }) }