2022-08-03 15:20:03 +01:00
|
|
|
package chunk
|
|
|
|
|
|
|
|
import (
|
|
|
|
"errors"
|
|
|
|
"fmt"
|
|
|
|
|
|
|
|
"sectorinf.com/emilis/flabk/pkg/coll"
|
|
|
|
)
|
|
|
|
|
2022-08-05 20:53:44 +01:00
|
|
|
// Bytes that Match treats specially while it scans for a closer.
const (
	// escapeChar starts an escape sequence; the byte after it is skipped.
	escapeChar = '\\'

	// unicodeEscape, when it directly follows escapeChar, marks an escape
	// that spans four further bytes.
	unicodeEscape = 'u'
)
|
|
|
|
|
2022-08-03 15:20:03 +01:00
|
|
|
// Sentinel errors returned (wrapped with a position prefix) by Chunk methods.
var (
	// ErrNoMatch is returned by Match when no closer is found for the
	// symbol at the left position.
	ErrNoMatch = errors.New("no matching")

	// ErrMapMemberNotString is returned by Row when the byte at the left
	// position is not a double quote.
	ErrMapMemberNotString = errors.New("map member not a string")

	// ErrUnexpectedSymbol is returned by Row when the member name is not
	// followed by ':'.
	ErrUnexpectedSymbol = errors.New("unexpected symbol")

	// ErrMissingValue is returned by ValueEnd when an unquoted value starts
	// directly on a comma or whitespace byte.
	ErrMissingValue = errors.New("missing value")

	// ErrEscapeAtEnd is returned by Match when the escape character is the
	// last byte of the vector.
	ErrEscapeAtEnd = errors.New("escape character at the end of value")

	// ErrIncompleteEscape is returned by Match when a unicode escape is cut
	// short by the end of the vector.
	ErrIncompleteEscape = errors.New("incomplete unicode escape sequence")
)
|
|
|
|
|
|
|
|
type Chunk struct {
|
|
|
|
vector coll.Vector[byte]
|
|
|
|
posLeft int
|
|
|
|
posRight int
|
|
|
|
// todo global pos impl
|
|
|
|
globLeft int
|
|
|
|
globRight int
|
|
|
|
}
|
|
|
|
|
2022-08-05 20:53:44 +01:00
|
|
|
func (c Chunk) LeftByte() byte {
|
|
|
|
return c.vector[c.posLeft]
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c Chunk) Null() bool {
|
|
|
|
return (c.posRight+1)-c.posLeft >= 4 && c.vector[c.posLeft] == 'n' && c.vector[c.posLeft+1] == 'u' && c.vector[c.posLeft+2] == 'l' && c.vector[c.posLeft+3] == 'l'
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c Chunk) LeftPosf(format string, args ...any) error {
|
|
|
|
return fmt.Errorf(fmt.Sprintf("[%d] %s", c.globLeft, format), args...)
|
|
|
|
}
|
|
|
|
|
2022-08-03 15:20:03 +01:00
|
|
|
// Match finds the matching closer to the symbol at the left position and sets the
|
|
|
|
// right position to this index
|
|
|
|
func (c Chunk) Match() (Chunk, error) {
|
2022-08-05 20:53:44 +01:00
|
|
|
start := c.vector[c.posLeft]
|
|
|
|
matcher, ok := matchers[start]
|
2022-08-03 15:20:03 +01:00
|
|
|
if !ok {
|
|
|
|
panic(fmt.Sprintf("Match called on %c with no matcher defined", c.vector[c.posLeft]))
|
|
|
|
}
|
2022-08-05 20:53:44 +01:00
|
|
|
|
|
|
|
for index := c.posLeft + 1; index < len(c.vector); index++ {
|
|
|
|
if start != '"' && c.vector[index] == start {
|
|
|
|
sub, err := c.Child(index, len(c.vector)).Match()
|
|
|
|
if err != nil {
|
|
|
|
return c, fmt.Errorf("[%d] child %w", c.globLeft+(c.posLeft-index), err)
|
2022-08-03 15:20:03 +01:00
|
|
|
}
|
2022-08-05 20:53:44 +01:00
|
|
|
index += sub.posRight
|
|
|
|
continue
|
2022-08-03 15:20:03 +01:00
|
|
|
}
|
2022-08-05 20:53:44 +01:00
|
|
|
if c.vector[index] == escapeChar {
|
|
|
|
if index+1 == len(c.vector) {
|
|
|
|
return c, fmt.Errorf("[%d] %w", c.globLeft+(c.posLeft-index), ErrEscapeAtEnd)
|
2022-08-03 15:20:03 +01:00
|
|
|
}
|
2022-08-05 20:53:44 +01:00
|
|
|
if c.vector[index+1] == unicodeEscape {
|
|
|
|
if index+6 >= len(c.vector) {
|
|
|
|
return c, fmt.Errorf("[%d] %w", c.globLeft+(c.posLeft+index), ErrIncompleteEscape)
|
|
|
|
}
|
|
|
|
index += 5
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
index++
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
if c.vector[index] == matcher {
|
|
|
|
c.globRight -= c.posRight - index
|
|
|
|
c.posRight = index
|
|
|
|
return c, nil
|
2022-08-03 15:20:03 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-08-05 20:53:44 +01:00
|
|
|
return c, c.LeftPosf("%w %c", ErrNoMatch, c.vector[c.posLeft])
|
2022-08-03 15:20:03 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
func (c Chunk) Copy() Chunk {
|
|
|
|
return c
|
|
|
|
}
|
|
|
|
|
|
|
|
// Sub returns an inclusive subchunk of [left;right]
|
|
|
|
func (c Chunk) Sub() Chunk {
|
2022-08-05 20:53:44 +01:00
|
|
|
c = c.Child(c.posLeft, c.posRight+1)
|
|
|
|
// To be inclusive we incremented posRight above, so
|
|
|
|
// to restore the original position, we must decrement
|
|
|
|
// it here before returning. Only if we have room to decrement it
|
|
|
|
if c.posRight > 0 {
|
|
|
|
c.posRight--
|
|
|
|
c.globRight--
|
|
|
|
}
|
|
|
|
return c
|
2022-08-03 15:20:03 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// CookieCutter returns a subchunk of (left;right)
|
|
|
|
func (c Chunk) CookieCutter() Chunk {
|
2022-08-05 20:53:44 +01:00
|
|
|
return c.Child(c.posLeft+1, c.posRight)
|
|
|
|
}
|
|
|
|
|
|
|
|
func spaceAtIndex(v coll.Vector[byte], index int) bool {
|
|
|
|
return v[index] == 0x9 || v[index] == 0xa || v[index] == 0xd || v[index] == 0x20
|
2022-08-03 15:20:03 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
func (c Chunk) AtSpace() bool {
|
2022-08-05 20:53:44 +01:00
|
|
|
return spaceAtIndex(c.vector, c.posLeft)
|
2022-08-03 15:20:03 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
func (c Chunk) Seek() Chunk {
|
|
|
|
for c.posLeft < len(c.vector) && c.AtSpace() {
|
2022-08-05 20:53:44 +01:00
|
|
|
c.globLeft++
|
2022-08-03 15:20:03 +01:00
|
|
|
c.posLeft++
|
|
|
|
}
|
|
|
|
return c
|
|
|
|
}
|
|
|
|
|
2022-08-05 20:53:44 +01:00
|
|
|
// Step increments the left position by 1 if b
|
|
|
|
// is the byte at the current left position
|
|
|
|
func (c Chunk) StepIf(b byte) Chunk {
|
|
|
|
if c.vector[c.posLeft] != b {
|
|
|
|
return c
|
|
|
|
}
|
|
|
|
c.posLeft++
|
|
|
|
c.globLeft++
|
|
|
|
return c
|
|
|
|
}
|
|
|
|
|
2022-08-03 15:20:03 +01:00
|
|
|
func (c Chunk) ValueEnd() (Chunk, error) {
|
|
|
|
switch c.vector[c.posLeft] {
|
|
|
|
case '"', '{', '[':
|
|
|
|
return c.Match()
|
|
|
|
default:
|
|
|
|
for index := c.posLeft; index <= c.posRight; index++ {
|
2022-08-05 20:53:44 +01:00
|
|
|
if c.vector[index] == ',' || spaceAtIndex(c.vector, index) {
|
|
|
|
if index == c.posLeft {
|
|
|
|
return c, c.LeftPosf("%w", ErrMissingValue)
|
|
|
|
}
|
|
|
|
return c.Child(c.posLeft, index), nil
|
2022-08-03 15:20:03 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return c, nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Skip skips the current left position and then Seeks
|
|
|
|
func (c Chunk) Skip() Chunk {
|
|
|
|
if c.posLeft+1 < len(c.vector) {
|
|
|
|
c.posLeft++
|
2022-08-05 20:53:44 +01:00
|
|
|
c.globLeft++
|
2022-08-03 15:20:03 +01:00
|
|
|
}
|
|
|
|
return c.Seek()
|
|
|
|
}
|
|
|
|
|
|
|
|
// MatchRule bundles a byte with a boolean flag. It is not referenced by any
// other code visible in this file.
type MatchRule struct {
	// MatchByte is the byte associated with the rule.
	MatchByte byte

	// StartFromRight presumably selects scanning from the right-hand side;
	// no visible code reads it. TODO confirm.
	StartFromRight bool
}
|
|
|
|
|
|
|
|
// matchers maps each opening symbol that Match understands to the closing
// symbol that pairs with it.
var matchers = map[byte]byte{
	'{': '}',
	'[': ']',
	'"': '"',
}
|
|
|
|
|
|
|
|
func New(v []byte) Chunk {
|
|
|
|
posRight := len(v) - 1
|
|
|
|
if len(v) == 0 {
|
|
|
|
posRight = 0
|
|
|
|
}
|
|
|
|
return Chunk{
|
2022-08-05 20:53:44 +01:00
|
|
|
vector: coll.Vector[byte](v),
|
|
|
|
posLeft: 0,
|
|
|
|
posRight: posRight,
|
|
|
|
globRight: posRight,
|
2022-08-03 15:20:03 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c Chunk) Child(left, right int) Chunk {
|
|
|
|
sub := c.vector[left:right]
|
|
|
|
return Chunk{
|
|
|
|
vector: sub,
|
|
|
|
posLeft: 0,
|
2022-08-05 20:53:44 +01:00
|
|
|
posRight: len(sub) - 1,
|
2022-08-03 15:20:03 +01:00
|
|
|
globLeft: (c.globLeft - c.posLeft) + left,
|
2022-08-05 20:53:44 +01:00
|
|
|
globRight: (c.globRight - c.posRight) + (right - 1),
|
2022-08-03 15:20:03 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// ParseFunc is a generic function type: it takes a value of type T and
// returns a value of the same type together with an error. Nothing in the
// visible part of this file refers to it.
type ParseFunc[T any] func(T) (T, error)
|
|
|
|
|
|
|
|
type Row struct {
|
|
|
|
Name string
|
|
|
|
Value Chunk
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c Chunk) String() string {
|
|
|
|
return string(c.vector[c.posLeft : c.posRight+1])
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c Chunk) Row() (Row, error) {
|
|
|
|
if c.vector[c.posLeft] != '"' {
|
2022-08-05 20:53:44 +01:00
|
|
|
return Row{}, c.LeftPosf("%w: %c", ErrMapMemberNotString, c.vector[c.posLeft])
|
2022-08-03 15:20:03 +01:00
|
|
|
}
|
|
|
|
name, err := c.Match()
|
|
|
|
if err != nil {
|
2022-08-05 20:53:44 +01:00
|
|
|
return Row{}, c.LeftPosf("match: %w", err)
|
2022-08-03 15:20:03 +01:00
|
|
|
}
|
|
|
|
postName := c.Copy()
|
|
|
|
postName.posLeft = name.posRight
|
2022-08-05 20:53:44 +01:00
|
|
|
postName.globLeft = name.globRight
|
2022-08-03 15:20:03 +01:00
|
|
|
postName = postName.Skip()
|
|
|
|
// Next we must get a :
|
|
|
|
if postName.vector[postName.posLeft] != ':' {
|
2022-08-05 20:53:44 +01:00
|
|
|
return Row{}, postName.LeftPosf("%w '%c', expected ':'", ErrUnexpectedSymbol, postName.vector[postName.posLeft])
|
2022-08-03 15:20:03 +01:00
|
|
|
}
|
|
|
|
value, err := postName.Skip().ValueEnd()
|
|
|
|
if err != nil {
|
2022-08-05 20:53:44 +01:00
|
|
|
return Row{}, postName.LeftPosf("value: %w", err)
|
2022-08-03 15:20:03 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
return Row{
|
2022-08-05 20:53:44 +01:00
|
|
|
Name: name.String()[1 : name.posRight-name.posLeft],
|
2022-08-03 15:20:03 +01:00
|
|
|
Value: value,
|
|
|
|
}, nil
|
|
|
|
}
|
|
|
|
|
2022-08-05 20:53:44 +01:00
|
|
|
// After returns the chunk with its left position, if possible,
|
|
|
|
// right after the global right position of v
|
2022-08-03 15:20:03 +01:00
|
|
|
func (c Chunk) After(v Chunk) Chunk {
|
2022-08-05 20:53:44 +01:00
|
|
|
// Add two, as one is for dealing with right side being exclusive
|
|
|
|
// in slice indexes, and another one to go on to the next
|
|
|
|
offset := (v.globRight - c.globLeft) + 1
|
|
|
|
// Then, make sure we don't go too far
|
|
|
|
if c.posLeft+offset >= len(c.vector) {
|
|
|
|
offset--
|
|
|
|
}
|
|
|
|
c.posLeft += offset
|
|
|
|
c.globLeft += offset
|
2022-08-03 15:20:03 +01:00
|
|
|
return c
|
|
|
|
}
|
|
|
|
|
2022-08-05 20:53:44 +01:00
|
|
|
func (c Chunk) EOF() bool {
|
|
|
|
return c.posLeft >= len(c.vector)-1
|
2022-08-03 15:20:03 +01:00
|
|
|
}
|