// Package chunk provides a small cursor type over a byte vector, used to
// locate matching delimiters, values and name/value rows in JSON-like input.
package chunk

import (
	"errors"
	"fmt"

	"sectorinf.com/emilis/flabk/pkg/coll"
)

const (
	// escapeChar introduces an escape sequence.
	escapeChar = '\\'
	// unicodeEscape follows escapeChar to start a \uXXXX sequence.
	unicodeEscape = 'u'
)

// Sentinel errors returned (wrapped with a position prefix) by Chunk methods.
var (
	ErrNoMatch            = errors.New("no matching")
	ErrMapMemberNotString = errors.New("map member not a string")
	ErrUnexpectedSymbol   = errors.New("unexpected symbol")
	ErrMissingValue       = errors.New("missing value")
	ErrEscapeAtEnd        = errors.New("escape character at the end of value")
	ErrIncompleteEscape   = errors.New("incomplete unicode escape sequence")
)

// Chunk is a window [posLeft; posRight] over vector. Methods use value
// receivers and return the modified copy, so a Chunk is never mutated in
// place.
type Chunk struct {
	vector   coll.Vector[byte]
	posLeft  int
	posRight int

	// todo global pos impl
	// globLeft and globRight mirror posLeft and posRight as offsets in the
	// vector of the chunk this one was derived from (see Child).
	globLeft  int
	globRight int
}

// LeftByte returns the byte at the left position.
func (c Chunk) LeftByte() byte {
	return c.vector[c.posLeft]
}

// Null reports whether the window holds at least four bytes and the bytes
// starting at the left position spell "null".
func (c Chunk) Null() bool {
	return (c.posRight+1)-c.posLeft >= 4 &&
		c.vector[c.posLeft] == 'n' &&
		c.vector[c.posLeft+1] == 'u' &&
		c.vector[c.posLeft+2] == 'l' &&
		c.vector[c.posLeft+3] == 'l'
}

// LeftPosf formats an error like fmt.Errorf, prefixed with the global left
// position in square brackets. %w verbs in format are honoured.
func (c Chunk) LeftPosf(format string, args ...any) error {
	return fmt.Errorf(fmt.Sprintf("[%d] %s", c.globLeft, format), args...)
}

// Match finds the matching closer to the symbol at the left position and sets the
// right position to this index.
//
// Nested occurrences of the same opener are matched recursively, and when
// the opener is not a quote, string literals are skipped as a unit so that a
// closer appearing inside a string does not end the match. Escape sequences
// (including \uXXXX) are stepped over.
//
// Match panics if the byte at the left position has no entry in matchers.
// It returns ErrNoMatch, ErrEscapeAtEnd or ErrIncompleteEscape (wrapped with
// a global position) when no closer can be found.
func (c Chunk) Match() (Chunk, error) {
	start := c.vector[c.posLeft]
	matcher, ok := matchers[start]
	if !ok {
		panic(fmt.Sprintf("Match called on %c with no matcher defined", start))
	}

	for index := c.posLeft + 1; index < len(c.vector); index++ {
		current := c.vector[index]
		// globLeft corresponds to posLeft, so the global position of index
		// is globLeft plus the distance travelled from posLeft.
		globIndex := c.globLeft + (index - c.posLeft)

		// Inside an object/array, both a nested opener of the same kind and
		// a string literal must be consumed whole before looking for the
		// closer.
		if start != '"' && (current == start || current == '"') {
			sub, err := c.Child(index, len(c.vector)).Match()
			if err != nil {
				return c, fmt.Errorf("[%d] child %w", globIndex, err)
			}
			// sub.posRight is relative to index; land on the nested closer
			// and let the loop increment step past it.
			index += sub.posRight
			continue
		}

		if current == escapeChar {
			if index+1 == len(c.vector) {
				return c, fmt.Errorf("[%d] %w", globIndex, ErrEscapeAtEnd)
			}
			if c.vector[index+1] == unicodeEscape {
				if index+6 >= len(c.vector) {
					return c, fmt.Errorf("[%d] %w", globIndex, ErrIncompleteEscape)
				}
				// Skip the remaining five bytes of \uXXXX.
				index += 5
				continue
			}
			// Skip the single escaped byte.
			index++
			continue
		}

		if current == matcher {
			c.globRight -= c.posRight - index
			c.posRight = index
			return c, nil
		}
	}

	return c, c.LeftPosf("%w %c", ErrNoMatch, c.vector[c.posLeft])
}

// Copy returns a copy of the chunk.
func (c Chunk) Copy() Chunk {
	return c
}

// Sub returns an inclusive subchunk of [left;right]
//
// NOTE(review): Child already sets posRight to the last byte of the new
// vector, so the decrement below leaves that byte outside
// [posLeft;posRight] — confirm this is intended.
func (c Chunk) Sub() Chunk {
	c = c.Child(c.posLeft, c.posRight+1)
	// To be inclusive we incremented posRight above, so
	// to restore the original position, we must decrement
	// it here before returning. Only if we have room to decrement it
	if c.posRight > 0 {
		c.posRight--
		c.globRight--
	}
	return c
}

// CookieCutter returns a subchunk of (left;right)
func (c Chunk) CookieCutter() Chunk {
	return c.Child(c.posLeft+1, c.posRight)
}

// spaceAtIndex reports whether v[index] is a tab, line feed, carriage
// return or space.
func spaceAtIndex(v coll.Vector[byte], index int) bool {
	return v[index] == 0x9 || v[index] == 0xa || v[index] == 0xd || v[index] == 0x20
}

// AtSpace reports whether the byte at the left position is whitespace.
func (c Chunk) AtSpace() bool {
	return spaceAtIndex(c.vector, c.posLeft)
}

// Seek advances the left position past any whitespace. If only whitespace
// remains, the left position ends up equal to len(vector).
func (c Chunk) Seek() Chunk {
	for c.posLeft < len(c.vector) && c.AtSpace() {
		c.globLeft++
		c.posLeft++
	}
	return c
}

// StepIf increments the left position by 1 if b
// is the byte at the current left position. A left position past the end of
// the vector is returned unchanged.
func (c Chunk) StepIf(b byte) Chunk {
	if c.posLeft >= len(c.vector) || c.vector[c.posLeft] != b {
		return c
	}
	c.posLeft++
	c.globLeft++
	return c
}

// ValueEnd returns a chunk covering the value that starts at the left
// position. Values opening with '"', '{' or '[' are delimited with Match;
// anything else ends before the first ',' or whitespace byte, or at the
// right position if neither occurs.
//
// It returns ErrMissingValue when the left position is past the end of the
// vector or the value would be empty.
func (c Chunk) ValueEnd() (Chunk, error) {
	if c.posLeft >= len(c.vector) {
		return c, c.LeftPosf("%w", ErrMissingValue)
	}
	switch c.vector[c.posLeft] {
	case '"', '{', '[':
		return c.Match()
	default:
		for index := c.posLeft; index <= c.posRight; index++ {
			if c.vector[index] == ',' || spaceAtIndex(c.vector, index) {
				if index == c.posLeft {
					return c, c.LeftPosf("%w", ErrMissingValue)
				}
				return c.Child(c.posLeft, index), nil
			}
		}
		return c, nil
	}
}

// Skip skips the current left position and then Seeks
func (c Chunk) Skip() Chunk {
	if c.posLeft+1 < len(c.vector) {
		c.posLeft++
		c.globLeft++
	}
	return c.Seek()
}

// MatchRule pairs a byte to match with a search direction flag.
// It is not referenced elsewhere in this file.
type MatchRule struct {
	MatchByte      byte
	StartFromRight bool
}

var (
	// matchers maps each supported opener to its closer.
	matchers = map[byte]byte{
		'{': '}',
		'[': ']',
		'"': '"',
	}
)

// New returns a Chunk spanning all of v, with global positions equal to the
// local ones. For an empty v both positions are 0.
func New(v []byte) Chunk {
	posRight := len(v) - 1
	if len(v) == 0 {
		posRight = 0
	}
	return Chunk{
		vector:    coll.Vector[byte](v),
		posLeft:   0,
		posRight:  posRight,
		globRight: posRight,
	}
}

// Child returns a chunk over vector[left:right] (right exclusive). The
// child's local positions span the whole slice, and its global positions are
// translated from the parent's so they keep referring to the same bytes.
func (c Chunk) Child(left, right int) Chunk {
	sub := c.vector[left:right]
	return Chunk{
		vector:    sub,
		posLeft:   0,
		posRight:  len(sub) - 1,
		globLeft:  (c.globLeft - c.posLeft) + left,
		globRight: (c.globRight - c.posRight) + (right - 1),
	}
}

// ParseFunc transforms a value of type T, returning the result or an error.
type ParseFunc[T any] func(T) (T, error)

// Row is a name/value member: the unquoted name and the chunk holding its
// value.
type Row struct {
	Name  string
	Value Chunk
}

// String returns the bytes in [posLeft; posRight] as a string.
func (c Chunk) String() string {
	return string(c.vector[c.posLeft : c.posRight+1])
}

// Row parses a `"name": value` member starting at the left position.
//
// It returns ErrMapMemberNotString when the member does not start with a
// quote, ErrUnexpectedSymbol when the name is not followed by ':', and
// ErrMissingValue when the input ends before a value is found. Errors from
// Match and ValueEnd are wrapped with a position prefix.
func (c Chunk) Row() (Row, error) {
	if c.posLeft >= len(c.vector) {
		return Row{}, c.LeftPosf("%w", ErrMissingValue)
	}
	if c.vector[c.posLeft] != '"' {
		return Row{}, c.LeftPosf("%w: %c", ErrMapMemberNotString, c.vector[c.posLeft])
	}
	name, err := c.Match()
	if err != nil {
		return Row{}, c.LeftPosf("match: %w", err)
	}

	// Continue from the closing quote of the name.
	postName := c.Copy()
	postName.posLeft = name.posRight
	postName.globLeft = name.globRight
	postName = postName.Skip()
	// Seek may run off the end when only whitespace follows the name.
	if postName.posLeft >= len(postName.vector) {
		return Row{}, postName.LeftPosf("%w", ErrMissingValue)
	}

	// Next we must get a :
	if postName.vector[postName.posLeft] != ':' {
		return Row{}, postName.LeftPosf("%w '%c', expected ':'", ErrUnexpectedSymbol, postName.vector[postName.posLeft])
	}

	// Skip cannot advance past a ':' that is the last byte; without this
	// check the ':' itself would be taken as the value.
	if postName.posLeft+1 >= len(postName.vector) {
		return Row{}, postName.LeftPosf("value: %w", ErrMissingValue)
	}
	valueStart := postName.Skip()
	if valueStart.posLeft >= len(valueStart.vector) {
		return Row{}, postName.LeftPosf("value: %w", ErrMissingValue)
	}

	value, err := valueStart.ValueEnd()
	if err != nil {
		return Row{}, postName.LeftPosf("value: %w", err)
	}

	return Row{
		// Strip the surrounding quotes from the matched name.
		Name:  name.String()[1 : name.posRight-name.posLeft],
		Value: value,
	}, nil
}

// After returns the chunk with its left position, if possible,
// right after the global right position of v
func (c Chunk) After(v Chunk) Chunk {
	// Distance from our left position to v's right position, plus one to
	// move on to the byte that follows it.
	offset := (v.globRight - c.globLeft) + 1
	// Then, make sure we don't go too far: back off by one if the new
	// position would be at or beyond the end of the vector.
	if c.posLeft+offset >= len(c.vector) {
		offset--
	}
	c.posLeft += offset
	c.globLeft += offset
	return c
}

// EOF reports whether the left position is at or past the last byte of the
// vector.
func (c Chunk) EOF() bool {
	return c.posLeft >= len(c.vector)-1
}