flabk/pkg/ld/internal/parse/chunk/chunk.go

255 lines
5.9 KiB
Go

package chunk
import (
"errors"
"fmt"
"sectorinf.com/emilis/flabk/pkg/coll"
)
const (
// escapeChar is the backslash byte; Match treats it as the start of an
// escape sequence and skips what follows it.
escapeChar = '\\'
// unicodeEscape is the byte that, directly after escapeChar, makes Match
// skip a six-byte \uXXXX sequence instead of a two-byte escape.
unicodeEscape = 'u'
)
// Sentinel errors reported by Chunk methods. They are wrapped with position
// information before being returned, so compare with errors.Is.
var (
// ErrNoMatch is reported by Match when no closer is found for the opener.
ErrNoMatch = errors.New("no matching")
// ErrMapMemberNotString is reported by Row when the left byte is not '"'.
ErrMapMemberNotString = errors.New("map member not a string")
// ErrUnexpectedSymbol is reported by Row when the byte after the name is not ':'.
ErrUnexpectedSymbol = errors.New("unexpected symbol")
// ErrMissingValue is reported by ValueEnd when a terminator sits where a
// value should start.
ErrMissingValue = errors.New("missing value")
// ErrEscapeAtEnd is reported by Match when escapeChar is the last byte.
ErrEscapeAtEnd = errors.New("escape character at the end of value")
// ErrIncompleteEscape is reported by Match when a \u escape is not followed
// by enough bytes.
ErrIncompleteEscape = errors.New("incomplete unicode escape sequence")
)
// Chunk is a view over a byte vector bounded by a left and a right position.
// The methods visible here use value receivers and return modified copies
// rather than mutating the receiver.
type Chunk struct {
// vector holds the bytes that posLeft and posRight index into.
vector coll.Vector[byte]
// posLeft and posRight are indices into vector; String treats them as an
// inclusive range.
posLeft int
posRight int
// todo global pos impl
// globLeft and globRight track the same positions as posLeft and posRight,
// but Child carries the parent's offset over, so they stay relative to the
// bytes passed to New rather than to the current vector.
globLeft int
globRight int
}
// LeftByte returns the byte stored at the chunk's left position.
// It does not bounds-check, so an out-of-range left position panics.
func (c Chunk) LeftByte() byte {
	at := c.posLeft
	return c.vector[at]
}
// Null reports whether the inclusive range [posLeft, posRight] is at least
// four bytes long and begins with the literal "null".
func (c Chunk) Null() bool {
	const literal = "null"
	if (c.posRight+1)-c.posLeft < len(literal) {
		return false
	}
	// Compare byte by byte, left to right, stopping at the first mismatch.
	for offset := 0; offset < len(literal); offset++ {
		if c.vector[c.posLeft+offset] != literal[offset] {
			return false
		}
	}
	return true
}
// LeftPosf builds an error from format and args, in the manner of fmt.Errorf,
// with the chunk's global left position prepended as "[pos] ". Because the
// final call is fmt.Errorf, a %w verb in format wraps the matching argument.
func (c Chunk) LeftPosf(format string, args ...any) error {
	prefixed := fmt.Sprintf("[%d] %s", c.globLeft, format)
	return fmt.Errorf(prefixed, args...)
}
// Match finds the closer that pairs with the opener at the left position
// ('{' with '}', '[' with ']', '"' with '"') and returns a copy of c whose
// local and global right positions point at that closer.
//
// The scan starts one byte after the left position and runs to the end of the
// backing vector. While scanning a '{' or '[' span, a nested opener of the same
// kind and any quoted string are matched recursively and skipped, so closers
// inside them are not mistaken for the one being searched for. A backslash
// skips the byte after it, or the whole six-byte \uXXXX form.
//
// Returned errors wrap ErrNoMatch, ErrEscapeAtEnd or ErrIncompleteEscape and
// are prefixed with a global position. On error the receiver is returned
// unchanged. Match panics if the byte at the left position has no entry in
// matchers.
func (c Chunk) Match() (Chunk, error) {
	start := c.vector[c.posLeft]
	matcher, ok := matchers[start]
	if !ok {
		panic(fmt.Sprintf("Match called on %c with no matcher defined", c.vector[c.posLeft]))
	}
	// globalAt converts a local index into a global position: globLeft is the
	// global position of posLeft, so the distance from posLeft is added to it.
	globalAt := func(index int) int {
		return c.globLeft + (index - c.posLeft)
	}
	for index := c.posLeft + 1; index < len(c.vector); index++ {
		current := c.vector[index]
		// Inside a '{' or '[' span, step over nested spans of the same kind and
		// over strings, which may legitimately contain the closer byte.
		if start != '"' && (current == start || current == '"') {
			sub, err := c.Child(index, len(c.vector)).Match()
			if err != nil {
				return c, fmt.Errorf("[%d] child %w", globalAt(index), err)
			}
			// sub.posRight is the closer's offset from index; the loop's own
			// increment then moves past the closer.
			index += sub.posRight
			continue
		}
		if current == escapeChar {
			if index+1 == len(c.vector) {
				return c, fmt.Errorf("[%d] %w", globalAt(index), ErrEscapeAtEnd)
			}
			if c.vector[index+1] == unicodeEscape {
				// The escape occupies index..index+5, and at least one more
				// byte must follow it for a closer to exist.
				if index+6 >= len(c.vector) {
					return c, fmt.Errorf("[%d] %w", globalAt(index), ErrIncompleteEscape)
				}
				index += 5
				continue
			}
			// Two-byte escape: skip the escaped byte.
			index++
			continue
		}
		if current == matcher {
			// Move both right positions by the same distance.
			c.globRight -= c.posRight - index
			c.posRight = index
			return c, nil
		}
	}
	return c, c.LeftPosf("%w %c", ErrNoMatch, c.vector[c.posLeft])
}
// Copy returns the chunk by value. The struct fields are duplicated; the
// vector is converted from a []byte in New, so the copy presumably shares the
// same underlying bytes rather than cloning them.
func (c Chunk) Copy() Chunk {
	duplicate := c
	return duplicate
}
// Sub returns a child chunk whose vector is re-sliced to the receiver's
// [posLeft, posRight] range via Child. If the child's right position is
// greater than zero, both its local and global right positions are then moved
// back by one.
//
// NOTE(review): Child already sets posRight to the last index of the new
// slice and keeps globRight unchanged for this call, so the decrement leaves
// the right position one byte short of the slice's end. Confirm whether that
// is intended for an "inclusive" subchunk.
func (c Chunk) Sub() Chunk {
	sub := c.Child(c.posLeft, c.posRight+1)
	if sub.posRight <= 0 {
		// No room to step the right position back.
		return sub
	}
	sub.posRight--
	sub.globRight--
	return sub
}
// CookieCutter returns the child chunk strictly between the left and right
// positions, excluding the bytes at both of them.
func (c Chunk) CookieCutter() Chunk {
	innerLeft := c.posLeft + 1
	innerRight := c.posRight
	return c.Child(innerLeft, innerRight)
}
// spaceAtIndex reports whether v[index] is a tab, line feed, carriage return
// or space byte.
func spaceAtIndex(v coll.Vector[byte], index int) bool {
	switch v[index] {
	case '\t', '\n', '\r', ' ':
		return true
	default:
		return false
	}
}
// AtSpace reports whether the byte at the left position is whitespace, as
// defined by spaceAtIndex.
func (c Chunk) AtSpace() bool {
	data, at := c.vector, c.posLeft
	return spaceAtIndex(data, at)
}
// Seek advances the local and global left positions past consecutive
// whitespace bytes and returns the resulting chunk. It stops at the first
// non-whitespace byte or when the left position reaches the end of the
// vector, so the returned left position may equal len(vector).
func (c Chunk) Seek() Chunk {
	for {
		if c.posLeft >= len(c.vector) || !c.AtSpace() {
			return c
		}
		c.posLeft++
		c.globLeft++
	}
}
// StepIf returns the chunk with its local and global left positions advanced
// by one when the byte at the current left position equals b; otherwise it
// returns the chunk unchanged.
func (c Chunk) StepIf(b byte) Chunk {
	if c.vector[c.posLeft] == b {
		c.posLeft++
		c.globLeft++
	}
	return c
}
// ValueEnd returns a chunk covering the value that starts at the left
// position.
//
// If the value starts with '"', '{' or '[', the result of Match is returned.
// Otherwise the range [posLeft, posRight] is scanned for the first ',' or
// whitespace byte: a terminator at the left position itself yields an error
// wrapping ErrMissingValue, a later terminator yields the child chunk that
// ends just before it, and no terminator yields the receiver unchanged.
func (c Chunk) ValueEnd() (Chunk, error) {
	if first := c.vector[c.posLeft]; first == '"' || first == '{' || first == '[' {
		return c.Match()
	}
	for at := c.posLeft; at <= c.posRight; at++ {
		if c.vector[at] != ',' && !spaceAtIndex(c.vector, at) {
			continue
		}
		if at == c.posLeft {
			return c, c.LeftPosf("%w", ErrMissingValue)
		}
		return c.Child(c.posLeft, at), nil
	}
	return c, nil
}
// Skip moves the left position one byte forward, provided that byte exists,
// and then Seeks past any whitespace. When the left position is already on
// the last byte it is not moved before Seek runs.
func (c Chunk) Skip() Chunk {
	next := c.posLeft + 1
	if next < len(c.vector) {
		c.posLeft = next
		c.globLeft++
	}
	return c.Seek()
}
// MatchRule pairs a byte with a direction flag. It is not referenced by any
// code visible in this part of the file.
type MatchRule struct {
// MatchByte is presumably the byte to look for; confirm against its users.
MatchByte byte
// StartFromRight presumably selects scanning from the right end; confirm
// against its users.
StartFromRight bool
}
// matchers maps each opening byte that Match accepts to the closing byte it
// searches for.
var matchers = map[byte]byte{
	'"': '"',
	'[': ']',
	'{': '}',
}
// New wraps v in a Chunk that spans all of it: the left positions are zero and
// the local and global right positions are the index of the last byte. For an
// empty v the right positions are zero as well.
func New(v []byte) Chunk {
	last := 0
	if n := len(v); n > 0 {
		last = n - 1
	}
	var root Chunk
	root.vector = coll.Vector[byte](v)
	root.posRight = last
	root.globRight = last
	return root
}
// Child returns a chunk over vector[left:right] (right exclusive). The child's
// local positions span the whole new slice, from 0 to len-1. Its global
// positions are the parent's global offsets shifted so that they still refer
// to the same bytes: left for the left side, right-1 for the right side.
func (c Chunk) Child(left, right int) Chunk {
	var child Chunk
	child.vector = c.vector[left:right]
	child.posRight = len(child.vector) - 1
	child.globLeft = c.globLeft - c.posLeft + left
	child.globRight = c.globRight - c.posRight + right - 1
	return child
}
// ParseFunc is a generic function type that takes a T and returns a T and an
// error.
type ParseFunc[T any] func(T) (T, error)
// Row is the name/value pair produced by Chunk.Row.
type Row struct {
// Name is the member name with its surrounding quotes removed.
Name string
// Value is the chunk returned by ValueEnd for the member's value.
Value Chunk
}
// String returns the bytes in the inclusive range [posLeft, posRight] as a
// string.
func (c Chunk) String() string {
	end := c.posRight + 1
	view := c.vector[c.posLeft:end]
	return string(view)
}
// Row parses one `"name": value` member starting at the left position and
// returns the unquoted name together with the chunk covering the value.
//
// Errors carry a global position prefix and wrap:
//   - ErrMapMemberNotString when the left byte is not '"';
//   - the Match error when the name has no closing quote;
//   - ErrUnexpectedSymbol when the byte after the name is not ':';
//   - ErrMissingValue when the input ends after the name or after the ':';
//   - the ValueEnd error when the value cannot be delimited.
func (c Chunk) Row() (Row, error) {
	if c.vector[c.posLeft] != '"' {
		return Row{}, c.LeftPosf("%w: %c", ErrMapMemberNotString, c.vector[c.posLeft])
	}
	name, err := c.Match()
	if err != nil {
		return Row{}, c.LeftPosf("match: %w", err)
	}
	// Continue from the name's closing quote, then step past it and any
	// whitespace that follows.
	postName := c.Copy()
	postName.posLeft = name.posRight
	postName.globLeft = name.globRight
	postName = postName.Skip()
	// Seek can run off the end when only whitespace follows the name; indexing
	// the vector there would panic, so report it as an error instead.
	if postName.posLeft >= len(postName.vector) {
		return Row{}, postName.LeftPosf("%w: expected ':' after member name", ErrMissingValue)
	}
	// Next we must get a :
	if postName.vector[postName.posLeft] != ':' {
		return Row{}, postName.LeftPosf("%w '%c', expected ':'", ErrUnexpectedSymbol, postName.vector[postName.posLeft])
	}
	valueStart := postName.Skip()
	// Skip does not move when ':' is the last byte, and Seek can run off the
	// end when only whitespace follows it. Either way there is no value.
	if valueStart.posLeft == postName.posLeft || valueStart.posLeft >= len(valueStart.vector) {
		return Row{}, postName.LeftPosf("value: %w", ErrMissingValue)
	}
	value, err := valueStart.ValueEnd()
	if err != nil {
		return Row{}, postName.LeftPosf("value: %w", err)
	}
	return Row{
		// name.String() includes both quotes; slice them off.
		Name:  name.String()[1 : name.posRight-name.posLeft],
		Value: value,
	}, nil
}
// After returns the chunk with its left position moved to just past v's
// global right position, where possible.
//
// The step is the distance from c's global left to one past v's global right.
// If that would put the left position at or beyond the end of the vector, the
// step is reduced by one. Local and global left positions move by the same
// amount.
func (c Chunk) After(v Chunk) Chunk {
	step := v.globRight - c.globLeft + 1
	// Then, make sure we don't go too far
	if c.posLeft+step >= len(c.vector) {
		step--
	}
	c.globLeft += step
	c.posLeft += step
	return c
}
// EOF reports whether the left position is at or beyond the last index of the
// vector.
func (c Chunk) EOF() bool {
	lastIndex := len(c.vector) - 1
	return c.posLeft >= lastIndex
}