flabk/pkg/ld/internal/parse/chunk/chunk.go

package chunk

import (
	"errors"
	"fmt"

	"sectorinf.com/emilis/flabk/pkg/coll"
)

// escapeChar is the backslash that Match treats as the start of an escape
// sequence. unicodeEscape is the byte that, when it directly follows
// escapeChar, makes Match skip a whole \uXXXX sequence instead of a single
// escaped byte.
const (
	escapeChar    = '\\'
	unicodeEscape = 'u'
)

// Sentinel errors returned (wrapped) by the Chunk methods in this file:
//   - ErrNoMatch: Match reached the end of the vector without finding a closer.
//   - ErrMapMemberNotString: Row was called on a chunk that does not start with '"'.
//   - ErrUnexpectedSymbol: Row did not find ':' after the member name.
//   - ErrMissingValue: ValueEnd found a ',' or whitespace at the left position.
//   - ErrEscapeAtEnd: Match found a backslash as the last byte of the vector.
//   - ErrIncompleteEscape: Match found a \u escape too close to the end of the vector.
var (
	ErrNoMatch            = errors.New("no matching")
	ErrMapMemberNotString = errors.New("map member not a string")
	ErrUnexpectedSymbol   = errors.New("unexpected symbol")
	ErrMissingValue       = errors.New("missing value")
	ErrEscapeAtEnd        = errors.New("escape character at the end of value")
	ErrIncompleteEscape   = errors.New("incomplete unicode escape sequence")
)

// Chunk is a cursor over a byte vector. posLeft and posRight are indexes into
// vector; posRight is used inclusively (String slices [posLeft:posRight+1] and
// New sets it to len-1). globLeft and globRight shadow those two positions and
// are what LeftPosf prints in error messages; Child offsets them so that a
// child chunk keeps reporting positions in its parent's coordinates.
//
// All methods use value receivers and return the modified copy.
type Chunk struct {
	vector   coll.Vector[byte]
	posLeft  int
	posRight int
	// TODO: global position tracking is not fully implemented yet.
	globLeft  int
	globRight int
}

// LeftByte returns the byte stored at the chunk's left position.
// It panics if the left position is outside the vector.
func (c Chunk) LeftByte() byte {
	cursor := c.posLeft
	return c.vector[cursor]
}

// Null reports whether the window [posLeft;posRight] is at least four bytes
// wide and begins with the literal "null".
func (c Chunk) Null() bool {
	const literal = "null"
	if c.posRight-c.posLeft+1 < len(literal) {
		return false
	}
	// Compare byte by byte, left to right, stopping at the first mismatch.
	for offset := 0; offset < len(literal); offset++ {
		if c.vector[c.posLeft+offset] != literal[offset] {
			return false
		}
	}
	return true
}

// LeftPosf builds an error from format and args, as fmt.Errorf would, with
// the message prefixed by the chunk's global left position in square
// brackets. A %w verb in format still wraps its operand.
func (c Chunk) LeftPosf(format string, args ...any) error {
	// Sprint inserts no spaces here because each neighbouring pair of operands
	// contains a string, so this yields "[<globLeft>] ".
	prefix := fmt.Sprint("[", c.globLeft, "] ")
	return fmt.Errorf(prefix+format, args...)
}

// Match finds the closer that pairs with the opener at the left position
// ('{' with '}', '[' with ']', '"' with '"') and returns a copy of the chunk
// whose right position, local and global, is the index of that closer.
//
// Scanning starts right after the opener and runs to the end of the vector,
// not just to the current right position. While scanning:
//   - a nested opener of the same kind is matched recursively and skipped;
//   - inside an object or array, a string literal is matched and skipped as a
//     whole, so a brace or bracket inside the string is not taken as a closer;
//   - a backslash skips the escaped byte, or the whole \uXXXX sequence.
//
// On failure the unmodified chunk is returned together with an error that
// wraps ErrNoMatch, ErrEscapeAtEnd or ErrIncompleteEscape and is prefixed with
// the global position of the offending byte. Match panics if the byte at the
// left position has no entry in matchers.
func (c Chunk) Match() (Chunk, error) {
	start := c.vector[c.posLeft]
	matcher, ok := matchers[start]
	if !ok {
		panic(fmt.Sprintf("Match called on %c with no matcher defined", start))
	}

	// globalAt translates an index into c.vector to the global position used
	// in error messages, taking the left position as the reference point.
	globalAt := func(index int) int {
		return c.globLeft + (index - c.posLeft)
	}

	for index := c.posLeft + 1; index < len(c.vector); index++ {
		current := c.vector[index]
		// Within a container, hand nested containers of the same kind and
		// string literals to a child chunk and jump to wherever they end.
		if start != '"' && (current == start || current == '"') {
			sub, err := c.Child(index, len(c.vector)).Match()
			if err != nil {
				return c, fmt.Errorf("[%d] child %w", globalAt(index), err)
			}
			// sub.posRight is relative to the child, whose index 0 is index
			// here; the loop increment then steps past the closer.
			index += sub.posRight
			continue
		}
		if current == escapeChar {
			if index+1 == len(c.vector) {
				return c, fmt.Errorf("[%d] %w", globalAt(index), ErrEscapeAtEnd)
			}
			if c.vector[index+1] == unicodeEscape {
				// Require room for \uXXXX plus at least one following byte, so
				// a closer cannot be swallowed by a truncated escape.
				if index+6 >= len(c.vector) {
					return c, fmt.Errorf("[%d] %w", globalAt(index), ErrIncompleteEscape)
				}
				index += 5
				continue
			}
			index++
			continue
		}
		if current == matcher {
			c.globRight -= c.posRight - index
			c.posRight = index
			return c, nil
		}
	}

	return c, c.LeftPosf("%w %c", ErrNoMatch, start)
}

// Copy returns the chunk by value, so position changes made to the result do
// not affect the receiver.
// NOTE(review): only the struct is copied; if coll.Vector is a slice type the
// underlying bytes are still shared — confirm.
func (c Chunk) Copy() Chunk {
	duplicate := c
	return duplicate
}

// Sub returns a child chunk whose vector is the [left;right] window of c.
// If the child's right position is greater than zero, both its local and
// global right positions are then moved one step to the left.
func (c Chunk) Sub() Chunk {
	window := c.Child(c.posLeft, c.posRight+1)
	if window.posRight <= 0 {
		// No room to step back.
		return window
	}
	// NOTE(review): Child already sets posRight to the last index of the
	// window, so after this step the right position sits one byte before the
	// end of the window — confirm this is what callers expect.
	window.posRight--
	window.globRight--
	return window
}

// CookieCutter returns the child chunk strictly between the left and right
// positions, i.e. the (left;right) window with both end bytes left out.
func (c Chunk) CookieCutter() Chunk {
	innerStart, innerEnd := c.posLeft+1, c.posRight
	return c.Child(innerStart, innerEnd)
}

// spaceAtIndex reports whether the byte of v at index is a horizontal tab,
// line feed, carriage return or space.
func spaceAtIndex(v coll.Vector[byte], index int) bool {
	switch v[index] {
	case '\t', '\n', '\r', ' ':
		return true
	default:
		return false
	}
}

// AtSpace reports whether the byte at the left position is whitespace as
// defined by spaceAtIndex.
func (c Chunk) AtSpace() bool {
	cursor := c.posLeft
	return spaceAtIndex(c.vector, cursor)
}

// Seek moves the left position (local and global) forward past any run of
// whitespace and returns the updated chunk. If only whitespace remains, the
// left position ends up equal to the vector length.
func (c Chunk) Seek() Chunk {
	for {
		if c.posLeft >= len(c.vector) || !c.AtSpace() {
			return c
		}
		c.posLeft++
		c.globLeft++
	}
}

// StepIf advances the left position (local and global) by one when the byte
// at the current left position equals b; otherwise the chunk is returned
// unchanged.
func (c Chunk) StepIf(b byte) Chunk {
	if c.vector[c.posLeft] == b {
		c.posLeft++
		c.globLeft++
	}
	return c
}

// ValueEnd locates the end of the value that starts at the left position.
//
// A value opening with '"', '{' or '[' is delegated to Match. Any other value
// is scanned up to the right position for the first ',' or whitespace byte:
// the bytes before that delimiter are returned as a child chunk, and if no
// delimiter is found the chunk is returned as is. A delimiter sitting on the
// left position itself yields an error wrapping ErrMissingValue.
func (c Chunk) ValueEnd() (Chunk, error) {
	if first := c.vector[c.posLeft]; first == '"' || first == '{' || first == '[' {
		return c.Match()
	}

	for cursor := c.posLeft; cursor <= c.posRight; cursor++ {
		if c.vector[cursor] != ',' && !spaceAtIndex(c.vector, cursor) {
			continue
		}
		if cursor == c.posLeft {
			return c, c.LeftPosf("%w", ErrMissingValue)
		}
		return c.Child(c.posLeft, cursor), nil
	}
	return c, nil
}

// Skip steps over the byte at the left position, unless it is the last byte
// of the vector, and then Seeks past any whitespace that follows.
func (c Chunk) Skip() Chunk {
	if next := c.posLeft + 1; next < len(c.vector) {
		c.posLeft = next
		c.globLeft++
	}
	return c.Seek()
}

// MatchRule pairs a byte to match with a flag for the scan direction.
// NOTE(review): nothing in the visible part of this file uses this type, so
// the exact meaning of the fields is not established here — confirm against
// callers or remove if dead.
type MatchRule struct {
	MatchByte      byte
	StartFromRight bool
}

var (
	// matchers maps each opener byte that Match accepts to the closer byte
	// it searches for.
	matchers = map[byte]byte{
		'{': '}',
		'[': ']',
		'"': '"',
	}
)

// New wraps v in a Chunk that spans the whole slice: the left positions are
// zero and both right positions are the index of the last byte. For an empty
// slice the right positions are zero as well.
func New(v []byte) Chunk {
	last := 0
	if n := len(v); n > 0 {
		last = n - 1
	}
	root := Chunk{vector: coll.Vector[byte](v)}
	root.posRight = last
	root.globRight = last
	return root
}

// Child returns a new chunk over c.vector[left:right] (right exclusive).
// The child's local positions span its whole vector. Its global positions are
// c's global positions shifted by the distance between left/right-1 and c's
// own local left/right positions.
func (c Chunk) Child(left, right int) Chunk {
	window := c.vector[left:right]

	child := Chunk{vector: window}
	child.posRight = len(window) - 1
	child.globLeft = (c.globLeft - c.posLeft) + left
	child.globRight = (c.globRight - c.posRight) + (right - 1)
	return child
}

// ParseFunc is a function that takes a value of type T and returns a value of
// the same type together with an error.
// NOTE(review): not used in the visible part of this file — confirm its role
// against callers.
type ParseFunc[T any] func(T) (T, error)

// Row is one name/value member as produced by Chunk.Row: Name is the member
// name with its surrounding quotes removed, and Value is the chunk that
// ValueEnd returned for the member's value.
type Row struct {
	Name  string
	Value Chunk
}

// String returns the bytes in the inclusive window [posLeft;posRight] as a
// string.
//
// The window is clamped to the length of the vector, and an empty window
// yields "". This keeps String safe on the chunk returned by New for empty
// input (whose right position is 0 although the vector has no bytes), and it
// prevents reading bytes that lie beyond the vector's length.
func (c Chunk) String() string {
	end := c.posRight + 1
	if end > len(c.vector) {
		end = len(c.vector)
	}
	if c.posLeft >= end {
		return ""
	}
	return string(c.vector[c.posLeft:end])
}

// Row parses one `"name": value` member starting at the left position.
//
// The left position must hold the opening quote of the name; otherwise an
// error wrapping ErrMapMemberNotString is returned. After the name's closing
// quote (and any whitespace) a ':' is required, else the error wraps
// ErrUnexpectedSymbol. The value's extent is then determined by ValueEnd.
// The returned Name is the raw bytes between the quotes; no unescaping is
// done here.
func (c Chunk) Row() (Row, error) {
	if first := c.vector[c.posLeft]; first != '"' {
		return Row{}, c.LeftPosf("%w: %c", ErrMapMemberNotString, first)
	}

	key, err := c.Match()
	if err != nil {
		return Row{}, c.LeftPosf("match: %w", err)
	}

	// Park a cursor on the name's closing quote, then step over it and any
	// whitespace so that it lands on the separator.
	cursor := c.Copy()
	cursor.posLeft, cursor.globLeft = key.posRight, key.globRight
	cursor = cursor.Skip()
	if sep := cursor.vector[cursor.posLeft]; sep != ':' {
		return Row{}, cursor.LeftPosf("%w '%c', expected ':'", ErrUnexpectedSymbol, sep)
	}

	val, err := cursor.Skip().ValueEnd()
	if err != nil {
		return Row{}, cursor.LeftPosf("value: %w", err)
	}

	// key.String() still carries both quotes; slice them off.
	quoted := key.String()
	member := Row{Value: val}
	member.Name = quoted[1 : key.posRight-key.posLeft]
	return member, nil
}

// After returns c with its left position moved to the byte right after v's
// global right position. If that target would fall at or beyond the end of
// c's vector, the left position stops one byte short of the target instead.
func (c Chunk) After(v Chunk) Chunk {
	// One past v's right edge, measured from c's current global left position.
	target := c.posLeft + (v.globRight - c.globLeft) + 1
	if target >= len(c.vector) {
		// Don't go too far: back off by one.
		target--
	}
	c.globLeft += target - c.posLeft
	c.posLeft = target
	return c
}

// EOF reports whether the left position is on or past the last byte of the
// vector. It is also true for an empty vector.
func (c Chunk) EOF() bool {
	lastIndex := len(c.vector) - 1
	return c.posLeft >= lastIndex
}