421 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			Go
		
	
	
			
		
		
	
	
			421 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			Go
		
	
	
package jmespath
 | 
						|
 | 
						|
import (
 | 
						|
	"bytes"
 | 
						|
	"encoding/json"
 | 
						|
	"fmt"
 | 
						|
	"strconv"
 | 
						|
	"strings"
 | 
						|
	"unicode/utf8"
 | 
						|
)
 | 
						|
 | 
						|
type token struct {
 | 
						|
	tokenType tokType
 | 
						|
	value     string
 | 
						|
	position  int
 | 
						|
	length    int
 | 
						|
}
 | 
						|
 | 
						|
// tokType identifies the kind of a token produced by the lexer.
type tokType int
 | 
						|
 | 
						|
// eof is the sentinel value returned by Lexer.next once the whole
// expression has been consumed.
const eof = -1
 | 
						|
 | 
						|
// Lexer holds the state used while an expression is being tokenized.
type Lexer struct {
	// expression is the expression provided by the user.
	expression string
	// currentPos is the current position in the expression.
	currentPos int
	// lastWidth is the width of the rune most recently read by next
	// (0 at end of input); back rewinds currentPos by this amount.
	lastWidth int
	// buf is an internal buffer used for building up values.
	buf bytes.Buffer
}
 | 
						|
 | 
						|
// SyntaxError is the main error used whenever a lexing or parsing error occurs.
type SyntaxError struct {
	msg        string // Error message displayed to the user.
	Expression string // Expression that generated the SyntaxError.
	Offset     int    // Byte offset in Expression where the error occurred.
}

// Error returns the error message prefixed with "SyntaxError: ".
func (e SyntaxError) Error() string {
	// In the future, it would be good to underline the specific
	// location where the error occurred.
	return "SyntaxError: " + e.msg
}

// HighlightLocation will show where the syntax error occurred.
// It will place a "^" character on a line below the expression
// at the point where the syntax error occurred.
//
// The caret is indented by the number of runes that precede Offset, so
// multi-byte characters earlier in the expression do not push it to the
// right. A negative Offset is treated as 0; an Offset past the end of the
// expression is used as-is.
func (e SyntaxError) HighlightLocation() string {
	pad := e.Offset
	switch {
	case pad < 0:
		// strings.Repeat panics on a negative count.
		pad = 0
	case pad <= len(e.Expression):
		// Offset counts bytes; convert it to a count of characters.
		pad = utf8.RuneCountInString(e.Expression[:pad])
	}
	return e.Expression + "\n" + strings.Repeat(" ", pad) + "^"
}
 | 
						|
 | 
						|
//go:generate stringer -type=tokType
 | 
						|
const (
 | 
						|
	tUnknown tokType = iota
 | 
						|
	tStar
 | 
						|
	tDot
 | 
						|
	tFilter
 | 
						|
	tFlatten
 | 
						|
	tLparen
 | 
						|
	tRparen
 | 
						|
	tLbracket
 | 
						|
	tRbracket
 | 
						|
	tLbrace
 | 
						|
	tRbrace
 | 
						|
	tOr
 | 
						|
	tPipe
 | 
						|
	tNumber
 | 
						|
	tUnquotedIdentifier
 | 
						|
	tQuotedIdentifier
 | 
						|
	tComma
 | 
						|
	tColon
 | 
						|
	tLT
 | 
						|
	tLTE
 | 
						|
	tGT
 | 
						|
	tGTE
 | 
						|
	tEQ
 | 
						|
	tNE
 | 
						|
	tJSONLiteral
 | 
						|
	tStringLiteral
 | 
						|
	tCurrent
 | 
						|
	tExpref
 | 
						|
	tAnd
 | 
						|
	tNot
 | 
						|
	tEOF
 | 
						|
)
 | 
						|
 | 
						|
var basicTokens = map[rune]tokType{
 | 
						|
	'.': tDot,
 | 
						|
	'*': tStar,
 | 
						|
	',': tComma,
 | 
						|
	':': tColon,
 | 
						|
	'{': tLbrace,
 | 
						|
	'}': tRbrace,
 | 
						|
	']': tRbracket, // tLbracket not included because it could be "[]"
 | 
						|
	'(': tLparen,
 | 
						|
	')': tRparen,
 | 
						|
	'@': tCurrent,
 | 
						|
}
 | 
						|
 | 
						|
// identifierStartBits is a bit mask for the runes [a-zA-Z_]. All of them
// lie in the range 64..127, so bit i of the mask stands for rune 64+i and
// the whole set fits in a single uint64. To test a rune, subtract 64 from
// it and check the bit at that index.
//
// Low word:  bits 1-26 are 'A'-'Z', bit 31 is '_'.
// High word: bits 33-58 are 'a'-'z'.
const identifierStartBits uint64 = 0x07FFFFFE87FFFFFE
 | 
						|
 | 
						|
// identifierTrailingBits is a bit mask for the runes [a-zA-Z0-9_] over
// the 128 ASCII code points, split across two uint64 words: element 0
// covers runes 0..63 (the digits) and element 1 covers runes 64..127
// (the letters and the underscore). Rune r is tested with bit r%64 of
// element r/64.
var identifierTrailingBits = [2]uint64{
	0x03FF000000000000, // '0'-'9'
	0x07FFFFFE87FFFFFE, // 'A'-'Z', '_', 'a'-'z'
}
 | 
						|
 | 
						|
// whiteSpace is the set of runes that the lexer skips between tokens.
var whiteSpace = map[rune]bool{
	' ':  true,
	'\t': true,
	'\n': true,
	'\r': true,
}
 | 
						|
 | 
						|
func (t token) String() string {
 | 
						|
	return fmt.Sprintf("Token{%+v, %s, %d, %d}",
 | 
						|
		t.tokenType, t.value, t.position, t.length)
 | 
						|
}
 | 
						|
 | 
						|
// NewLexer creates a new JMESPath lexer.
 | 
						|
func NewLexer() *Lexer {
 | 
						|
	lexer := Lexer{}
 | 
						|
	return &lexer
 | 
						|
}
 | 
						|
 | 
						|
func (lexer *Lexer) next() rune {
 | 
						|
	if lexer.currentPos >= len(lexer.expression) {
 | 
						|
		lexer.lastWidth = 0
 | 
						|
		return eof
 | 
						|
	}
 | 
						|
	r, w := utf8.DecodeRuneInString(lexer.expression[lexer.currentPos:])
 | 
						|
	lexer.lastWidth = w
 | 
						|
	lexer.currentPos += w
 | 
						|
	return r
 | 
						|
}
 | 
						|
 | 
						|
func (lexer *Lexer) back() {
 | 
						|
	lexer.currentPos -= lexer.lastWidth
 | 
						|
}
 | 
						|
 | 
						|
func (lexer *Lexer) peek() rune {
 | 
						|
	t := lexer.next()
 | 
						|
	lexer.back()
 | 
						|
	return t
 | 
						|
}
 | 
						|
 | 
						|
// tokenize takes an expression and returns corresponding tokens.
 | 
						|
func (lexer *Lexer) tokenize(expression string) ([]token, error) {
 | 
						|
	var tokens []token
 | 
						|
	lexer.expression = expression
 | 
						|
	lexer.currentPos = 0
 | 
						|
	lexer.lastWidth = 0
 | 
						|
loop:
 | 
						|
	for {
 | 
						|
		r := lexer.next()
 | 
						|
		if identifierStartBits&(1<<(uint64(r)-64)) > 0 {
 | 
						|
			t := lexer.consumeUnquotedIdentifier()
 | 
						|
			tokens = append(tokens, t)
 | 
						|
		} else if val, ok := basicTokens[r]; ok {
 | 
						|
			// Basic single char token.
 | 
						|
			t := token{
 | 
						|
				tokenType: val,
 | 
						|
				value:     string(r),
 | 
						|
				position:  lexer.currentPos - lexer.lastWidth,
 | 
						|
				length:    1,
 | 
						|
			}
 | 
						|
			tokens = append(tokens, t)
 | 
						|
		} else if r == '-' || (r >= '0' && r <= '9') {
 | 
						|
			t := lexer.consumeNumber()
 | 
						|
			tokens = append(tokens, t)
 | 
						|
		} else if r == '[' {
 | 
						|
			t := lexer.consumeLBracket()
 | 
						|
			tokens = append(tokens, t)
 | 
						|
		} else if r == '"' {
 | 
						|
			t, err := lexer.consumeQuotedIdentifier()
 | 
						|
			if err != nil {
 | 
						|
				return tokens, err
 | 
						|
			}
 | 
						|
			tokens = append(tokens, t)
 | 
						|
		} else if r == '\'' {
 | 
						|
			t, err := lexer.consumeRawStringLiteral()
 | 
						|
			if err != nil {
 | 
						|
				return tokens, err
 | 
						|
			}
 | 
						|
			tokens = append(tokens, t)
 | 
						|
		} else if r == '`' {
 | 
						|
			t, err := lexer.consumeLiteral()
 | 
						|
			if err != nil {
 | 
						|
				return tokens, err
 | 
						|
			}
 | 
						|
			tokens = append(tokens, t)
 | 
						|
		} else if r == '|' {
 | 
						|
			t := lexer.matchOrElse(r, '|', tOr, tPipe)
 | 
						|
			tokens = append(tokens, t)
 | 
						|
		} else if r == '<' {
 | 
						|
			t := lexer.matchOrElse(r, '=', tLTE, tLT)
 | 
						|
			tokens = append(tokens, t)
 | 
						|
		} else if r == '>' {
 | 
						|
			t := lexer.matchOrElse(r, '=', tGTE, tGT)
 | 
						|
			tokens = append(tokens, t)
 | 
						|
		} else if r == '!' {
 | 
						|
			t := lexer.matchOrElse(r, '=', tNE, tNot)
 | 
						|
			tokens = append(tokens, t)
 | 
						|
		} else if r == '=' {
 | 
						|
			t := lexer.matchOrElse(r, '=', tEQ, tUnknown)
 | 
						|
			tokens = append(tokens, t)
 | 
						|
		} else if r == '&' {
 | 
						|
			t := lexer.matchOrElse(r, '&', tAnd, tExpref)
 | 
						|
			tokens = append(tokens, t)
 | 
						|
		} else if r == eof {
 | 
						|
			break loop
 | 
						|
		} else if _, ok := whiteSpace[r]; ok {
 | 
						|
			// Ignore whitespace
 | 
						|
		} else {
 | 
						|
			return tokens, lexer.syntaxError(fmt.Sprintf("Unknown char: %s", strconv.QuoteRuneToASCII(r)))
 | 
						|
		}
 | 
						|
	}
 | 
						|
	tokens = append(tokens, token{tEOF, "", len(lexer.expression), 0})
 | 
						|
	return tokens, nil
 | 
						|
}
 | 
						|
 | 
						|
// Consume characters until the ending rune "r" is reached.
 | 
						|
// If the end of the expression is reached before seeing the
 | 
						|
// terminating rune "r", then an error is returned.
 | 
						|
// If no error occurs then the matching substring is returned.
 | 
						|
// The returned string will not include the ending rune.
 | 
						|
func (lexer *Lexer) consumeUntil(end rune) (string, error) {
 | 
						|
	start := lexer.currentPos
 | 
						|
	current := lexer.next()
 | 
						|
	for current != end && current != eof {
 | 
						|
		if current == '\\' && lexer.peek() != eof {
 | 
						|
			lexer.next()
 | 
						|
		}
 | 
						|
		current = lexer.next()
 | 
						|
	}
 | 
						|
	if lexer.lastWidth == 0 {
 | 
						|
		// Then we hit an EOF so we never reached the closing
 | 
						|
		// delimiter.
 | 
						|
		return "", SyntaxError{
 | 
						|
			msg:        "Unclosed delimiter: " + string(end),
 | 
						|
			Expression: lexer.expression,
 | 
						|
			Offset:     len(lexer.expression),
 | 
						|
		}
 | 
						|
	}
 | 
						|
	return lexer.expression[start : lexer.currentPos-lexer.lastWidth], nil
 | 
						|
}
 | 
						|
 | 
						|
func (lexer *Lexer) consumeLiteral() (token, error) {
 | 
						|
	start := lexer.currentPos
 | 
						|
	value, err := lexer.consumeUntil('`')
 | 
						|
	if err != nil {
 | 
						|
		return token{}, err
 | 
						|
	}
 | 
						|
	value = strings.Replace(value, "\\`", "`", -1)
 | 
						|
	return token{
 | 
						|
		tokenType: tJSONLiteral,
 | 
						|
		value:     value,
 | 
						|
		position:  start,
 | 
						|
		length:    len(value),
 | 
						|
	}, nil
 | 
						|
}
 | 
						|
 | 
						|
func (lexer *Lexer) consumeRawStringLiteral() (token, error) {
 | 
						|
	start := lexer.currentPos
 | 
						|
	currentIndex := start
 | 
						|
	current := lexer.next()
 | 
						|
	for current != '\'' && lexer.peek() != eof {
 | 
						|
		if current == '\\' && lexer.peek() == '\'' {
 | 
						|
			chunk := lexer.expression[currentIndex : lexer.currentPos-1]
 | 
						|
			lexer.buf.WriteString(chunk)
 | 
						|
			lexer.buf.WriteString("'")
 | 
						|
			lexer.next()
 | 
						|
			currentIndex = lexer.currentPos
 | 
						|
		}
 | 
						|
		current = lexer.next()
 | 
						|
	}
 | 
						|
	if lexer.lastWidth == 0 {
 | 
						|
		// Then we hit an EOF so we never reached the closing
 | 
						|
		// delimiter.
 | 
						|
		return token{}, SyntaxError{
 | 
						|
			msg:        "Unclosed delimiter: '",
 | 
						|
			Expression: lexer.expression,
 | 
						|
			Offset:     len(lexer.expression),
 | 
						|
		}
 | 
						|
	}
 | 
						|
	if currentIndex < lexer.currentPos {
 | 
						|
		lexer.buf.WriteString(lexer.expression[currentIndex : lexer.currentPos-1])
 | 
						|
	}
 | 
						|
	value := lexer.buf.String()
 | 
						|
	// Reset the buffer so it can reused again.
 | 
						|
	lexer.buf.Reset()
 | 
						|
	return token{
 | 
						|
		tokenType: tStringLiteral,
 | 
						|
		value:     value,
 | 
						|
		position:  start,
 | 
						|
		length:    len(value),
 | 
						|
	}, nil
 | 
						|
}
 | 
						|
 | 
						|
func (lexer *Lexer) syntaxError(msg string) SyntaxError {
 | 
						|
	return SyntaxError{
 | 
						|
		msg:        msg,
 | 
						|
		Expression: lexer.expression,
 | 
						|
		Offset:     lexer.currentPos - 1,
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
// Checks for a two char token, otherwise matches a single character
 | 
						|
// token. This is used whenever a two char token overlaps a single
 | 
						|
// char token, e.g. "||" -> tPipe, "|" -> tOr.
 | 
						|
func (lexer *Lexer) matchOrElse(first rune, second rune, matchedType tokType, singleCharType tokType) token {
 | 
						|
	start := lexer.currentPos - lexer.lastWidth
 | 
						|
	nextRune := lexer.next()
 | 
						|
	var t token
 | 
						|
	if nextRune == second {
 | 
						|
		t = token{
 | 
						|
			tokenType: matchedType,
 | 
						|
			value:     string(first) + string(second),
 | 
						|
			position:  start,
 | 
						|
			length:    2,
 | 
						|
		}
 | 
						|
	} else {
 | 
						|
		lexer.back()
 | 
						|
		t = token{
 | 
						|
			tokenType: singleCharType,
 | 
						|
			value:     string(first),
 | 
						|
			position:  start,
 | 
						|
			length:    1,
 | 
						|
		}
 | 
						|
	}
 | 
						|
	return t
 | 
						|
}
 | 
						|
 | 
						|
func (lexer *Lexer) consumeLBracket() token {
 | 
						|
	// There's three options here:
 | 
						|
	// 1. A filter expression "[?"
 | 
						|
	// 2. A flatten operator "[]"
 | 
						|
	// 3. A bare rbracket "["
 | 
						|
	start := lexer.currentPos - lexer.lastWidth
 | 
						|
	nextRune := lexer.next()
 | 
						|
	var t token
 | 
						|
	if nextRune == '?' {
 | 
						|
		t = token{
 | 
						|
			tokenType: tFilter,
 | 
						|
			value:     "[?",
 | 
						|
			position:  start,
 | 
						|
			length:    2,
 | 
						|
		}
 | 
						|
	} else if nextRune == ']' {
 | 
						|
		t = token{
 | 
						|
			tokenType: tFlatten,
 | 
						|
			value:     "[]",
 | 
						|
			position:  start,
 | 
						|
			length:    2,
 | 
						|
		}
 | 
						|
	} else {
 | 
						|
		t = token{
 | 
						|
			tokenType: tLbracket,
 | 
						|
			value:     "[",
 | 
						|
			position:  start,
 | 
						|
			length:    1,
 | 
						|
		}
 | 
						|
		lexer.back()
 | 
						|
	}
 | 
						|
	return t
 | 
						|
}
 | 
						|
 | 
						|
func (lexer *Lexer) consumeQuotedIdentifier() (token, error) {
 | 
						|
	start := lexer.currentPos
 | 
						|
	value, err := lexer.consumeUntil('"')
 | 
						|
	if err != nil {
 | 
						|
		return token{}, err
 | 
						|
	}
 | 
						|
	var decoded string
 | 
						|
	asJSON := []byte("\"" + value + "\"")
 | 
						|
	if err := json.Unmarshal([]byte(asJSON), &decoded); err != nil {
 | 
						|
		return token{}, err
 | 
						|
	}
 | 
						|
	return token{
 | 
						|
		tokenType: tQuotedIdentifier,
 | 
						|
		value:     decoded,
 | 
						|
		position:  start - 1,
 | 
						|
		length:    len(decoded),
 | 
						|
	}, nil
 | 
						|
}
 | 
						|
 | 
						|
func (lexer *Lexer) consumeUnquotedIdentifier() token {
 | 
						|
	// Consume runes until we reach the end of an unquoted
 | 
						|
	// identifier.
 | 
						|
	start := lexer.currentPos - lexer.lastWidth
 | 
						|
	for {
 | 
						|
		r := lexer.next()
 | 
						|
		if r < 0 || r > 128 || identifierTrailingBits[uint64(r)/64]&(1<<(uint64(r)%64)) == 0 {
 | 
						|
			lexer.back()
 | 
						|
			break
 | 
						|
		}
 | 
						|
	}
 | 
						|
	value := lexer.expression[start:lexer.currentPos]
 | 
						|
	return token{
 | 
						|
		tokenType: tUnquotedIdentifier,
 | 
						|
		value:     value,
 | 
						|
		position:  start,
 | 
						|
		length:    lexer.currentPos - start,
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
func (lexer *Lexer) consumeNumber() token {
 | 
						|
	// Consume runes until we reach something that's not a number.
 | 
						|
	start := lexer.currentPos - lexer.lastWidth
 | 
						|
	for {
 | 
						|
		r := lexer.next()
 | 
						|
		if r < '0' || r > '9' {
 | 
						|
			lexer.back()
 | 
						|
			break
 | 
						|
		}
 | 
						|
	}
 | 
						|
	value := lexer.expression[start:lexer.currentPos]
 | 
						|
	return token{
 | 
						|
		tokenType: tNumber,
 | 
						|
		value:     value,
 | 
						|
		position:  start,
 | 
						|
		length:    lexer.currentPos - start,
 | 
						|
	}
 | 
						|
}
 |