yosoyhendrix
/
psiphon-tunnel-core


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315
							package mar

import (
	"bytes"
	"strconv"
	"strings"
	"unicode/utf8"
)

// Scanner is a marionette DSL tokenizer.
type Scanner struct {
	i    int
	data []byte
	pos  Pos
}

// NewScanner returns a new instance of Scanner.
func NewScanner(data []byte) *Scanner {
	data = bytes.Replace(data, []byte{0}, []byte("\uFFFD"), -1)
	data = bytes.Replace(data, []byte{'\f'}, []byte{'\n'}, -1)
	data = bytes.Replace(data, []byte{'\r', '\n'}, []byte{'\n'}, -1)
	return &Scanner{data: data}
}

// Scan returns the next token from the reader.
func (s *Scanner) Scan() (tok Token, lit string, pos Pos) {
	for {
		// Special handling for whitespace, numbers, strings, & names.
		ch := s.peek()
		switch {
		case isWhitespace(ch):
			return s.scanWhitespace()
		case isDigit(ch) || ch == '-':
			return s.scanNumber()
		case ch == '"' || ch == '\'':
			return s.scanString()
		case isNameStart(ch):
			return s.scanIdent()
		}

		// Check against individual code points next.
		pos = s.pos
		switch ch := s.read(); ch {
		case eof:
			return EOF, "", pos
		case ',':
			return COMMA, string(ch), pos
		case ':':
			return COLON, string(ch), pos
		case '(':
			return LPAREN, string(ch), pos
		case ')':
			return RPAREN, string(ch), pos
		case '.':
			return DOT, string(ch), pos
		case '#':
			return HASH, string(ch), pos
		default:
			return ILLEGAL, string(ch), pos
		}
	}
}

// ScanIgnoreWhitespace returns the next non-whitespace, non-comment token.
func (s *Scanner) ScanIgnoreWhitespace() (tok Token, lit string, pos Pos) {
	for {
		if tok, lit, pos = s.Scan(); tok == HASH {
			s.scanUntilNewline()
		} else if tok != WS {
			return tok, lit, pos
		}
	}
}

// Peek returns the next token without moving the scanner forward.
func (s *Scanner) Peek() (tok Token, lit string, pos Pos) {
	i, prev := s.i, s.pos
	tok, lit, pos = s.Scan()
	s.i, s.pos = i, prev
	return tok, lit, pos
}

// PeekIgnoreWhitespace returns the next non-whitespace, non-comment token without moving the scanner forward.
func (s *Scanner) PeekIgnoreWhitespace() (tok Token, lit string, pos Pos) {
	i, prev := s.i, s.pos
	for {
		if tok, lit, pos = s.Scan(); tok == HASH {
			s.scanUntilNewline()
		} else if tok != WS {
			s.i, s.pos = i, prev
			return tok, lit, pos
		}
	}
}

// scanWhitespace consumes the current code point and all subsequent whitespace.
func (s *Scanner) scanWhitespace() (tok Token, lit string, pos Pos) {
	pos = s.pos

	var buf bytes.Buffer
	for ch := s.peek(); isWhitespace(ch); ch = s.peek() {
		buf.WriteRune(s.read())
	}
	return WS, buf.String(), pos
}

// scanUntilNewline consumes all code points up to and including the next newline or EOF.
func (s *Scanner) scanUntilNewline() {
	for ch := s.read(); ch != '\n' && ch != eof; ch = s.read() {
	}
}

// scanString consumes a quoted string.
func (s *Scanner) scanString() (tok Token, lit string, pos Pos) {
	pos = s.pos
	ending := s.read()

	var buf bytes.Buffer
	for {
		if ch := s.peek(); ch == eof {
			return ILLEGAL, "", pos
		}

		switch ch := s.read(); ch {
		case ending:
			return STRING, buf.String(), pos
		case '\\':
			switch next := s.peek(); next {
			case '\\':
				buf.WriteRune(s.read())
			case '\'':
				buf.WriteRune(s.read())
			case '"':
				buf.WriteRune(s.read())
			case 'a':
				s.read()
				buf.WriteRune('\a')
			case 'b':
				s.read()
				buf.WriteRune('\b')
			case 'f':
				s.read()
				buf.WriteRune('\f')
			case 'n':
				s.read()
				buf.WriteRune('\n')
			case 'r':
				s.read()
				buf.WriteRune('\r')
			case 't':
				s.read()
				buf.WriteRune('\t')
			case 'v':
				s.read()
				buf.WriteRune('\v')
			case 'o':
				s.read()
				buf.WriteRune(rune(s.readOctal()))
			case 'x':
				s.read()
				buf.WriteRune(rune(s.readHex()))
			default:
				buf.WriteRune('\\')
			}
		default:
			buf.WriteRune(ch)
		}
	}
}

// scanNumber consumes a number.
func (s *Scanner) scanNumber() (tok Token, lit string, pos Pos) {
	pos = s.pos

	// If initial code point is + or - then store it.
	var buf bytes.Buffer
	switch ch := s.peek(); ch {
	case '+', '-':
		buf.WriteRune(s.read())
	}

	// Read as many digits as possible.
	s.scanDigits(&buf)

	// If next code points are a full stop and digit then consume them.
	if next := s.peek(); next == '.' {
		buf.WriteRune(s.read())
		s.scanDigits(&buf)
		return FLOAT, buf.String(), pos
	}
	return INTEGER, buf.String(), pos
}

// scanDigits consume a contiguous series of digits.
func (s *Scanner) scanDigits(buf *bytes.Buffer) {
	for ch := s.peek(); isDigit(ch); ch = s.peek() {
		buf.WriteRune(s.read())
	}
}

// readOctal reads and parses a stream of octal digits.
func (s *Scanner) readOctal() int {
	var buf bytes.Buffer
	for ch := s.peek(); isOctal(ch); ch = s.peek() {
		buf.WriteRune(s.read())
	}
	i, _ := strconv.ParseInt(buf.String(), 8, 64)
	return int(i)
}

// readHex reads and parses a stream of hex digits.
func (s *Scanner) readHex() int {
	var buf bytes.Buffer
	for ch := s.peek(); isHex(ch); ch = s.peek() {
		buf.WriteRune(s.read())
	}
	i, _ := strconv.ParseInt(buf.String(), 16, 64)
	return int(i)
}

// scanIdent consumes an identifier token.
func (s *Scanner) scanIdent() (tok Token, lit string, pos Pos) {
	pos = s.pos

	var buf bytes.Buffer
	for ch := s.peek(); isName(ch); ch = s.peek() {
		buf.WriteRune(s.read())
	}

	lit = buf.String()
	switch strings.ToLower(lit) {
	case "action":
		return ACTION, lit, pos
	case "client":
		return CLIENT, lit, pos
	case "if":
		return IF, lit, pos
	case "end":
		return END, lit, pos
	case "null":
		return NULL, lit, pos
	case "regex_match_incoming":
		return REGEX_MATCH_INCOMING, lit, pos
	case "server":
		return SERVER, lit, pos
	case "start":
		return START, lit, pos
	default:
		return IDENT, buf.String(), pos
	}
}

func (s *Scanner) read() rune {
	if s.i >= len(s.data) {
		return eof
	}
	ch, sz := utf8.DecodeRune(s.data[s.i:])
	s.i += sz

	// Track scanner position.
	if ch == '\n' {
		s.pos.Line++
		s.pos.Char = 0
	} else {
		s.pos.Char++
	}

	return ch
}

func (s *Scanner) peek() rune {
	if s.i >= len(s.data) {
		return eof
	}
	ch, _ := utf8.DecodeRune(s.data[s.i:])
	return ch
}

// isWhitespace returns true if the rune is a space, tab, or newline.
func isWhitespace(ch rune) bool {
	return ch == ' ' || ch == '\t' || ch == '\n'
}

// isLetter returns true if the rune is a letter.
func isLetter(ch rune) bool {
	return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')
}

// isDigit returns true if the rune is a decimal digit.
func isDigit(ch rune) bool {
	return (ch >= '0' && ch <= '9')
}

// isOctal returns true if the rune is an octal digit.
func isOctal(ch rune) bool {
	return (ch >= '0' && ch <= '7')
}

// isHex returns true if the rune is a hex digit.
func isHex(ch rune) bool {
	return (ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F')
}

// isNameStart returns true if the rune can start a name.
func isNameStart(ch rune) bool {
	return isLetter(ch) || ch == '_'
}

// isName returns true if the character is a name code point.
func isName(ch rune) bool {
	return isNameStart(ch) || isDigit(ch) || ch == '-'
}

// eof represents an EOF file byte.
var eof rune = -1