bin/ 230 CLI tools (ask-*, br-*, agent-*, roadid, carpool) scripts/ 99 automation scripts fleet/ Node configs and deployment workers/ Cloudflare Worker sources (roadpay, road-search, squad webhooks) roadc/ RoadC programming language roadnet/ Mesh network (5 APs, WireGuard) operator/ Memory system scripts config/ System configs dotfiles/ Shell configs docs/ Documentation BlackRoad OS — Pave Tomorrow. RoadChain-SHA2048: d1a24f55318d338b RoadChain-Identity: alexa@sovereign RoadChain-Full: d1a24f55318d338b24b60bad7be39286379c76ae5470817482100cb0ddbbcb97e147d07ac7243da0a9f0363e4e5c833d612b9c0df3a3cd20802465420278ef74875a5b77f55af6fe42a931b8b635b3d0d0b6bde9abf33dc42eea52bc03c951406d8cbe49f1a3d29b26a94dade05e9477f34a7d4d4c6ec4005c3c2ac54e73a68440c512c8e83fd9b1fe234750b898ef8f4032c23db173961fe225e67a0432b5293a9714f76c5c57ed5fdf35b9fb40fd73c03ebf88b7253c6a0575f5afb6a6b49b3bda310602fb1ef676859962dad2aebbb2875814b30eee0a8ba195e482d4cbc91d8819e7f38f6db53e8063401649c77bb994371473cabfb917fb53e8cbe73d60
619 lines
19 KiB
Python
619 lines
19 KiB
Python
"""
|
|
BlackRoad OS Language Lexer
|
|
Transforms source code into tokens for parsing
|
|
"""
|
|
|
|
import re
|
|
from enum import Enum, auto
|
|
from dataclasses import dataclass
|
|
from typing import List, Optional
|
|
|
|
class TokenType(Enum):
|
|
"""All possible token types in BlackRoad language"""
|
|
|
|
# Literals
|
|
INTEGER = auto()
|
|
FLOAT = auto()
|
|
STRING = auto()
|
|
BOOLEAN = auto()
|
|
COLOR = auto()
|
|
|
|
# Identifiers & Keywords
|
|
IDENTIFIER = auto()
|
|
|
|
# Keywords - Control Flow
|
|
IF = auto()
|
|
ELIF = auto()
|
|
ELSE = auto()
|
|
MATCH = auto()
|
|
FOR = auto()
|
|
WHILE = auto()
|
|
BREAK = auto()
|
|
CONTINUE = auto()
|
|
RETURN = auto()
|
|
|
|
# Keywords - Declarations
|
|
LET = auto()
|
|
VAR = auto()
|
|
CONST = auto()
|
|
FUN = auto()
|
|
ASYNC = auto()
|
|
TYPE = auto()
|
|
MODULE = auto()
|
|
IMPORT = auto()
|
|
FROM = auto()
|
|
EXPORT = auto()
|
|
|
|
# Keywords - 3D/Spatial
|
|
SPACE = auto()
|
|
CUBE = auto()
|
|
SPHERE = auto()
|
|
PLANE = auto()
|
|
LIGHT = auto()
|
|
CAMERA = auto()
|
|
RENDER = auto()
|
|
|
|
# Keywords - Concurrency
|
|
SPAWN = auto()
|
|
AWAIT = auto()
|
|
CHAN = auto()
|
|
SELECT = auto()
|
|
CASE = auto()
|
|
|
|
# Keywords - Memory
|
|
MUT = auto()
|
|
|
|
# Keywords - Metaprogramming
|
|
MACRO = auto()
|
|
COMPTIME = auto()
|
|
ASM = auto()
|
|
|
|
# Types
|
|
INT = auto()
|
|
FLOAT_TYPE = auto()
|
|
STRING_TYPE = auto()
|
|
BOOL_TYPE = auto()
|
|
BYTE = auto()
|
|
CHAR = auto()
|
|
VEC2 = auto()
|
|
VEC3 = auto()
|
|
VEC4 = auto()
|
|
COLOR_TYPE = auto()
|
|
LIST = auto()
|
|
DICT = auto()
|
|
SET = auto()
|
|
ANY = auto()
|
|
|
|
# Operators
|
|
PLUS = auto() # +
|
|
MINUS = auto() # -
|
|
STAR = auto() # *
|
|
SLASH = auto() # /
|
|
PERCENT = auto() # %
|
|
POWER = auto() # **
|
|
|
|
ASSIGN = auto() # =
|
|
PLUS_ASSIGN = auto() # +=
|
|
MINUS_ASSIGN = auto() # -=
|
|
STAR_ASSIGN = auto() # *=
|
|
SLASH_ASSIGN = auto() # /=
|
|
|
|
EQ = auto() # ==
|
|
NE = auto() # !=
|
|
LT = auto() # <
|
|
GT = auto() # >
|
|
LE = auto() # <=
|
|
GE = auto() # >=
|
|
|
|
AND = auto() # and
|
|
OR = auto() # or
|
|
NOT = auto() # not
|
|
|
|
AMPERSAND = auto() # &
|
|
PIPE = auto() # |
|
|
CARET = auto() # ^
|
|
TILDE = auto() # ~
|
|
|
|
# Delimiters
|
|
LPAREN = auto() # (
|
|
RPAREN = auto() # )
|
|
LBRACKET = auto() # [
|
|
RBRACKET = auto() # ]
|
|
LBRACE = auto() # {
|
|
RBRACE = auto() # }
|
|
|
|
COLON = auto() # :
|
|
SEMICOLON = auto() # ;
|
|
COMMA = auto() # ,
|
|
DOT = auto() # .
|
|
ARROW = auto() # ->
|
|
DOUBLE_DOT = auto() # ..
|
|
TRIPLE_DOT = auto() # ...
|
|
AT = auto() # @
|
|
HASH = auto() # #
|
|
QUESTION = auto() # ?
|
|
DOLLAR = auto() # $
|
|
|
|
# Special
|
|
NEWLINE = auto()
|
|
INDENT = auto()
|
|
DEDENT = auto()
|
|
EOF = auto()
|
|
|
|
# HTML-like tags for 3D
|
|
LT_SLASH = auto() # </
|
|
SLASH_GT = auto() # />
|
|
|
|
@dataclass
|
|
class Token:
|
|
"""Represents a single token"""
|
|
type: TokenType
|
|
value: any
|
|
line: int
|
|
column: int
|
|
|
|
def __repr__(self):
|
|
return f"Token({self.type.name}, {self.value!r}, {self.line}:{self.column})"
|
|
|
|
class Lexer:
|
|
"""BlackRoad OS Language Lexer"""
|
|
|
|
KEYWORDS = {
|
|
# Control flow
|
|
'if': TokenType.IF,
|
|
'elif': TokenType.ELIF,
|
|
'else': TokenType.ELSE,
|
|
'match': TokenType.MATCH,
|
|
'for': TokenType.FOR,
|
|
'while': TokenType.WHILE,
|
|
'break': TokenType.BREAK,
|
|
'continue': TokenType.CONTINUE,
|
|
'return': TokenType.RETURN,
|
|
|
|
# Declarations
|
|
'let': TokenType.LET,
|
|
'var': TokenType.VAR,
|
|
'const': TokenType.CONST,
|
|
'fun': TokenType.FUN,
|
|
'async': TokenType.ASYNC,
|
|
'type': TokenType.TYPE,
|
|
'module': TokenType.MODULE,
|
|
'import': TokenType.IMPORT,
|
|
'from': TokenType.FROM,
|
|
'export': TokenType.EXPORT,
|
|
|
|
# 3D/Spatial
|
|
'space': TokenType.SPACE,
|
|
'cube': TokenType.CUBE,
|
|
'sphere': TokenType.SPHERE,
|
|
'plane': TokenType.PLANE,
|
|
'light': TokenType.LIGHT,
|
|
'camera': TokenType.CAMERA,
|
|
'render': TokenType.RENDER,
|
|
|
|
# Concurrency
|
|
'spawn': TokenType.SPAWN,
|
|
'await': TokenType.AWAIT,
|
|
'chan': TokenType.CHAN,
|
|
'select': TokenType.SELECT,
|
|
'case': TokenType.CASE,
|
|
|
|
# Memory
|
|
'mut': TokenType.MUT,
|
|
|
|
# Metaprogramming
|
|
'macro': TokenType.MACRO,
|
|
'comptime': TokenType.COMPTIME,
|
|
'asm': TokenType.ASM,
|
|
|
|
# Boolean literals
|
|
'true': TokenType.BOOLEAN,
|
|
'false': TokenType.BOOLEAN,
|
|
|
|
# Logical operators
|
|
'and': TokenType.AND,
|
|
'or': TokenType.OR,
|
|
'not': TokenType.NOT,
|
|
|
|
# Types
|
|
'int': TokenType.INT,
|
|
'float': TokenType.FLOAT_TYPE,
|
|
'string': TokenType.STRING_TYPE,
|
|
'bool': TokenType.BOOL_TYPE,
|
|
'byte': TokenType.BYTE,
|
|
'char': TokenType.CHAR,
|
|
'vec2': TokenType.VEC2,
|
|
'vec3': TokenType.VEC3,
|
|
'vec4': TokenType.VEC4,
|
|
'color': TokenType.COLOR_TYPE,
|
|
'list': TokenType.LIST,
|
|
'dict': TokenType.DICT,
|
|
'set': TokenType.SET,
|
|
'any': TokenType.ANY,
|
|
}
|
|
|
|
def __init__(self, source: str, filename: str = "<stdin>"):
|
|
self.source = source
|
|
self.filename = filename
|
|
self.pos = 0
|
|
self.line = 1
|
|
self.column = 1
|
|
self.tokens: List[Token] = []
|
|
self.indent_stack = [0] # Track indentation levels
|
|
|
|
def current_char(self) -> Optional[str]:
|
|
"""Get current character without advancing"""
|
|
if self.pos >= len(self.source):
|
|
return None
|
|
return self.source[self.pos]
|
|
|
|
def peek_char(self, offset: int = 1) -> Optional[str]:
|
|
"""Look ahead at character"""
|
|
pos = self.pos + offset
|
|
if pos >= len(self.source):
|
|
return None
|
|
return self.source[pos]
|
|
|
|
def advance(self) -> Optional[str]:
|
|
"""Move to next character"""
|
|
char = self.current_char()
|
|
if char == '\n':
|
|
self.line += 1
|
|
self.column = 1
|
|
else:
|
|
self.column += 1
|
|
self.pos += 1
|
|
return char
|
|
|
|
def skip_whitespace(self, skip_newlines: bool = False):
|
|
"""Skip whitespace (but preserve newlines for indentation unless skip_newlines=True)"""
|
|
while self.current_char() in ' \t\r' or (skip_newlines and self.current_char() == '\n'):
|
|
self.advance()
|
|
|
|
def skip_comment(self):
|
|
"""Skip single-line comments starting with #"""
|
|
if self.current_char() == '#':
|
|
# Check for multi-line comment #[ ]#
|
|
if self.peek_char() == '[':
|
|
self.advance() # #
|
|
self.advance() # [
|
|
while True:
|
|
if self.current_char() is None:
|
|
raise SyntaxError(f"Unterminated multi-line comment at {self.line}:{self.column}")
|
|
if self.current_char() == ']' and self.peek_char() == '#':
|
|
self.advance() # ]
|
|
self.advance() # #
|
|
break
|
|
self.advance()
|
|
else:
|
|
# Single-line comment
|
|
while self.current_char() not in ['\n', None]:
|
|
self.advance()
|
|
|
|
def tokenize_number(self) -> Token:
|
|
"""Tokenize integer or float"""
|
|
start_line = self.line
|
|
start_column = self.column
|
|
num_str = ''
|
|
|
|
while self.current_char() and (self.current_char().isdigit() or self.current_char() == '.'):
|
|
num_str += self.current_char()
|
|
self.advance()
|
|
|
|
# Check for scientific notation
|
|
if self.current_char() in ['e', 'E']:
|
|
num_str += self.current_char()
|
|
self.advance()
|
|
if self.current_char() in ['+', '-']:
|
|
num_str += self.current_char()
|
|
self.advance()
|
|
while self.current_char() and self.current_char().isdigit():
|
|
num_str += self.current_char()
|
|
self.advance()
|
|
|
|
if '.' in num_str or 'e' in num_str or 'E' in num_str:
|
|
return Token(TokenType.FLOAT, float(num_str), start_line, start_column)
|
|
else:
|
|
return Token(TokenType.INTEGER, int(num_str), start_line, start_column)
|
|
|
|
def tokenize_string(self) -> Token:
|
|
"""Tokenize string literal"""
|
|
start_line = self.line
|
|
start_column = self.column
|
|
quote_char = self.current_char()
|
|
self.advance() # Skip opening quote
|
|
|
|
string_value = ''
|
|
while self.current_char() and self.current_char() != quote_char:
|
|
if self.current_char() == '\\':
|
|
self.advance()
|
|
# Handle escape sequences
|
|
escape_char = self.current_char()
|
|
if escape_char == 'n':
|
|
string_value += '\n'
|
|
elif escape_char == 't':
|
|
string_value += '\t'
|
|
elif escape_char == 'r':
|
|
string_value += '\r'
|
|
elif escape_char == '\\':
|
|
string_value += '\\'
|
|
elif escape_char == quote_char:
|
|
string_value += quote_char
|
|
else:
|
|
string_value += escape_char
|
|
self.advance()
|
|
else:
|
|
string_value += self.current_char()
|
|
self.advance()
|
|
|
|
if self.current_char() != quote_char:
|
|
raise SyntaxError(f"Unterminated string at {start_line}:{start_column}")
|
|
|
|
self.advance() # Skip closing quote
|
|
return Token(TokenType.STRING, string_value, start_line, start_column)
|
|
|
|
def tokenize_color(self) -> Token:
|
|
"""Tokenize color literal like #FF1D6C"""
|
|
start_line = self.line
|
|
start_column = self.column
|
|
self.advance() # Skip #
|
|
|
|
color_code = '#'
|
|
while self.current_char() and self.current_char() in '0123456789ABCDEFabcdef':
|
|
color_code += self.current_char()
|
|
self.advance()
|
|
|
|
if len(color_code) not in [4, 7, 9]: # #RGB, #RRGGBB, #RRGGBBAA
|
|
raise SyntaxError(f"Invalid color code {color_code} at {start_line}:{start_column}")
|
|
|
|
return Token(TokenType.COLOR, color_code, start_line, start_column)
|
|
|
|
def tokenize_identifier(self) -> Token:
|
|
"""Tokenize identifier or keyword"""
|
|
start_line = self.line
|
|
start_column = self.column
|
|
identifier = ''
|
|
|
|
while self.current_char() and (self.current_char().isalnum() or self.current_char() == '_'):
|
|
identifier += self.current_char()
|
|
self.advance()
|
|
|
|
# Check if it's a keyword
|
|
token_type = self.KEYWORDS.get(identifier, TokenType.IDENTIFIER)
|
|
|
|
# Special handling for boolean literals
|
|
if token_type == TokenType.BOOLEAN:
|
|
value = identifier == 'true'
|
|
return Token(token_type, value, start_line, start_column)
|
|
|
|
return Token(token_type, identifier, start_line, start_column)
|
|
|
|
def handle_indentation(self, indent_level: int):
|
|
"""Generate INDENT/DEDENT tokens based on indentation"""
|
|
if indent_level > self.indent_stack[-1]:
|
|
self.indent_stack.append(indent_level)
|
|
self.tokens.append(Token(TokenType.INDENT, indent_level, self.line, self.column))
|
|
elif indent_level < self.indent_stack[-1]:
|
|
while self.indent_stack and indent_level < self.indent_stack[-1]:
|
|
self.indent_stack.pop()
|
|
self.tokens.append(Token(TokenType.DEDENT, indent_level, self.line, self.column))
|
|
if indent_level != self.indent_stack[-1]:
|
|
raise SyntaxError(f"Inconsistent indentation at {self.line}:{self.column}")
|
|
|
|
def tokenize(self) -> List[Token]:
|
|
"""Main tokenization loop"""
|
|
while self.pos < len(self.source):
|
|
# Handle newlines and indentation
|
|
if self.current_char() == '\n':
|
|
# Skip empty lines and comments
|
|
self.advance()
|
|
|
|
# Measure indentation on next line
|
|
indent_level = 0
|
|
while self.current_char() and self.current_char() in ' \t':
|
|
if self.current_char() == ' ':
|
|
indent_level += 1
|
|
elif self.current_char() == '\t':
|
|
indent_level += 4 # Tab = 4 spaces
|
|
self.advance()
|
|
|
|
# Skip blank lines and comment-only lines
|
|
if self.current_char() == '\n' or self.current_char() == '#':
|
|
continue
|
|
|
|
# Handle indentation
|
|
if self.current_char() is not None:
|
|
self.handle_indentation(indent_level)
|
|
self.tokens.append(Token(TokenType.NEWLINE, '\n', self.line, self.column))
|
|
continue
|
|
|
|
# Skip whitespace (but not newlines)
|
|
if self.current_char() in ' \t\r':
|
|
self.skip_whitespace()
|
|
continue
|
|
|
|
# Skip comments
|
|
if self.current_char() == '#':
|
|
# Check if it's a color code
|
|
if self.peek_char() and self.peek_char() in '0123456789ABCDEFabcdef':
|
|
self.tokens.append(self.tokenize_color())
|
|
else:
|
|
self.skip_comment()
|
|
continue
|
|
|
|
# Numbers
|
|
if self.current_char().isdigit():
|
|
self.tokens.append(self.tokenize_number())
|
|
continue
|
|
|
|
# Strings
|
|
if self.current_char() in ['"', "'"]:
|
|
self.tokens.append(self.tokenize_string())
|
|
continue
|
|
|
|
# Identifiers and keywords
|
|
if self.current_char().isalpha() or self.current_char() == '_':
|
|
self.tokens.append(self.tokenize_identifier())
|
|
continue
|
|
|
|
# Operators and delimiters
|
|
char = self.current_char()
|
|
line = self.line
|
|
col = self.column
|
|
|
|
# Two-character operators
|
|
if char == '-' and self.peek_char() == '>':
|
|
self.advance()
|
|
self.advance()
|
|
self.tokens.append(Token(TokenType.ARROW, '->', line, col))
|
|
continue
|
|
|
|
if char == '.' and self.peek_char() == '.':
|
|
self.advance()
|
|
self.advance()
|
|
if self.current_char() == '.':
|
|
self.advance()
|
|
self.tokens.append(Token(TokenType.TRIPLE_DOT, '...', line, col))
|
|
else:
|
|
self.tokens.append(Token(TokenType.DOUBLE_DOT, '..', line, col))
|
|
continue
|
|
|
|
if char == '=' and self.peek_char() == '=':
|
|
self.advance()
|
|
self.advance()
|
|
self.tokens.append(Token(TokenType.EQ, '==', line, col))
|
|
continue
|
|
|
|
if char == '!' and self.peek_char() == '=':
|
|
self.advance()
|
|
self.advance()
|
|
self.tokens.append(Token(TokenType.NE, '!=', line, col))
|
|
continue
|
|
|
|
if char == '<' and self.peek_char() == '=':
|
|
self.advance()
|
|
self.advance()
|
|
self.tokens.append(Token(TokenType.LE, '<=', line, col))
|
|
continue
|
|
|
|
if char == '>' and self.peek_char() == '=':
|
|
self.advance()
|
|
self.advance()
|
|
self.tokens.append(Token(TokenType.GE, '>=', line, col))
|
|
continue
|
|
|
|
if char == '<' and self.peek_char() == '/':
|
|
self.advance()
|
|
self.advance()
|
|
self.tokens.append(Token(TokenType.LT_SLASH, '</', line, col))
|
|
continue
|
|
|
|
if char == '/' and self.peek_char() == '>':
|
|
self.advance()
|
|
self.advance()
|
|
self.tokens.append(Token(TokenType.SLASH_GT, '/>', line, col))
|
|
continue
|
|
|
|
if char == '*' and self.peek_char() == '*':
|
|
self.advance()
|
|
self.advance()
|
|
self.tokens.append(Token(TokenType.POWER, '**', line, col))
|
|
continue
|
|
|
|
# Assignment operators
|
|
if char == '+' and self.peek_char() == '=':
|
|
self.advance()
|
|
self.advance()
|
|
self.tokens.append(Token(TokenType.PLUS_ASSIGN, '+=', line, col))
|
|
continue
|
|
|
|
if char == '-' and self.peek_char() == '=':
|
|
self.advance()
|
|
self.advance()
|
|
self.tokens.append(Token(TokenType.MINUS_ASSIGN, '-=', line, col))
|
|
continue
|
|
|
|
if char == '*' and self.peek_char() == '=':
|
|
self.advance()
|
|
self.advance()
|
|
self.tokens.append(Token(TokenType.STAR_ASSIGN, '*=', line, col))
|
|
continue
|
|
|
|
if char == '/' and self.peek_char() == '=':
|
|
self.advance()
|
|
self.advance()
|
|
self.tokens.append(Token(TokenType.SLASH_ASSIGN, '/=', line, col))
|
|
continue
|
|
|
|
# Single-character tokens
|
|
single_char_tokens = {
|
|
'+': TokenType.PLUS,
|
|
'-': TokenType.MINUS,
|
|
'*': TokenType.STAR,
|
|
'/': TokenType.SLASH,
|
|
'%': TokenType.PERCENT,
|
|
'=': TokenType.ASSIGN,
|
|
'<': TokenType.LT,
|
|
'>': TokenType.GT,
|
|
'(': TokenType.LPAREN,
|
|
')': TokenType.RPAREN,
|
|
'[': TokenType.LBRACKET,
|
|
']': TokenType.RBRACKET,
|
|
'{': TokenType.LBRACE,
|
|
'}': TokenType.RBRACE,
|
|
':': TokenType.COLON,
|
|
';': TokenType.SEMICOLON,
|
|
',': TokenType.COMMA,
|
|
'.': TokenType.DOT,
|
|
'@': TokenType.AT,
|
|
'?': TokenType.QUESTION,
|
|
'$': TokenType.DOLLAR,
|
|
'&': TokenType.AMPERSAND,
|
|
'|': TokenType.PIPE,
|
|
'^': TokenType.CARET,
|
|
'~': TokenType.TILDE,
|
|
}
|
|
|
|
if char in single_char_tokens:
|
|
self.advance()
|
|
self.tokens.append(Token(single_char_tokens[char], char, line, col))
|
|
continue
|
|
|
|
# Unknown character
|
|
raise SyntaxError(f"Unexpected character '{char}' at {line}:{col}")
|
|
|
|
# Handle remaining dedents
|
|
while len(self.indent_stack) > 1:
|
|
self.indent_stack.pop()
|
|
self.tokens.append(Token(TokenType.DEDENT, 0, self.line, self.column))
|
|
|
|
# Add EOF token
|
|
self.tokens.append(Token(TokenType.EOF, None, self.line, self.column))
|
|
return self.tokens
|
|
|
|
def lex(source: str, filename: str = "<stdin>") -> List[Token]:
|
|
"""Convenience function to tokenize source code"""
|
|
lexer = Lexer(source, filename)
|
|
return lexer.tokenize()
|
|
|
|
# Example usage
|
|
if __name__ == "__main__":
|
|
test_code = '''
|
|
# This is a test
|
|
let x: int = 42
|
|
let name = "BlackRoad"
|
|
let color = #FF1D6C
|
|
|
|
fun greet(name: string) -> string:
|
|
return "Hello, {name}!"
|
|
|
|
space MyScene:
|
|
cube Box1:
|
|
position: vec3(0, 0, 0)
|
|
color: #F5A623
|
|
'''
|
|
|
|
tokens = lex(test_code)
|
|
for token in tokens:
|
|
print(token)
|