Source code for pygerber.tokenizer

# -*- coding: utf-8 -*-
from __future__ import annotations

from collections import deque
from typing import TYPE_CHECKING, Deque

if TYPE_CHECKING:
    from pathlib import Path
    from pygerber.tokens.token import Token

from pygerber.drawing_state import DrawingState

from .exceptions import DeprecatedSyntax, EndOfStream, InvalidSyntaxError, TokenNotFound
from .tokens import token_classes

# NOTE(review): not referenced in the visible part of this module. Its value
# equals the default ``file_path`` argument of ``Tokenizer.tokenize`` --
# presumably the placeholder path reported in error traces when the source is
# an in-memory string rather than a file; confirm against callers.
DEFAULT_TRACE_FILEPATH = "<string>"


class Tokenizer:
    """Split source text into tokens while tracking position and drawing state.

    At each step every class in ``token_classes`` is tried, in order, at the
    current offset; the first one whose ``regex`` matches is instantiated,
    optionally stored, and then allowed to alter :attr:`state`.

    :param ignore_deprecated: when ``False``, a token whose ``__deprecated__``
        message is not ``None`` makes tokenization raise ``DeprecatedSyntax``.
    """

    # Tokens accepted by ``push_token`` (only those with ``keep is True``).
    token_stack: deque  # contains Token objects
    # State object passed to every token constructor and ``alter_state`` call.
    state: DrawingState
    # Number of tokens appended to ``token_stack`` by ``push_token``.
    token_stack_size: int = 0
    # Offset into the source at which the next match is attempted.
    begin_index: int = 0
    # Characters already consumed on the current line (reported in errors).
    char_index: int = 0
    # One-based number of the current line (reported in errors).
    line_index: int = 1

    def __init__(self, ignore_deprecated: bool = True) -> None:
        self.ignore_deprecated = ignore_deprecated
        self.token_stack = deque()
        self.state = DrawingState()
        self.set_defaults()

    def set_defaults(self) -> None:
        """Reset the drawing state and all position counters.

        NOTE(review): ``token_stack`` itself is not emptied here, so after a
        reset ``token_stack_size`` can disagree with ``len(token_stack)`` --
        confirm whether that is intended.
        """
        self.state.set_defaults()
        self.token_stack_size = 0
        self.begin_index = 0
        self.char_index = 0
        self.line_index = 1

    def tokenize_file(self, file_path: str | Path) -> Deque[Token]:
        """Read ``file_path`` as text and tokenize its whole content.

        :param file_path: file to read; also used in error messages.
        :return: the same deque that :meth:`tokenize` returns.
        """
        with open(file_path) as file:
            source = file.read()
        return self.tokenize(source, file_path)

    def tokenize(self, source, file_path: str | Path = "<string>") -> Deque[Token]:
        """Tokenize ``source`` starting at the current ``begin_index``.

        :param source: text to tokenize.
        :param file_path: name shown in error messages.
        :return: ``self.token_stack`` (the tokenizer's own deque, not a copy).
        :raises InvalidSyntaxError: if the end of ``source`` is reached without
            ``EndOfStream`` having been raised, or (as the original exception
            class, with a location prefix) if such an error occurs while
            processing a token.
        """
        try:
            while not self.__has_reached_end(source):
                self.__next_token(source)
        except EndOfStream:
            # Expected way to finish: something during token processing
            # signalled the explicit end of the stream.
            pass
        except InvalidSyntaxError as e:
            # Re-raise as the same class, prefixed with the current location.
            raise e.__class__(
                f"""File "{file_path}", line {self.line_index}, char {self.char_index}:\n{e}"""
            ) from e
        else:
            # The loop ran off the end of the source without EndOfStream.
            raise InvalidSyntaxError(
                f"""File "{file_path}", No explicit indication of end at the end of source."""
            )
        return self.token_stack

    def __next_token(self, source) -> None:
        """Match one token at ``begin_index`` and apply all its effects."""
        token: Token = self.__find_matching_token(source)
        self.__check_deprecated_syntax(token.__deprecated__)
        self.push_token(token)
        self.__update_indexes(token)
        token.alter_state(self.state)

    def __check_deprecated_syntax(self, message: str | None) -> None:
        """Raise ``DeprecatedSyntax(message)`` unless deprecations are ignored.

        A ``None`` message means the token is not deprecated.
        """
        if message is not None and not self.ignore_deprecated:
            raise DeprecatedSyntax(message)

    def __find_matching_token(self, source):
        """Return an instance of the first token class matching at ``begin_index``.

        Falls through to :meth:`raise_token_not_found` when nothing matches.
        """
        for token_class in token_classes:
            re_match = token_class.regex.match(source, pos=self.begin_index)
            if re_match is not None:
                return token_class(re_match, self.state)
        self.raise_token_not_found(source)

    def __has_reached_end(self, source) -> bool:
        """Tell whether ``begin_index`` is at or past the end of ``source``."""
        return self.begin_index >= len(source)

    def push_token(self, token: Token) -> None:
        """Store ``token`` on the stack if its ``keep`` attribute is ``True``."""
        if token.keep is True:
            self.token_stack.append(token)
            self.token_stack_size += 1

    def __update_indexes(self, token: Token) -> None:
        """Advance ``begin_index``, ``line_index`` and ``char_index`` past ``token``."""
        matched_string: str = token.re_match.group()
        self.begin_index = token.re_match.end()

        endl_count = matched_string.count("\n")
        self.line_index += endl_count

        if endl_count == 0:
            # Still on the same line: just move further along it.
            self.char_index += len(matched_string)
        else:
            # Count only the characters after the last newline. The ``- 1``
            # excludes the newline itself, so a fresh line starts at 0 exactly
            # like the first line does.
            last_endl_index = matched_string.rfind("\n")
            self.char_index = len(matched_string) - last_endl_index - 1

    def raise_token_not_found(self, source):
        """Raise ``TokenNotFound`` carrying up to 30 characters of unmatched text.

        :raises TokenNotFound: always.
        """
        # Slicing clamps at the end of ``source`` on its own.
        snippet = source[self.begin_index : self.begin_index + 30]
        raise TokenNotFound(f"{snippet}")