# Source code for pygerber.tokenizer

# -*- coding: utf-8 -*-
from __future__ import annotations

from collections import deque
from typing import TYPE_CHECKING, Deque

if TYPE_CHECKING:
    from pathlib import Path
    from pygerber.tokens.token import Token

from pygerber.drawing_state import DrawingState

from .exceptions import DeprecatedSyntax, EndOfStream, InvalidSyntaxError, TokenNotFound
from .tokens import token_classes

# Presumably the placeholder path shown in error traces when the source text
# did not come from a file.
# NOTE(review): nothing in the visible part of this module references this
# constant; Tokenizer.tokenize spells the same "<string>" literal directly as
# its default argument -- confirm whether the two are meant to stay in sync.
DEFAULT_TRACE_FILEPATH = "<string>"


class Tokenizer:
    """Split source text into tokens by matching the regexes of ``token_classes``.

    Each matched token is (optionally) stored on ``token_stack`` and then asked
    to update the shared ``DrawingState`` via ``token.alter_state``.

    Position bookkeeping:

    * ``begin_index`` -- absolute offset in the source where the next match
      starts.
    * ``line_index`` -- 1-based line number of that position.
    * ``char_index`` -- 0-based number of characters already consumed on the
      current line.

    ``line_index`` and ``char_index`` are used only to annotate syntax errors.
    """

    token_stack: Deque[Token]  # tokens whose ``keep`` flag was True, in order
    state: DrawingState
    token_stack_size: int = 0
    begin_index: int = 0
    char_index: int = 0
    line_index: int = 1

    def __init__(self, ignore_deprecated: bool = True) -> None:
        """Create a tokenizer with an empty token stack and a fresh state.

        :param ignore_deprecated: when False, a token whose ``__deprecated__``
            message is not None makes tokenization raise ``DeprecatedSyntax``.
        """
        self.ignore_deprecated = ignore_deprecated
        self.token_stack = deque()
        self.state = DrawingState()
        self.set_defaults()

    def set_defaults(self) -> None:
        """Reset the drawing state and all position counters.

        NOTE: ``token_stack`` itself is not cleared here, only the
        ``token_stack_size`` counter is zeroed.
        """
        self.state.set_defaults()
        self.token_stack_size = 0
        self.begin_index = 0
        self.char_index = 0
        self.line_index = 1

    def tokenize_file(self, file_path: str | Path) -> Deque[Token]:
        """Read ``file_path`` as text and tokenize its whole content.

        :param file_path: path of the file to read; also used in error messages.
        :return: the tokenizer's ``token_stack`` (see :meth:`tokenize`).
        """
        with open(file_path) as file:
            source = file.read()
        return self.tokenize(source, file_path)

    def tokenize(self, source, file_path: str | Path = "<string>") -> Deque[Token]:
        """Tokenize ``source`` starting at the current ``begin_index``.

        Tokens are consumed one after another until ``EndOfStream`` is raised
        while processing a token (NOTE(review): presumably by the token that
        marks the end of the file -- confirm against the token classes).

        This method does not call :meth:`set_defaults`; a second call continues
        from the indexes left by the previous one and appends to the same
        ``token_stack``.

        :param source: text to tokenize.
        :param file_path: name reported in error messages.
        :return: ``self.token_stack`` (the same deque object on every call).
        :raises InvalidSyntaxError: re-raised (same class, chained) with the
            file/line/char location prepended, or raised directly when the
            source is exhausted without ``EndOfStream`` having been signalled.
        """
        try:
            while not self.__has_reached_end(source):
                self.__next_token(source)
        except EndOfStream:
            # Normal termination: the source explicitly signalled its end.
            pass
        except InvalidSyntaxError as e:
            raise e.__class__(
                f"""File "{file_path}", line {self.line_index}, char {self.char_index}:\n{e}"""
            ) from e
        else:
            # Ran off the end of the text without an explicit end marker.
            # The message must be an f-string, otherwise the literal text
            # "{file_path}" is shown instead of the actual path.
            raise InvalidSyntaxError(
                f"""File "{file_path}", No explicit indication of end at the end of source."""
            )
        return self.token_stack

    def __next_token(self, source) -> None:
        """Match one token at ``begin_index``, record it and apply it to state."""
        token: Token = self.__find_matching_token(source)
        self.__check_deprecated_syntax(token.__deprecated__)
        self.push_token(token)
        # Indexes are advanced before alter_state, so an error raised there is
        # reported at the position just past the offending token.
        self.__update_indexes(token)
        token.alter_state(self.state)

    def __check_deprecated_syntax(self, message: str | None) -> None:
        """Raise ``DeprecatedSyntax(message)`` unless deprecations are ignored.

        :param message: deprecation text of the token, or None when the token
            is not deprecated.
        """
        if message is not None and not self.ignore_deprecated:
            raise DeprecatedSyntax(message)

    def __find_matching_token(self, source) -> Token:
        """Return an instance of the first token class matching at ``begin_index``.

        Classes are tried in the order of ``token_classes``; when none matches,
        :meth:`raise_token_not_found` raises ``TokenNotFound``.
        """
        for token_class in token_classes:
            re_match = token_class.regex.match(source, pos=self.begin_index)
            if re_match is not None:
                return token_class(re_match, self.state)
        self.raise_token_not_found(source)

    def __has_reached_end(self, source) -> bool:
        """Tell whether ``begin_index`` is at or past the end of ``source``."""
        return self.begin_index >= len(source)

    def push_token(self, token: Token) -> None:
        """Append ``token`` to ``token_stack`` when its ``keep`` flag is True."""
        if token.keep is True:
            self.token_stack.append(token)
            self.token_stack_size += 1

    def __update_indexes(self, token: Token) -> None:
        """Advance ``begin_index``, ``line_index`` and ``char_index`` past ``token``."""
        self.begin_index = token.re_match.end()
        matched_string: str = token.re_match.group()
        newline_count = matched_string.count("\n")
        if newline_count == 0:
            # Still on the same line: just move further along it.
            self.char_index += len(matched_string)
            return
        self.line_index += newline_count
        # Characters consumed after the last newline. The "- 1" excludes the
        # newline itself, so a match ending in "\n" leaves char_index at 0,
        # consistent with the value used at reset and with the same-line case.
        self.char_index = len(matched_string) - matched_string.rfind("\n") - 1

    def raise_token_not_found(self, source) -> None:
        """Raise ``TokenNotFound`` quoting up to 30 characters at ``begin_index``.

        :raises TokenNotFound: always.
        """
        # Slicing clamps to the end of the string, so no explicit min() needed.
        snippet = source[self.begin_index : self.begin_index + 30]
        raise TokenNotFound(f"{snippet}")