# Source code for univention.ucslint.python

# SPDX-FileCopyrightText: 2008-2025 Univention GmbH
# SPDX-License-Identifier: AGPL-3.0-only

import re
from collections.abc import Iterator
from pathlib import Path
from re import Pattern

from univention.ucslint.base import FilteredDirWalkGenerator


def _or(*disjunct: str, name: str | None = None) -> str:
    return r'(?{}{})'.format(':' if name is None else f'P<{name}>', '|'.join(disjunct))


# Hashbang line whose interpreter path or argument is ``python`` with an optional
# version suffix, e.g. ``#!/usr/bin/python3`` or ``#!/usr/bin/env python``.
RE_HASHBANG = re.compile(r"^#!.*[ /]python[0-9.]*\b")
# Escape sequences, from most permissive to most specific.  Each one starts with
# a literal backslash.
# Lenient: backslash followed by any single character (``.`` excludes newline).
ESCAPE_LENIENT = r"\\."
# Raw strings: backslash at ``$`` (end of line under re.MULTILINE) or before any single character.
ESCAPE_RAW = r"\\(?:$|.)"
# Byte strings: backslash at ``$``, before one of ``\ ' " a b f n r t v``,
# before 1-3 octal digits, or before ``x`` and exactly two hex digits.
ESCAPE_BYTES = r"""\\(?:$|[\\'"abfnrtv]|[0-7]{1,3}|x[0-9a-fA-F]{2})"""
# Unicode strings: everything ESCAPE_BYTES accepts plus ``\N{...}``, ``\uXXXX`` and ``\UXXXXXXXX``.
# NOTE: the name is misspelt ("UNIICODE") but is referenced by the classes in this module, so it is kept.
ESCAPE_UNIICODE = _or(ESCAPE_BYTES, r"\\(?:N\{[^}]+\}|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8})")
# Template for the four quoting styles.  ``%(esc)s`` is a placeholder that must be
# filled with one of the ESCAPE_* patterns via ``LITERALS % {"esc": ...}``.
LITERALS = _or(
    r"'''(?:[^'\\]|%(esc)s|'[^']|''[^'])*?'''",  # triple single quotes
    r'"""(?:[^"\\]|%(esc)s|"[^"]|""[^"])*?"""',  # triple double quotes
    r"'(?:[^'\\\n]|%(esc)s)*?'",  # single quotes; a bare newline is not allowed inside
    r'"(?:[^"\\\n]|%(esc)s)*?"',  # double quotes; a bare newline is not allowed inside
)
# Any literal with an optional prefix of one or two letters out of ``BbFfRrUu``;
# the ``{{1,2}}`` is an escaped ``{1,2}`` quantifier inside the f-string.
MATCHED_LENIENT = rf"(?:\b[BbFfRrUu]{{1,2}})?{(LITERALS % {'esc': ESCAPE_LENIENT})}"
# A ``#`` comment running to the end of the line, captured as group ``cmt``.
COMMENT = _or(r"#[^\n]*$", name="cmt")
# Matches either a comment (group ``cmt``) or a leniently parsed string literal (group ``str``).
# The comment alternative is listed first, so it is tried first at each position.
RE_LENIENT = re.compile(_or(COMMENT, _or(MATCHED_LENIENT, name="str")), re.MULTILINE)


class Base:
    """Regex fragments describing Python string literals.

    The class attributes are regex source strings; the subclasses in this
    module override some of them and set their own ``VER``.
    :py:meth:`matcher` combines the attributes of the class it is called on
    into one compiled pattern.
    """

    # Version tuple; the subclasses use (major, minor) matching their class name.
    VER = (0, 0)
    # Raw literals: ``r`` alone, ``r`` preceded by one of b/f/u, or ``r`` followed by b/f.
    # (ur|ru) only in 2, (rb) since 3.3
    MATCHED_RAW = r"\b" + _or("[Rr]", "[BbFfUu][Rr]", "[Rr][BbFf]") + (LITERALS % {"esc": ESCAPE_RAW})
    # Byte literals: mandatory ``b`` prefix.
    MATCHED_BYTES = r"\b[Bb]" + (LITERALS % {"esc": ESCAPE_BYTES})
    # Text literals: optional ``f`` or ``u`` prefix.
    # [u] not in 3.0-3.2, [f] since 3.6
    MATCHED_UNICODE = r"(?:\b[FfUu])?" + (LITERALS % {"esc": ESCAPE_UNIICODE})

    @classmethod
    def matcher(cls) -> Pattern[str]:
        """Compile the pattern for comments and string literals of this class.

        The string alternatives are tried in the order raw, bytes, unicode and
        are captured together as the named group ``str``; comments are matched
        by ``COMMENT`` (named group ``cmt``).  A new pattern object is built
        from the class attributes on every call.

        :returns: The compiled pattern, using :py:data:`re.MULTILINE`.
        """
        string_alternatives = (cls.MATCHED_RAW, cls.MATCHED_BYTES, cls.MATCHED_UNICODE)
        return re.compile(
            _or(COMMENT, _or(*string_alternatives, name="str")),
            re.MULTILINE,
        )
class Python27(Base):
    """String-literal patterns with ``VER`` set to (2, 7).

    Only the raw-literal prefixes differ from :py:class:`Base`.
    """

    VER = (2, 7)
    # Raw literals: ``r`` alone, ``r`` preceded by b/u, or ``r`` followed by ``u``.
    # (ur|ru) only in 2, (rb) since 3.3
    MATCHED_RAW = r"\b" + _or("[Rr]", "[BbUu][Rr]", "[Rr][Uu]") + (LITERALS % {"esc": ESCAPE_RAW})
class Python30(Base):
    """String-literal patterns with ``VER`` set to (3, 0).

    Overrides the raw-literal prefixes and drops every prefix from text literals.
    """

    VER = (3, 0)
    # Raw literals: ``r`` alone or ``br``.
    MATCHED_RAW = r"\b" + _or("[Rr]", "[Bb][Rr]") + (LITERALS % {"esc": ESCAPE_RAW})
    # Text literals take no prefix here: [u] not in 3.0-3.2
    MATCHED_UNICODE = LITERALS % dict(esc=ESCAPE_UNIICODE)
class Python33(Base):
    """String-literal patterns with ``VER`` set to (3, 3).

    Overrides the raw-literal prefixes and allows an optional ``u`` prefix on
    text literals.
    """

    VER = (3, 3)
    # Raw literals: ``r`` alone, ``br`` or ``rb``; (rb) since 3.3
    MATCHED_RAW = r"\b" + _or("[Rr]", "[Bb][Rr]", "[Rr][Bb]") + (LITERALS % {"esc": ESCAPE_RAW})
    # Text literals: optional ``u`` prefix.
    MATCHED_UNICODE = r"(?:\b[Uu])?" + (LITERALS % {"esc": ESCAPE_UNIICODE})
class Python36(Base):
    """String-literal patterns with ``VER`` set to (3, 6).

    Overrides the raw-literal prefixes and allows an optional ``f`` or ``u``
    prefix on text literals.
    """

    VER = (3, 6)
    # Raw literals: ``r`` alone, ``r`` preceded by b/f, or ``r`` followed by b/f; (f) since 3.6
    MATCHED_RAW = r"\b" + _or("[Rr]", "[BbFf][Rr]", "[Rr][BbFf]") + (LITERALS % {"esc": ESCAPE_RAW})
    # Text literals: optional ``f`` or ``u`` prefix; [f] since 3.6
    MATCHED_UNICODE = r"(?:\b[FfUu])?" + (LITERALS % {"esc": ESCAPE_UNIICODE})
def python_files(path: Path) -> Iterator[Path]:
    """Yield the entries ``FilteredDirWalkGenerator`` produces for *path*.

    The walker is configured with the suffix ``.py`` and the ``RE_HASHBANG``
    pattern.
    NOTE(review): presumably this selects files that end in ``.py`` or start
    with a Python hashbang line; confirm against ``FilteredDirWalkGenerator``.

    :param path: Starting point handed to the walker.
    :returns: Iterator over whatever the walker yields.
    """
    walker = FilteredDirWalkGenerator(
        path,
        suffixes=('.py',),
        reHashBang=RE_HASHBANG,
    )
    yield from walker