# -*- coding: utf-8 -*-
#
# Univention Directory Reports
# splits a text into tokens
#
# Copyright 2007-2022 Univention GmbH
#
# https://www.univention.de/
#
# All rights reserved.
#
# The source code of this program is made available
# under the terms of the GNU Affero General Public License version 3
# (GNU AGPL V3) as published by the Free Software Foundation.
#
# Binary versions of this program provided by Univention to you as
# well as other copyrighted, protected or trademarked materials like
# Logos, graphics, fonts, specific documentations and configurations,
# cryptographic keys etc. are subject to a license agreement between
# you and Univention and not subject to the GNU AGPL V3.
#
# In the case you use this program under the terms of the GNU AGPL V3,
# the program is provided in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public
# License with the Debian GNU/Linux or Univention distribution in file
# /usr/share/common-licenses/AGPL-3; if not, see
# <https://www.gnu.org/licenses/>.
import re
import shlex
from .tokens import Token, TextToken, AttributeToken, PolicyToken, QueryToken, HeaderToken, FooterToken, IContextToken, ResolveToken, DateToken


class Parser(object):
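    """Tokenizer for Univention Directory Reports templates.

    The template text is split into plain ``TextToken`` objects and tag
    tokens.  Tags are delimited by ``<@`` and ``@>``: an opening tag
    ``<@tag key="value" @>`` may carry attributes, a closing tag is
    written ``<@/tag @>``.  Container tags (``query``, ``resolve``,
    ``header``, ``footer``) enclose the tokens between their opening and
    closing tag.
    """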
    REGEX_OPEN = re.compile('<@(?P<tag>[^/][^ ]+)( +(?P<attrs>([a-z-0-9]+="[^"]*" *)*)|)@>')
    REGEX_CLOSE = re.compile('<@/(?P<tag>[^ ]+) *@>')
    START = '<@'
    END = '@>'

    def __init__(self, filename=None, data=None):
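        """Load the template either from *filename* or from the *data*
        string; *filename* takes precedence if both are given."""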
        # default: no template data when neither a filename nor data is given
        self._data = None
        if filename:
            self._filename = filename
            with open(self._filename, 'r') as fd:
                self._data = fd.read()
        elif data:
            self._data = data
        self._tokens = []
        self._header = None
        self._footer = None
        self._context = self._tokens
        self._stack = [self._tokens]

    def parse_token(self, token):
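        """Parse a single ``<@...@>`` tag and return a ``(name, attrs,
        closing)`` tuple.  Raises ``SyntaxError`` if the tag matches
        neither the opening nor the closing pattern."""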
        attrs = {}
        closing = False
        m = Parser.REGEX_OPEN.match(token)
        if not m:
            m = Parser.REGEX_CLOSE.match(token)
            if not m:
                raise SyntaxError("failed to parse token: '%s'" % token)
            closing = True
        d = m.groupdict()
        if not closing and d.get('attrs', None):
            for attr in shlex.split(d['attrs']):
                key, value = attr.split('=', 1)
                attrs[key] = value
        return (d['tag'], attrs, closing)

    def next_token(self):
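        """Return the next token from the remaining template data: an empty
        ``Token`` when the data is exhausted, a ``TextToken`` for text in
        front of the next tag, or the token class matching the tag name.
        Raises ``SyntaxError`` for unterminated or unknown tags."""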
        if not self._data:
            # no data left -> empty token
            return Token()
        start = self._data.find(Parser.START)
        # no further tags -> the rest is plain text
        if start < 0:
            token = TextToken(self._data)
            self._data = None
            return token
        # is there text before the next tag?
        if start > 0:
            token = TextToken(self._data[:start])
            self._data = self._data[start:]
            return token
        # find the end of the tag
        end = self._data.find(Parser.END)
        if end < 0:
            max_len = len(self._data) - start
            raise SyntaxError('No matching end tag (tag: %s)' % self._data[start: min(20, max_len)])
        name, attrs, closing = self.parse_token(self._data[start: end + len(Parser.END)])
        self._data = self._data[end + len(Parser.END):]
        if name == 'attribute':
            return AttributeToken(attrs)
        elif name == 'policy':
            return PolicyToken(attrs)
        elif name == 'query':
            return QueryToken(attrs, closing)
        elif name == 'resolve':
            return ResolveToken(attrs, closing)
        elif name == 'header':
            return HeaderToken(attrs, closing)
        elif name == 'footer':
            return FooterToken(attrs, closing)
        elif name == 'date':
            return DateToken(attrs)
        else:
            raise SyntaxError('Unknown tag: %s' % name)

    def tokenize(self):
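        """Split the complete template into tokens.  The parsed tokens are
        collected in ``self._tokens``.  Container tags (``IContextToken``
        subclasses) collect the tokens between their opening and closing
        tag; a header or footer found at the top level is removed from the
        token list and stored in ``_header`` / ``_footer``."""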
        token = self.next_token()
        while token:
            if isinstance(token, (TextToken, AttributeToken, PolicyToken, DateToken)):
                if isinstance(token, TextToken):
                    if token.data == '\n' and len(self._context) and isinstance(self._context[-1], HeaderToken):
                        # ignore the line feed directly after a header
                        pass
                    else:
                        self._context.append(token)
                else:
                    self._context.append(token)
            elif isinstance(token, IContextToken):
                if not token.closing:
                    # opening tag: descend into the new container token
                    self._stack.append(self._context)
                    self._context.append(token)
                    self._context = self._context[-1]
                else:
                    # closing tag: return to the parent context
                    self._context[-1].closing = True
                    self._context = self._stack.pop()
            token = self.next_token()
        # strip the header and footer tokens, if they exist
        trash = []
        for i in range(len(self._context)):
            if isinstance(self._context[i], HeaderToken):
                self._header = self._context[i]
                if len(self._header) and isinstance(self._header[0], TextToken):
                    self._header = self._header[0]
                trash.append(i)
            elif isinstance(self._context[i], FooterToken):
                self._footer = self._context[i]
                if len(self._footer) and isinstance(self._footer[0], TextToken):
                    self._footer = self._footer[0]
                # also drop a bare line feed directly in front of the footer
                if i > 0 and isinstance(self._context[i - 1], TextToken) and \
                        self._context[i - 1].data == '\n':
                    trash.append(i - 1)
                trash.append(i)
        # remove the collected indices from the end so earlier indices stay valid
        trash.reverse()
        for rm in trash:
            self._context.pop(rm)
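

if __name__ == '__main__':
    # Minimal usage sketch (not part of the original module): it shows how a
    # template string could be tokenized and inspected.  The template text
    # below and the direct access to the private ``_tokens`` list are
    # illustrative assumptions only; real callers are expected to use the
    # report framework around this parser.  Note that the relative import at
    # the top requires running this file as part of its package.
    example = 'Name: <@attribute name="cn" @>\nCreated: <@date format="%d.%m.%Y" @>\n'
    parser = Parser(data=example)
    parser.tokenize()
    for tok in parser._tokens:
        print(type(tok).__name__)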