"""
|
|
ldap.schema.tokenizer - Low-level parsing functions for schema element strings
|
|
|
|
See https://www.python-ldap.org/ for details.
|
|
"""
|
|
|
|
import re
|
|
|
|
TOKENS_FINDALL = re.compile(
    r"(\()"           # opening parenthesis
    r"|"              # or
    r"(\))"           # closing parenthesis
    r"|"              # or
    r"([^'$()\s]+)"   # string of length >= 1 without ', $, (, ) or whitespace
    r"|"              # or
    r"('(?:[^'\\]|\\\\|\\.)*?'(?!\w))"
                      # any string (even the empty string) surrounded by
                      # unescaped single quotes, except if the right quote
                      # is followed by an alphanumeric char
    r"|"              # or
    r"([^\s]+?)",     # residue: any remaining non-whitespace string
).findall

UNESCAPE_PATTERN = re.compile(r"\\(.)")
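
# A minimal illustration of the regexp above on an example LDAP schema
# fragment (the OID and names are chosen for illustration): each tuple
# returned by TOKENS_FINDALL() has exactly one non-empty slot, which
# identifies the alternative that matched.
#
#     TOKENS_FINDALL("( 2.5.4.3 NAME 'cn' )")
#     # -> [('(', '', '', '', ''), ('', '', '2.5.4.3', '', ''),
#     #     ('', '', 'NAME', '', ''), ('', '', '', "'cn'", ''),
#     #     ('', ')', '', '', '')]
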

def split_tokens(s):
    """
    Returns a list of syntax elements with quotes and spaces stripped.
    """
    parts = []
    parens = 0
    for opar, cpar, unquoted, quoted, residue in TOKENS_FINDALL(s):
        if unquoted:
            parts.append(unquoted)
        elif quoted:
            # Strip the surrounding quotes and unescape backslash sequences
            parts.append(UNESCAPE_PATTERN.sub(r'\1', quoted[1:-1]))
        elif opar:
            parens += 1
            parts.append(opar)
        elif cpar:
            parens -= 1
            parts.append(cpar)
        elif residue == '$':
            # '$' is only valid as a value separator inside parentheses
            if not parens:
                raise ValueError("'$' outside parenthesis in %r" % (s,))
        else:
            raise ValueError("Unexpected token %r in %r" % (residue, s))
    if parens:
        raise ValueError("Unbalanced parenthesis in %r" % (s,))
    return parts
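
# A quick sketch of split_tokens() on an illustrative attribute type
# definition (the OID and names are example values):
#
#     split_tokens("( 2.5.4.3 NAME 'cn' SUP name )")
#     # -> ['(', '2.5.4.3', 'NAME', 'cn', 'SUP', 'name', ')']
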

def extract_tokens(l, known_tokens):
    """
    Returns a dictionary mapping each known token to a tuple of its values.
    """
    if l[0].strip() != "(" or l[-1].strip() != ")":
        raise ValueError(l)
    result = {}
    result.update(known_tokens)
    i = 0
    l_len = len(l)
    while i < l_len:
        if l[i] in result:
            token = l[i]
            i += 1  # Consume token
            if i < l_len:
                if l[i] in result:
                    # non-valued token: the next item is already another token
                    result[token] = ()
                elif l[i] == "(":
                    # multi-valued token: collect items up to the closing ")"
                    i += 1  # Consume left parenthesis
                    start = i
                    while i < l_len and l[i] != ")":
                        i += 1
                    # drop the '$' separators between the values
                    result[token] = tuple(v for v in l[start:i] if v != '$')
                    i += 1  # Consume right parenthesis
                else:
                    # single-valued token
                    result[token] = (l[i],)
                    i += 1  # Consume single value
        else:
            i += 1  # Consume unrecognized item
    return result
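
# A sketch of extract_tokens() consuming split_tokens() output; the
# known_tokens dict below is an illustrative subset, not a fixed API:
#
#     tokens = split_tokens(
#         "( 2.5.4.3 NAME ( 'cn' 'commonName' ) SINGLE-VALUE DESC 'common name' )"
#     )
#     extract_tokens(tokens, {'NAME': (), 'SINGLE-VALUE': (), 'DESC': ()})
#     # -> {'NAME': ('cn', 'commonName'), 'SINGLE-VALUE': (),
#     #     'DESC': ('common name',)}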