"""parsing and generation of content lines"""
import re
from icalendar.parser.parameter import Parameters
from icalendar.parser.property import unescape_backslash, unescape_list_or_string
from icalendar.parser.string import (
_escape_string,
_unescape_string,
foldline,
validate_token,
)
from icalendar.parser_tools import DEFAULT_ENCODING, ICAL_TYPE, to_unicode
# Matches a fold: one or more line endings (LF or CRLF) followed by a single
# space or tab. Substituting every match with "" unfolds content lines.
UFOLD = re.compile("(\r?\n)+[ \t]")
# Matches one line ending (LF or CRLF); used to split text into lines.
NEWLINE = re.compile(r"\r?\n")
# The characters treated as optional whitespace: space and horizontal tab.
OWS = " \t"
# Matches a ``;`` or ``=`` together with any spaces/tabs on either side.
OWS_AROUND_DELIMITERS_RE = re.compile(r"[ \t]*([;=])[ \t]*")


def _strip_ows_around_delimiters(st: str, delimiters: str = ";=") -> str:
    """Remove optional whitespace around unquoted, unescaped delimiters.

    Spaces and tabs directly before or after a raw delimiter are dropped.
    Whitespace inside double-quoted sections, whitespace that is escaped with
    a backslash, and whitespace between ordinary characters is preserved.
    A delimiter that is escaped with a backslash or that sits inside quotes is
    treated as an ordinary character.

    This is a lenient parsing helper (used when strict=False) to support
    iCalendar content lines that contain extra whitespace around tokens.

    Parameters:
        st: The text to normalize.
        delimiters: The characters that count as delimiters.

    Returns:
        The normalized text. ``st`` is returned unchanged when it is empty or
        contains neither a space nor a tab; otherwise the result also has
        leading and trailing whitespace stripped.
    """
    if not st:
        return st
    # Fast path for the common case in non-strict mode:
    # no whitespace at all means there is nothing to normalize.
    if " " not in st and "\t" not in st:
        return st
    # Fast regex-based path for simple sections without quoting/escaping.
    if delimiters == ";=" and '"' not in st and "\\" not in st:
        return OWS_AROUND_DELIMITERS_RE.sub(r"\1", st).strip()

    out: list[str] = []
    # Unquoted, unescaped whitespace seen since the last emitted character.
    # It is only emitted if it turns out not to touch a raw delimiter.
    pending_ws: list[str] = []
    in_quotes = False
    escaped = False
    # True only while the last emitted character is a raw delimiter.
    after_delimiter = False

    for ch in st:
        if escaped:
            # The previous character was a backslash: take ``ch`` literally.
            escaped = False
        elif ch == "\\" and not in_quotes:
            # Backslashes only escape outside of quoted sections.
            escaped = True
        elif ch == '"':
            in_quotes = not in_quotes
        elif not in_quotes:
            if ch in OWS:
                pending_ws.append(ch)
                continue
            if ch in delimiters:
                # Whitespace in front of the delimiter is dropped. Characters
                # already emitted are left alone: whitespace at the end of
                # ``out`` can only be escaped whitespace, and removing it
                # would leave its backslash escaping this delimiter instead.
                pending_ws.clear()
                out.append(ch)
                after_delimiter = True
                continue

        # ``ch`` is emitted as an ordinary character. Buffered whitespace is
        # kept unless it directly follows a raw delimiter.
        if pending_ws:
            if not after_delimiter:
                out.extend(pending_ws)
            pending_ws.clear()
        out.append(ch)
        after_delimiter = False

    # Whitespace still buffered here is trailing whitespace, which the final
    # strip would remove anyway, so it is not emitted.
    return "".join(out).strip()
class Contentline(str):
    """A content line is basically a string that can be folded and parsed into
    parts.

    The ``strict`` attribute selects strict parsing in :meth:`parts`. When it
    is false, optional whitespace in the name and around parameter delimiters
    is removed before parsing.
    """

    # ``strict`` is the only per-instance attribute besides the string value.
    __slots__ = ("strict",)

    def __new__(cls, value, strict=False, encoding=DEFAULT_ENCODING):
        """Create a content line from ``value``.

        ``value`` is converted with ``to_unicode`` using ``encoding`` and must
        not contain a line feed character.
        """
        value = to_unicode(value, encoding=encoding)
        # NOTE: kept as an assertion so callers keep seeing AssertionError;
        # be aware that the check is skipped when Python runs with -O.
        assert "\n" not in value, (
            "Content line can not contain unescaped new line characters."
        )
        self = super().__new__(cls, value)
        self.strict = strict
        return self

    @classmethod
    def from_parts(
        cls,
        name: ICAL_TYPE,
        params: Parameters,
        values,
        sorted: bool = True,  # noqa: A002
    ):
        """Turn a parts into a content line.

        Parameters:
            name: The property name.
            params: The property parameters; must be a ``Parameters`` instance.
            values: The property value. Objects without a ``to_ical`` method
                are wrapped in ``vText`` before serialization.
            sorted: Passed on to ``params.to_ical(sorted=...)``.

        Returns:
            A new content line of the form ``name;params:values``, or
            ``name:values`` when there are no parameters to serialize.
        """
        assert isinstance(params, Parameters)
        if hasattr(values, "to_ical"):
            values = values.to_ical()
        else:
            # Imported here rather than at module level.
            from icalendar.prop import vText

            values = vText(values).to_ical()
        # TODO: after unicode only, remove this
        # Convert back to unicode, after to_ical encoded it.
        name = to_unicode(name)
        values = to_unicode(values)
        if params:
            params = to_unicode(params.to_ical(sorted=sorted))
            if params:
                # some parameter values can be skipped during serialization
                return cls(f"{name};{params}:{values}")
        return cls(f"{name}:{values}")

    def parts(self) -> tuple[str, Parameters, str]:
        """Split the content line into ``name``, ``parameters``, and ``values`` parts.

        Properly handles escaping with backslashes and double-quote sections
        to avoid corrupting URL-encoded characters in values.

        Example with parameter:

        .. code-block:: ics

            DESCRIPTION;ALTREP="cid:part1.0001@example.org":The Fall'98 Wild

        Example without parameters:

        .. code-block:: ics

            DESCRIPTION:The Fall'98 Wild

        Returns:
            A ``(name, parameters, values)`` tuple.

        Raises:
            ValueError: If the line cannot be split into valid parts. The
                message contains the line and the underlying error.
        """
        try:
            name_split: int | None = None
            value_split: int | None = None
            in_quotes: bool = False
            escaped: bool = False
            for i, ch in enumerate(self):
                if ch == '"' and not escaped:
                    in_quotes = not in_quotes
                elif ch == "\\" and not in_quotes:
                    # The next character is escaped; keep the flag set for it.
                    escaped = True
                    continue
                elif not in_quotes and not escaped:
                    # The first raw ':' or ';' ends the name.
                    if name_split is None and ch in ":;":
                        name_split = i
                    # The first raw ':' starts the value. Both split points
                    # are known now, so the value itself need not be scanned.
                    if ch == ":":
                        value_split = i
                        break
                escaped = False
            # Validate parsing results
            if not value_split:
                # No colon found - value is empty, use end of string
                value_split = len(self)
            # Extract name - if no delimiter,
            # take whole string for validate_token to reject
            name = self[:name_split] if name_split else self
            if not self.strict:
                # Lenient mode: drop every space and tab from the name.
                name = re.sub(r"[ \t]+", "", name.strip())
            validate_token(name)
            if not name_split or name_split + 1 == value_split:
                # No delimiter or empty parameter section
                raise ValueError("Invalid content line")  # noqa: TRY301
            # Parse parameters - they still need to be escaped/unescaped
            # for proper handling of commas, semicolons, etc. in parameter values
            raw_param_str = self[name_split + 1 : value_split]
            if not self.strict:
                raw_param_str = _strip_ows_around_delimiters(raw_param_str)
            param_str = _escape_string(raw_param_str)
            params = Parameters.from_ical(param_str, strict=self.strict)
            params = Parameters(
                (_unescape_string(key), unescape_list_or_string(value))
                for key, value in params.items()
            )
            # Unescape backslash sequences in values but preserve URL encoding
            values = unescape_backslash(self[value_split + 1 :])
        except ValueError as exc:
            raise ValueError(
                f"Content line could not be parsed into parts: '{self}': {exc}"
            ) from exc
        return (name, params, values)

    @classmethod
    def from_ical(cls, ical, strict=False):
        """Unfold the content lines in an iCalendar into long content lines.

        ``ical`` is converted with ``to_unicode`` and ``strict`` is stored on
        the resulting content line.
        """
        ical = to_unicode(ical)
        # a fold is a line break followed by either a space or a tab
        return cls(UFOLD.sub("", ical), strict=strict)

    def to_ical(self):
        """Long content lines are folded so they are less than 75 characters
        wide.

        Returns the result of ``foldline`` encoded with ``DEFAULT_ENCODING``.
        """
        return foldline(self).encode(DEFAULT_ENCODING)
class Contentlines(list[Contentline]):
    """A list of content lines that is built and serialized in one go.

    The assumption is that iCalendar files are generally a few kilobytes in
    size, for which this is efficient. For huge files, an iterator should
    probably be used instead.
    """

    def to_ical(self):
        """Serialize the non-empty lines, separated and terminated by CRLF."""
        folded = [entry.to_ical() for entry in self if entry]
        return b"\r\n".join(folded) + b"\r\n"

    @classmethod
    def from_ical(cls, st):
        """Parse text into content lines.

        The text is unfolded and split at line endings; empty lines are
        skipped and a single empty string is appended at the end.

        Raises:
            ValueError: If unfolding, splitting, or building the content lines
                fails for any reason.
        """
        text = to_unicode(st)
        try:
            # a fold is a line break followed by either a space or a tab
            pieces = NEWLINE.split(UFOLD.sub("", text))
            parsed = [Contentline(piece) for piece in pieces if piece]
            lines = cls(parsed)
            # '\r\n' at the end of every content line
            lines.append("")
        except Exception as e:
            raise ValueError("Expected StringType with content lines") from e
        return lines
# The public names of this module; also what ``import *`` exports.
__all__ = ["Contentline", "Contentlines"]