Source code for icalendar.parser.content_line

"""parsing and generation of content lines"""

import re

from icalendar.parser.parameter import Parameters
from icalendar.parser.property import unescape_backslash, unescape_list_or_string
from icalendar.parser.string import (
    _escape_string,
    _unescape_string,
    foldline,
    validate_token,
)
from icalendar.parser_tools import DEFAULT_ENCODING, ICAL_TYPE, to_unicode

# A fold: one or more line breaks (CRLF or a bare LF) followed by exactly one
# space or tab. Substituting it with "" unfolds a folded content line.
UFOLD = re.compile(r"(\r?\n)+[ \t]")
# A single line break: CRLF or a bare LF.
NEWLINE = re.compile(r"\r?\n")

OWS = " \t"
OWS_AROUND_DELIMITERS_RE = re.compile(r"[ \t]*([;=])[ \t]*")


def _strip_ows_around_delimiters(st: str, delimiters: str = ";=") -> str:
    """Strip optional whitespace around delimiters outside of quoted sections,
    respecting backslash escapes so that escaped delimiters are not treated as
    separators.

    This is a lenient parsing helper (used when strict=False) to support
    iCalendar content lines that contain extra whitespace around tokens.
    """
    if not st:
        return st

    # Fast path for the common case in non-strict mode:
    # no whitespace in the parameter section means there is nothing to normalize.
    if " " not in st and "\t" not in st:
        return st

    # Fast regex-based path for simple parameter sections without quoting/escaping.
    if delimiters == ";=" and '"' not in st and "\\" not in st:
        return OWS_AROUND_DELIMITERS_RE.sub(r"\1", st).strip()

    out: list[str] = []
    pending_ws: list[str] = []
    in_quotes = False
    escaped = False
    # True only if the last appended char was a raw delimiter.
    last_was_delimiter = False

    def flush_pending() -> None:
        nonlocal pending_ws
        if not pending_ws:
            return
        if not last_was_delimiter:
            out.extend(pending_ws)
        pending_ws.clear()

    for ch in st:
        # Handle escaped character (the backslash set escaped in previous iteration)
        if escaped:
            flush_pending()
            out.append(ch)
            escaped = False
            last_was_delimiter = False
            continue

        # Handle backslash to escape next character
        if ch == "\\" and not in_quotes:
            flush_pending()
            out.append(ch)
            escaped = True
            last_was_delimiter = False
            continue

        # Handle quote toggling
        if ch == '"' and not escaped:
            in_quotes = not in_quotes
            flush_pending()
            out.append(ch)
            last_was_delimiter = False
            continue

        # Whitespace outside quotes is buffered
        if not in_quotes and not escaped and ch in OWS:
            pending_ws.append(ch)
            continue

        # Raw delimiter (unescaped and outside quotes)
        if not in_quotes and not escaped and ch in delimiters:
            pending_ws.clear()
            while out and out[-1] in OWS:
                out.pop()
            out.append(ch)
            last_was_delimiter = True
            continue

        # Regular character
        flush_pending()
        out.append(ch)
        last_was_delimiter = False

    if pending_ws and not last_was_delimiter:
        out.extend(pending_ws)

    return "".join(out).strip()



[docs]
class Contentline(str):
    """A content line is basically a string that can be folded and parsed into
    parts.

    The instance is the unfolded text of the line. The ``strict`` attribute
    controls parsing in :meth:`parts`: when it is false, optional spaces and
    tabs in the name and around the parameter delimiters are tolerated.
    """

    __slots__ = ("strict",)

    def __new__(cls, value, strict=False, encoding=DEFAULT_ENCODING):
        """Create a content line from ``value``.

        :param value: the line content; converted with ``to_unicode`` using
            ``encoding``.
        :param strict: stored on the instance and used by :meth:`parts`.
        :param encoding: the encoding passed to ``to_unicode``.
        :raises AssertionError: if the converted value contains a ``"\\n"``.
        """
        value = to_unicode(value, encoding=encoding)
        assert "\n" not in value, (
            "Content line can not contain unescaped new line characters."
        )
        self = super().__new__(cls, value)
        self.strict = strict
        return self

    @classmethod
    def from_parts(
        cls,
        name: ICAL_TYPE,
        params: Parameters,
        values,
        sorted: bool = True,  # noqa: A002
    ):
        """Turn a parts into a content line.

        :param name: the property name.
        :param params: the property parameters; must be a ``Parameters``
            instance.
        :param values: the property value. Its ``to_ical()`` result is used if
            it has such a method, otherwise it is wrapped in ``vText`` first.
        :param sorted: passed on to ``params.to_ical()``.
        :return: ``name;params:values``, or ``name:values`` when there are no
            parameters or they serialize to nothing.
        """
        assert isinstance(params, Parameters)
        if hasattr(values, "to_ical"):
            values = values.to_ical()
        else:
            from icalendar.prop import vText

            values = vText(values).to_ical()

        # TODO: after unicode only, remove this
        # Convert back to unicode, after to_ical encoded it.
        name = to_unicode(name)
        values = to_unicode(values)
        if params:
            params = to_unicode(params.to_ical(sorted=sorted))
            if params:
                # some parameter values can be skipped during serialization
                return cls(f"{name};{params}:{values}")
        return cls(f"{name}:{values}")

    def parts(self) -> tuple[str, Parameters, str]:
        """Split the content line into ``name``, ``parameters``, and ``values`` parts.

        Properly handles escaping with backslashes and double-quote sections
        to avoid corrupting URL-encoded characters in values.

        Example with parameter:

        .. code-block:: ics

            DESCRIPTION;ALTREP="cid:part1.0001@example.org":The Fall'98 Wild

        Example without parameters:

        .. code-block:: ics

            DESCRIPTION:The Fall'98 Wild

        :return: a ``(name, parameters, values)`` tuple.
        :raises ValueError: if the line cannot be split; the message contains
            the line and the underlying error.
        """
        try:
            name_split: int | None = None
            value_split: int | None = None
            in_quotes: bool = False
            escaped: bool = False

            for i, ch in enumerate(self):
                if ch == '"' and not escaped:
                    in_quotes = not in_quotes
                elif ch == "\\" and not in_quotes:
                    # The next character is escaped and cannot be a delimiter.
                    escaped = True
                    continue
                elif not in_quotes and not escaped:
                    # Find first delimiter for name
                    if ch in ":;" and name_split is None:
                        name_split = i
                    # Find value delimiter (first colon)
                    if ch == ":":
                        value_split = i
                        # Both split points are known now, so the value, which
                        # may be very long, does not need to be scanned.
                        break

                escaped = False

            # Validate parsing results
            if not value_split:
                # No colon found - value is empty, use end of string
                value_split = len(self)

            # Extract name - if no delimiter,
            #   take whole string for validate_token to reject
            name = self[:name_split] if name_split else self
            if not self.strict:
                # Lenient mode: drop all spaces and tabs from the name.
                name = re.sub(r"[ \t]+", "", name.strip())
            validate_token(name)

            if not name_split or name_split + 1 == value_split:
                # No delimiter or empty parameter section
                raise ValueError("Invalid content line")  # noqa: TRY301
            # Parse parameters - they still need to be escaped/unescaped
            # for proper handling of commas, semicolons, etc. in parameter values
            raw_param_str = self[name_split + 1 : value_split]
            if not self.strict:
                raw_param_str = _strip_ows_around_delimiters(raw_param_str)
            param_str = _escape_string(raw_param_str)
            params = Parameters.from_ical(param_str, strict=self.strict)
            params = Parameters(
                (_unescape_string(key), unescape_list_or_string(value))
                for key, value in params.items()
            )
            # Unescape backslash sequences in values but preserve URL encoding
            values = unescape_backslash(self[value_split + 1 :])
        except ValueError as exc:
            raise ValueError(
                f"Content line could not be parsed into parts: '{self}': {exc}"
            ) from exc
        return (name, params, values)

    @classmethod
    def from_ical(cls, ical, strict=False):
        """Unfold the content lines in an iCalendar into long content lines.

        :param ical: the folded content line; converted with ``to_unicode``.
        :param strict: passed on to the created content line.
        """
        ical = to_unicode(ical)
        # a fold is a line break followed by either a space or a tab
        return cls(UFOLD.sub("", ical), strict=strict)

    def to_ical(self):
        """Fold the content line with ``foldline`` and return it encoded with
        ``DEFAULT_ENCODING``.

        RFC 5545 asks for lines that are no longer than 75 octets, which is
        why long content lines are folded.
        """
        return foldline(self).encode(DEFAULT_ENCODING)





[docs]
class Contentlines(list[Contentline]):
    """A list of content lines that is held completely in memory.

    The assumption is that iCalendar files are generally a few kilobytes in
    size, for which this is efficient. For huge files, an iterator should
    probably be used instead.
    """

    def to_ical(self):
        """Join the serialized non-empty lines with CRLF and append a final
        CRLF.
        """
        encoded_lines = [entry.to_ical() for entry in self if entry]
        return b"\r\n".join(encoded_lines) + b"\r\n"

    @classmethod
    def from_ical(cls, st):
        """Parse a string into content lines.

        The input is unfolded and split at line breaks; empty lines are
        skipped. Any error raised while doing so is re-raised as a
        ``ValueError``.
        """
        text = to_unicode(st)
        try:
            # Unfold first: a fold is a line break followed by a space or a tab.
            raw_lines = NEWLINE.split(UFOLD.sub("", text))
            parsed = cls([Contentline(raw) for raw in raw_lines if raw])
            # The empty entry represents the '\r\n' at the end of the last
            # content line.
            parsed.append("")
        except Exception as e:
            raise ValueError("Expected StringType with content lines") from e
        return parsed




__all__ = ["Contentline", "Contentlines"]