# Source code for icalendar.parser.ical.component

"""Parsing a component's iCalendar data."""

from __future__ import annotations

from typing import TYPE_CHECKING, ClassVar

from icalendar.parser.content_line import Contentline, Contentlines
from icalendar.parser.property import split_on_unescaped_comma
from icalendar.prop import vBroken
from icalendar.timezone import tzp

if TYPE_CHECKING:
    from icalendar.cal import Component, ComponentFactory
    from icalendar.parser.parameter import Parameters
    from icalendar.prop import VPROPERTY, TypesFactory


class ComponentIcalParser:
    """A parser for a component's iCalendar data.

    This uses the template method pattern, where the main parsing logic
    can be refined in subclasses: :meth:`parse` drives the process and
    delegates to the ``handle_*`` hooks for each kind of content line.
    """

    datetime_names: ClassVar[tuple[str, ...]] = (
        "DTSTART",
        "DTEND",
        "RECURRENCE-ID",
        "DUE",
        "RDATE",
        "EXDATE",
    )
    """Names to check for TZID parameter when parsing datetimes.

    Their ``from_ical`` methods take an optional ``tzid`` argument,
    which is used if the property has a TZID parameter.
    """

    def __init__(
        self,
        data: bytes | str | list[Contentline],
        component_factory: ComponentFactory,
        types_factory: TypesFactory,
    ):
        """Initialize the parser with the raw data.

        Parameters:
            data: The raw iCalendar data to parse, either as bytes, as a
                string, or as a list of content lines.
            component_factory: The factory to use for creating components.
            types_factory: The factory to use for creating property values.
        """
        self._data = data
        self._component_factory = component_factory
        self._types_factory = types_factory
        self._tzp = tzp

    # Set by initialize_parsing(); declared here for type checkers.
    _content_lines: list[Contentline]

    def contains_component(self, name: str) -> bool:
        """Check if the parser contains a component.

        Parameters:
            name: The component name, compared case-insensitively.

        Returns:
            ``True`` if a ``BEGIN:<name>`` content line is present.
        """
        self.initialize_parsing()
        begin_line = "BEGIN:" + name.upper()
        # Compare lengths first so that only candidate lines are uppercased.
        return any(
            len(content_line) == len(begin_line) and content_line.upper() == begin_line
            for content_line in self._content_lines
        )

    def contains_uid(self, uid: str) -> bool:
        """Determines whether the component contains a ``uid``.

        This is a substring check against every content line, not a
        check of parsed ``UID`` properties.

        Returns:
            ``True`` if the component contains a ``uid``, else ``False``.
        """
        self.initialize_parsing()
        return any(uid in line for line in self._content_lines)

    def initialize_parsing(self):
        """Reset the parsing state and split the data into content lines.

        Raw data is converted with ``Contentlines.from_ical`` and the
        result is stored back into ``self._data``, so calling this again
        reuses the already split list instead of splitting a second time.
        """
        self._stack: list[Component] = []
        self._components: list[Component] = []
        self._data = self._content_lines = (
            self._data
            if isinstance(self._data, list)
            else Contentlines.from_ical(self._data)
        )
        self._content_lines_iterator = iter(self._content_lines)

    def handle_line_parse_error(self, exception: Exception):
        """Handle a line parsing error.

        Parameters:
            exception: The error raised while splitting a content line.

        Raises:
            Exception: ``exception`` itself, if there is no current
                component or the current component does not ignore
                exceptions.
        """
        # if unable to parse a line within a component
        # that ignores exceptions, mark the component
        # as broken and skip the line. otherwise raise.
        component = self.component
        if not component or not component.ignore_exceptions:
            raise exception
        component.errors.append((None, str(exception)))

    def handle_begin_component(self, vals: str) -> None:
        """Handle the beginning of a component.

        Parameters:
            vals: The value of the ``BEGIN`` line, i.e. the component name.
        """
        # try and create one of the components defined in the spec,
        # otherwise get a general Components for robustness.
        c_name = vals.upper()
        c_class = self._component_factory.get_component_class(c_name)
        # If component factory cannot resolve ``c_name``, the generic
        # ``Component`` class is used which does not have the name set.
        # That's opposed to the usage of ``cls``, which represents a
        # more concrete subclass with a name set (e.g. VCALENDAR).
        component = c_class()
        if not getattr(component, "name", ""):  # undefined components
            component.name = c_name
        self._stack.append(component)

    def handle_end_component(self, vals: str) -> None:
        """Handle the end of a component.

        Parameters:
            vals: The value of the ``END`` line, i.e. the component name.

        Raises:
            ValueError: If there is no open component to close.
        """
        # we are done adding properties to this component
        # so pop it from the stack and add it to the new top.
        if not self._stack:
            # The stack is currently empty, the input must be invalid
            raise ValueError("END encountered without an accompanying BEGIN!")

        component = self._stack.pop()
        if not self._stack:  # we are at the end
            self._components.append(component)
        else:
            self._stack[-1].add_component(component)
        if vals.upper() == "VTIMEZONE" and "TZID" in component:
            tzp.cache_timezone_component(component)

    def prepare_components(self) -> None:
        """Prepare the parsed components.

        This is called when all components are parsed.
        It does nothing here and exists as a hook for subclasses.
        """

    def parse(self) -> list[Component]:
        """Parse the raw data.

        Returns:
            The top-level components found in the data.
        """
        self.initialize_parsing()
        self.parse_content_lines()
        self.prepare_components()
        return self._components

    def parse_content_lines(self) -> None:
        """Parse the content lines.

        Empty lines are skipped. ``BEGIN`` and ``END`` lines open and
        close components; every other line is handled as a property.
        """
        for line in self._content_lines_iterator:
            if not line:
                continue

            try:
                name, params, vals = line.parts()
            except ValueError as e:
                self.handle_line_parse_error(e)
                continue

            uname = name.upper()
            if uname == "BEGIN":
                self.handle_begin_component(vals)
            elif uname == "END":
                self.handle_end_component(vals)
            else:
                self.handle_property(uname, params, vals, line)

    @property
    def component(self) -> Component | None:
        """The innermost open component, or ``None`` if none is open."""
        return self._stack[-1] if self._stack else None

    def get_factory_for_property(self, name: str, params: Parameters) -> VPROPERTY:
        """Get the factory for a property.

        Parameters:
            name: The name of the property.
            params: The parameters of the property; their ``value``
                attribute is passed on to the types factory.
        """
        return self._types_factory.for_property(name, params.value)

    def handle_property(
        self, name: str, params: Parameters, vals: str, line: Contentline
    ) -> None:
        """Handle a property line.

        Add properties to the top of the current stack.

        Parameters:
            name: The name of the property, uppercased.
            params: The parameters of the property.
            vals: The value of the property.
            line: The original content line.

        Raises:
            ValueError: If the property is outside of any component and
                is not an ``X-COMMENT``.
        """
        # A property needs an open component to be added to.
        if not self.component:
            # only accept X-COMMENT at the end of the .ics file
            # ignore these components in parsing
            if name == "X-COMMENT":
                return
            raise ValueError(f'Property "{name}" does not have a parent component.')

        # Determine TZID for datetime properties
        tzid = params.get("TZID") if params and name in self.datetime_names else None

        # Handle special cases for value list preparation
        if name == "CATEGORIES":
            if self.handle_categories(params, vals, line):
                return
            # Fallback to normal processing if we can't find colon
            vals_list = [vals]
        elif name == "FREEBUSY":
            # Handle FREEBUSY comma-separated values
            vals_list = vals.split(",")
        # Workaround broken ICS files with empty RDATE
        # (not EXDATE - let it parse and fail)
        elif name == "RDATE" and vals == "":
            vals_list = []
        else:
            vals_list = [vals]

        # Parse all properties eagerly
        for val in vals_list:
            self.parse_and_add_property(name, params, val, tzid, line)

    def parse_and_add_property(
        self,
        name: str,
        params: Parameters,
        val: str,
        tzid: str | None,
        line: Contentline,
    ):
        """Parse a property value and add it to the current component.

        Parameters:
            name: The name of the property, uppercased.
            params: The parameters of the property.
            val: The single raw value to parse.
            tzid: The TZID to pass to ``from_ical``, if any.
            line: The original content line, used for error handling.
        """
        factory = self.get_factory_for_property(name, params)
        try:
            if tzid:
                parsed_val = factory.from_ical(val, tzid)
            else:
                parsed_val = factory.from_ical(val)
        except (ValueError, TypeError) as e:
            self.handle_property_parse_error(e, name, params, val, line)
        else:
            vals_inst = factory(parsed_val)
            vals_inst.params = params
            self.component.add(name, vals_inst, encode=False)

    def handle_property_parse_error(
        self,
        exception: Exception,
        name: str,
        params: Parameters,
        val: str,
        line: Contentline,
    ):
        """Handle the parse error for a property.

        Parameters:
            exception: The error raised while parsing the value.
            name: The name of the property.
            params: The parameters of the property.
            val: The raw value that failed to parse.
            line: The original content line.

        Raises:
            Exception: ``exception`` itself, unless the current component
                ignores exceptions or the property is an ``X-`` property.
        """
        if not self.component.ignore_exceptions and not name.upper().startswith("X-"):
            raise exception
        # Error-tolerant mode: create vBroken
        factory = self.get_factory_for_property(name, params)
        expected_type = getattr(factory, "__name__", "unknown")
        broken_prop = vBroken.from_parse_error(
            raw_value=val,
            params=params,
            property_name=name,
            expected_type=expected_type,
            error=exception,
        )
        self.component.errors.append((name, str(exception)))
        self.component.add(name, broken_prop, encode=False)

    @staticmethod
    def _find_value_separator(line: str) -> int:
        """Return the index of the colon that starts the value of a line.

        That is the first colon which is neither inside a double-quoted
        parameter value nor escaped with a backslash.

        Returns:
            The index of the separator, or ``-1`` if there is none.
        """
        escapable = {",", ":", ";", "\\"}
        in_quotes = False
        skip_next = False
        for index, char in enumerate(line):
            if skip_next:
                # This character was escaped by the preceding backslash.
                skip_next = False
            elif char == "\\" and line[index + 1 : index + 2] in escapable:
                skip_next = True
            elif char == '"':
                in_quotes = not in_quotes
            elif char == ":" and not in_quotes:
                return index
        return -1

    def handle_categories(
        self, params: Parameters, vals: str, line: Contentline
    ) -> bool:
        """Handle the special case of CATEGORIES property.

        Parameters:
            params: The parameters of the property.
            vals: The already split-off value; unused because the raw
                value is taken from ``line`` instead.
            line: The original content line.

        Returns:
            ``True`` if handled, else ``False``.
        """
        # Special handling for CATEGORIES - need raw value
        # before unescaping to properly split on unescaped commas
        line_str = str(line)
        # The value starts after the first colon outside of quoted
        # parameter values. Searching from the right instead would cut
        # off values that contain a colon themselves ("Work:Home"),
        # while quoted parameters such as ALTREP="http://..." are
        # skipped by the scanner.
        colon_idx = self._find_value_separator(line_str)
        if colon_idx > 0:
            raw_value = line_str[colon_idx + 1 :]
            # Parse categories immediately (not lazily) for both
            # strict and tolerant components.
            # CATEGORIES needs special comma handling
            try:
                category_list = split_on_unescaped_comma(raw_value)
                factory = self.get_factory_for_property("CATEGORIES", params)
                vals_inst = factory(category_list)
                vals_inst.params = params
                self.component.add("CATEGORIES", vals_inst, encode=False)
            except ValueError as e:
                self.handle_property_parse_error(
                    e, "CATEGORIES", params, raw_value, line
                )
            return True
        return False