Generic PDF objects

Implementation of generic PDF objects (dictionary, number, string, …).

class pypdf.generic.BooleanObject(value: Any)[source]

Bases: PdfObject

clone(pdf_dest: PdfWriterProtocol, force_duplicate: bool = False, ignore_fields: Optional[Sequence[Union[str, int]]] = ()) → BooleanObject[source]: Clone object into pdf_dest.

write_to_stream(stream: IO[Any], encryption_key: Union[None, str, bytes] = None) → None[source]

static read_from_stream(stream: IO[Any]) → BooleanObject[source]

class pypdf.generic.FloatObject(value: Union[str, Any] = '0.0', context: Optional[Any] = None)[source]

Bases: float, PdfObject

clone(pdf_dest: Any, force_duplicate: bool = False, ignore_fields: Optional[Sequence[Union[str, int]]] = ()) → FloatObject[source]: Clone object into pdf_dest.

myrepr() → str[source]

as_numeric() → float[source]

write_to_stream(stream: IO[Any], encryption_key: Union[None, str, bytes] = None) → None[source]

class pypdf.generic.NumberObject(value: Any)[source]

Bases: int, PdfObject

NumberPattern = re.compile(b'[^+-.0-9]')

clone(pdf_dest: Any, force_duplicate: bool = False, ignore_fields: Optional[Sequence[Union[str, int]]] = ()) → NumberObject[source]: Clone object into pdf_dest.

as_numeric() → int[source]

write_to_stream(stream: IO[Any], encryption_key: Union[None, str, bytes] = None) → None[source]

static read_from_stream(stream: IO[Any]) → Union[NumberObject, FloatObject][source]

class pypdf.generic.NameObject[source]

Bases: str, PdfObject

delimiter_pattern = re.compile(b'\\s+|[\\(\\)<>\\[\\]{}/%]')

surfix = b'/'

renumber_table: ClassVar[Dict[str, bytes]] = {'\x00': b'#00', '\x01': b'#01', '\x02': b'#02', '\x03': b'#03', '\x04': b'#04', '\x05': b'#05', '\x06': b'#06', '\x07': b'#07', '\x08': b'#08', '\t': b'#09', '\n': b'#0A', '\x0b': b'#0B', '\x0c': b'#0C', '\r': b'#0D', '\x0e': b'#0E', '\x0f': b'#0F', '\x10': b'#10', '\x11': b'#11', '\x12': b'#12', '\x13': b'#13', '\x14': b'#14', '\x15': b'#15', '\x16': b'#16', '\x17': b'#17', '\x18': b'#18', '\x19': b'#19', '\x1a': b'#1A', '\x1b': b'#1B', '\x1c': b'#1C', '\x1d': b'#1D', '\x1e': b'#1E', '\x1f': b'#1F', ' ': b'#20', '#': b'#23', '%': b'#25', '(': b'#28', ')': b'#29', '/': b'#2F'}

clone(pdf_dest: Any, force_duplicate: bool = False, ignore_fields: Optional[Sequence[Union[str, int]]] = ()) → NameObject[source]: Clone object into pdf_dest.

write_to_stream(stream: IO[Any], encryption_key: Union[None, str, bytes] = None) → None[source]

renumber() → bytes[source]

static unnumber(sin: bytes) → bytes[source]

CHARSETS = ('utf-8', 'gbk', 'latin1')

static read_from_stream(stream: IO[Any], pdf: Any) → NameObject[source]

class pypdf.generic.IndirectObject(idnum: int, generation: int, pdf: Any)[source]

Bases: PdfObject

clone(pdf_dest: PdfWriterProtocol, force_duplicate: bool = False, ignore_fields: Optional[Sequence[Union[str, int]]] = ()) → IndirectObject[source]: Clone object into pdf_dest.

property indirect_reference: IndirectObject

get_object() → Optional[PdfObject][source]

write_to_stream(stream: IO[Any], encryption_key: Union[None, str, bytes] = None) → None[source]

static read_from_stream(stream: IO[Any], pdf: Any) → IndirectObject[source]

class pypdf.generic.NullObject(*args, **kwargs)[source]

Bases: PdfObject

clone(pdf_dest: PdfWriterProtocol, force_duplicate: bool = False, ignore_fields: Optional[Sequence[Union[str, int]]] = ()) → NullObject[source]: Clone object into pdf_dest.

write_to_stream(stream: IO[Any], encryption_key: Union[None, str, bytes] = None) → None[source]

static read_from_stream(stream: IO[Any]) → NullObject[source]

class pypdf.generic.PdfObject(*args, **kwargs)[source]

Bases: PdfObjectProtocol

hash_func(*, usedforsecurity=True): Returns a sha1 hash object; optionally initialized with a string

indirect_reference: Optional[IndirectObject]

hash_value_data() → bytes[source]

hash_value() → bytes[source]

clone(pdf_dest: PdfWriterProtocol, force_duplicate: bool = False, ignore_fields: Optional[Sequence[Union[str, int]]] = ()) → PdfObject[source]

Clone object into pdf_dest (PdfWriterProtocol which is an interface for PdfWriter).

By default, this method will call _reference_clone (see _reference).

Parameters

pdf_dest – Target to clone to.
force_duplicate – By default, if the object has already been cloned and referenced, the copy will be returned; when True, a new copy will be created. (Default value = False)
ignore_fields – List/tuple of field names (for dictionaries) that will be ignored during cloning (applies to children duplication as well). If fields are to be considered for a limited number of levels, you have to add it as integer, for example [1,"/B","/TOTO"] means that "/B" will be ignored at the first level only but "/TOTO" on all levels.

Returns

The cloned PdfObject

get_object() → Optional[PdfObject][source]: Resolve indirect references.

write_to_stream(stream: IO[Any], encryption_key: Union[None, str, bytes] = None) → None[source]

class pypdf.generic.TextStringObject(*args, **kwargs)[source]

Bases: str, PdfObject

A string object that has been decoded into a real unicode string.

If read from a PDF document, this string appeared to match the PDFDocEncoding, or contained a UTF-16BE BOM mark to cause UTF-16 decoding to occur.

clone(pdf_dest: Any, force_duplicate: bool = False, ignore_fields: Optional[Sequence[Union[str, int]]] = ()) → TextStringObject[source]: Clone object into pdf_dest.

autodetect_pdfdocencoding = False

autodetect_utf16 = False

property original_bytes: bytes: It is occasionally possible that a text string object gets created where a byte string object was expected due to the autodetection mechanism – if that occurs, this “original_bytes” property can be used to back-calculate what the original encoded bytes were.

get_original_bytes() → bytes[source]

get_encoded_bytes() → bytes[source]

write_to_stream(stream: IO[Any], encryption_key: Union[None, str, bytes] = None) → None[source]

class pypdf.generic.ByteStringObject(*args, **kwargs)[source]

Bases: bytes, PdfObject

Represents a string object where the text encoding could not be determined.

This occurs quite often, as the PDF spec doesn’t provide an alternate way to represent strings – for example, the encryption data stored in files (like /O) is clearly not text, but is still stored in a “String” object.

clone(pdf_dest: Any, force_duplicate: bool = False, ignore_fields: Optional[Sequence[Union[str, int]]] = ()) → ByteStringObject[source]: Clone object into pdf_dest.

property original_bytes: bytes: For compatibility with TextStringObject.original_bytes.

write_to_stream(stream: IO[Any], encryption_key: Union[None, str, bytes] = None) → None[source]

class pypdf.generic.AnnotationBuilder[source]

Bases: object

The AnnotationBuilder is deprecated.

Instead, use the annotation classes in pypdf.annotations.

See adding PDF annotations for its usage combined with PdfWriter.

static text(rect: Union[RectangleObject, Tuple[float, float, float, float]], text: str, open: bool = False, flags: int = 0) → DictionaryObject[source]

Add text annotation.

Parameters

rect – array of four integers [xLL, yLL, xUR, yUR] specifying the clickable rectangular area
text – The text that is added to the document
open –
flags –

Returns

A dictionary object representing the annotation.

static free_text(text: str, rect: Union[RectangleObject, Tuple[float, float, float, float]], font: str = 'Helvetica', bold: bool = False, italic: bool = False, font_size: str = '14pt', font_color: str = '000000', border_color: Optional[str] = '000000', background_color: Optional[str] = 'ffffff') → DictionaryObject[source]

Add text in a rectangle to a page.

Parameters

text – Text to be added
rect – array of four integers [xLL, yLL, xUR, yUR] specifying the clickable rectangular area
font – Name of the Font, e.g. ‘Helvetica’
bold – Print the text in bold
italic – Print the text in italic
font_size – How big the text will be, e.g. ‘14pt’
font_color – Hex-string for the color, e.g. cdcdcd
border_color – Hex-string for the border color, e.g. cdcdcd. Use None for no border.
background_color – Hex-string for the background of the annotation, e.g. cdcdcd. Use None for transparent background.

Returns

A dictionary object representing the annotation.

static popup(*, rect: Union[RectangleObject, Tuple[float, float, float, float]], flags: int = 0, parent: Optional[DictionaryObject] = None, open: bool = False) → DictionaryObject[source]

Add a popup to the document.

Parameters

rect – Specifies the clickable rectangular area as [xLL, yLL, xUR, yUR]
flags – 1 - invisible, 2 - hidden, 3 - print, 4 - no zoom, 5 - no rotate, 6 - no view, 7 - read only, 8 - locked, 9 - toggle no view, 10 - locked contents
open – Whether the popup should be shown directly (default is False).
parent – The contents of the popup. Create this via the AnnotationBuilder.

Returns

A dictionary object representing the annotation.

static line(p1: Tuple[float, float], p2: Tuple[float, float], rect: Union[RectangleObject, Tuple[float, float, float, float]], text: str = '', title_bar: Optional[str] = None) → DictionaryObject[source]

Draw a line on the PDF.

Parameters

p1 – First point
p2 – Second point
rect – array of four integers [xLL, yLL, xUR, yUR] specifying the clickable rectangular area
text – Text to be displayed as the line annotation
title_bar – Text to be displayed in the title bar of the annotation; by convention this is the name of the author

Returns

A dictionary object representing the annotation.

static polyline(vertices: List[Tuple[float, float]]) → DictionaryObject[source]

Draw a polyline on the PDF.

Parameters: vertices – Array specifying the vertices (x, y) coordinates of the poly-line.
Returns: A dictionary object representing the annotation.

static rectangle(rect: Union[RectangleObject, Tuple[float, float, float, float]], interiour_color: Optional[str] = None) → DictionaryObject[source]

Draw a rectangle on the PDF.

This method uses the /Square annotation type of the PDF format.

Parameters

rect – array of four integers [xLL, yLL, xUR, yUR] specifying the clickable rectangular area
interiour_color – None or hex-string for the color, e.g. cdcdcd. If None is used, the interior is transparent.

Returns

A dictionary object representing the annotation.

static highlight(*, rect: Union[RectangleObject, Tuple[float, float, float, float]], quad_points: ArrayObject, highlight_color: str = 'ff0000') → DictionaryObject[source]

Add a highlight annotation to the document.

Parameters

rect – Array of four integers [xLL, yLL, xUR, yUR] specifying the highlighted area
quad_points – An ArrayObject of 8 FloatObjects. Must match a word or a group of words, otherwise no highlight will be shown.
highlight_color – The color used for the highlight.

Returns

A dictionary object representing the annotation.

static ellipse(rect: Union[RectangleObject, Tuple[float, float, float, float]], interiour_color: Optional[str] = None) → DictionaryObject[source]

Draw an ellipse on the PDF.

This method uses the /Circle annotation type of the PDF format.

Parameters

rect – array of four integers [xLL, yLL, xUR, yUR] specifying the bounding box of the ellipse
interiour_color – None or hex-string for the color, e.g. cdcdcd. If None is used, the interior is transparent.

Returns

A dictionary object representing the annotation.

static polygon(vertices: List[Tuple[float, float]]) → DictionaryObject[source]

DEFAULT_FIT = <pypdf.generic._fit.Fit object>

static link(rect: ~typing.Union[~pypdf.generic._rectangle.RectangleObject, ~typing.Tuple[float, float, float, float]], border: ~typing.Optional[~pypdf.generic._data_structures.ArrayObject] = None, url: ~typing.Optional[str] = None, target_page_index: ~typing.Optional[int] = None, fit: ~pypdf.generic._fit.Fit = <pypdf.generic._fit.Fit object>) → DictionaryObject[source]

Add a link to the document.

The link can either be an external link or an internal link.

An external link requires the url parameter. An internal link requires the target_page_index and fit parameters.

Parameters

rect – array of four integers [xLL, yLL, xUR, yUR] specifying the clickable rectangular area
border – if provided, an array describing border-drawing properties: the horizontal corner radius, the vertical corner radius, the border width and, optionally, a dash array. See the PDF spec for details. No border will be drawn if this argument is omitted.
url – Link to a website (if you want to make an external link)
target_page_index – index of the page to which the link should go (if you want to make an internal link)
fit – Page fit or ‘zoom’ option.

Returns

A dictionary object representing the annotation.

class pypdf.generic.ArrayObject(iterable=(), /)[source]

Bases: List[Any], PdfObject

clone(pdf_dest: PdfWriterProtocol, force_duplicate: bool = False, ignore_fields: Optional[Sequence[Union[str, int]]] = ()) → ArrayObject[source]: Clone object into pdf_dest.

items() → Iterable[Any][source]: Emulate DictionaryObject.items for a list (index, object).

write_to_stream(stream: IO[Any], encryption_key: Union[None, str, bytes] = None) → None[source]

static read_from_stream(stream: IO[Any], pdf: Optional[PdfReaderProtocol], forced_encoding: Union[None, str, List[str], Dict[int, str]] = None) → ArrayObject[source]

class pypdf.generic.DictionaryObject[source]

Bases: Dict[Any, Any], PdfObject

clone(pdf_dest: PdfWriterProtocol, force_duplicate: bool = False, ignore_fields: Optional[Sequence[Union[str, int]]] = ()) → DictionaryObject[source]: Clone object into pdf_dest.

raw_get(key: Any) → Any[source]

get_inherited(key: str, default: Any = None) → Any[source]

Return the value of a key, or the value inherited from the parent if the key is not found on this object. If it is not found at all, return default.

Parameters

key – string identifying the field to return
default – default value to return

Returns

Current key or inherited one, otherwise default value.

setdefault(key: Any, value: Optional[Any] = None) → Any[source]

property xmp_metadata: Optional[XmpInformationProtocol]

Retrieve XMP (Extensible Metadata Platform) data relevant to this object, if available.

See Table 347 — Additional entries in a metadata stream dictionary.

Returns: An XmpInformation instance that can be used to access XMP metadata from the document, or None if no metadata was found on the document root.

write_to_stream(stream: IO[Any], encryption_key: Union[None, str, bytes] = None) → None[source]

static read_from_stream(stream: IO[Any], pdf: Optional[PdfReaderProtocol], forced_encoding: Union[None, str, List[str], Dict[int, str]] = None) → DictionaryObject[source]

class pypdf.generic.TreeObject(dct: Optional[DictionaryObject] = None)[source]

Bases: DictionaryObject

hasChildren() → bool[source]

has_children() → bool[source]

children() → Iterable[Any][source]

add_child(child: Any, pdf: PdfWriterProtocol) → None[source]

inc_parent_counter_default(parent: Union[None, IndirectObject, TreeObject], n: int) → None[source]

inc_parent_counter_outline(parent: Union[None, IndirectObject, TreeObject], n: int) → None[source]

insert_child(child: Any, before: Any, pdf: PdfWriterProtocol, inc_parent_counter: Optional[Callable[[...], Any]] = None) → IndirectObject[source]

remove_child(child: Any) → None[source]

remove_from_tree() → None[source]: Remove the object from the tree it is in.

emptyTree() → None[source]

empty_tree() → None[source]

class pypdf.generic.StreamObject[source]

Bases: DictionaryObject

get_data() → Union[bytes, str][source]

set_data(data: bytes) → None[source]

hash_value_data() → bytes[source]

write_to_stream(stream: IO[Any], encryption_key: Union[None, str, bytes] = None) → None[source]

static initializeFromDictionary(data: Dict[str, Any]) → Union[EncodedStreamObject, DecodedStreamObject][source]

static initialize_from_dictionary(data: Dict[str, Any]) → Union[EncodedStreamObject, DecodedStreamObject][source]

flate_encode(level: int = - 1) → EncodedStreamObject[source]

class pypdf.generic.DecodedStreamObject[source]: Bases: StreamObject

class pypdf.generic.EncodedStreamObject[source]

Bases: StreamObject

get_data() → Union[bytes, str][source]

set_data(data: bytes) → None[source]

class pypdf.generic.ContentStream(stream: Any, pdf: Any, forced_encoding: Union[None, str, List[str], Dict[int, str]] = None)[source]

Bases: DecodedStreamObject

In order to be fast, this data structure can contain either:

raw data in ._data
parsed stream operations in ._operations.

At any time, a ContentStream object can either have both of those fields defined, or one field defined and the other set to None.

These fields are “rebuilt” lazily, when accessed:

when .get_data() is called, if ._data is None, it is rebuilt from ._operations.
when .operations is called, if ._operations is None, it is rebuilt from ._data.

Conversely, these fields can be invalidated:

when .set_data() is called, ._operations is set to None.
when .operations is set, ._data is set to None.

clone(pdf_dest: Any, force_duplicate: bool = False, ignore_fields: Optional[Sequence[Union[str, int]]] = ()) → ContentStream[source]

Clone object into pdf_dest.

Parameters

pdf_dest –
force_duplicate –
ignore_fields –

Returns

The cloned ContentStream

get_data() → bytes[source]

set_data(data: bytes) → None[source]

property operations: List[Tuple[Any, Any]]

isolate_graphics_state() → None[source]

write_to_stream(stream: IO[Any], encryption_key: Union[None, str, bytes] = None) → None[source]

class pypdf.generic.ViewerPreferences(value: Any = None)[source]

Bases: DictionaryObject

property PRINT_SCALING: NameObject

class pypdf.generic.OutlineItem(title: str, page: Union[NumberObject, IndirectObject, NullObject, DictionaryObject], fit: Fit)[source]

Bases: Destination

write_to_stream(stream: IO[Any], encryption_key: Union[None, str, bytes] = None) → None[source]

class pypdf.generic.OutlineFontFlag(value, names=None, *values, module=None, qualname=None, type=None, start=1, boundary=None)[source]

Bases: IntFlag

A class used as an enumerable flag for formatting an outline font.

italic = 1

bold = 2

pypdf.generic.read_object(stream: IO[Any], pdf: Optional[PdfReaderProtocol], forced_encoding: Union[None, str, List[str], Dict[int, str]] = None) → Union[PdfObject, int, str, ContentStream][source]

pypdf.generic.create_string_object(string: Union[str, bytes], forced_encoding: Union[None, str, List[str], Dict[int, str]] = None) → Union[TextStringObject, ByteStringObject][source]

Create a ByteStringObject or a TextStringObject from a string to represent the string.

Parameters

string – The data being used
forced_encoding – Typically None, or an encoding string

Returns

A ByteStringObject or a TextStringObject

Raises

TypeError – If string is not of type str or bytes.

pypdf.generic.encode_pdfdocencoding(unicode_string: str) → bytes[source]

pypdf.generic.decode_pdfdocencoding(byte_array: bytes) → str[source]

pypdf.generic.hex_to_rgb(value: str) → Tuple[float, float, float][source]

pypdf.generic.read_hex_string_from_stream(stream: IO[Any], forced_encoding: Union[None, str, List[str], Dict[int, str]] = None) → Union[TextStringObject, ByteStringObject][source]

pypdf.generic.read_string_from_stream(stream: IO[Any], forced_encoding: Union[None, str, List[str], Dict[int, str]] = None) → Union[TextStringObject, ByteStringObject][source]

class pypdf._protocols.PdfObjectProtocol(*args, **kwargs)[source]

Bases: Protocol

indirect_reference: Any

clone(pdf_dest: Any, force_duplicate: bool = False, ignore_fields: Optional[Union[Tuple[str, ...], List[str]]] = ()) → Any[source]

get_object() → Optional[PdfObjectProtocol][source]

hash_value() → bytes[source]

write_to_stream(stream: IO[Any], encryption_key: Union[None, str, bytes] = None) → None[source]

class pypdf._protocols.XmpInformationProtocol(*args, **kwargs)[source]: Bases: PdfObjectProtocol

class pypdf._protocols.PdfCommonDocProtocol(*args, **kwargs)[source]

Bases: Protocol

property pdf_header: str

property pages: List[Any]

property root_object: PdfObjectProtocol

get_object(indirect_reference: Any) → Optional[PdfObjectProtocol][source]

property strict: bool

class pypdf._protocols.PdfReaderProtocol(*args, **kwargs)[source]

Bases: PdfCommonDocProtocol, Protocol

abstract property xref: Dict[int, Dict[int, Any]]

abstract property trailer: Dict[str, Any]

class pypdf._protocols.PdfWriterProtocol(*args, **kwargs)[source]

Bases: PdfCommonDocProtocol, Protocol

abstract write(stream: Union[Path, str, IO[Any]]) → Tuple[bool, IO[Any]][source]