Generic PDF objects

Implementation of generic PDF objects (dictionary, number, string, …).

class pypdf.generic.BooleanObject(value: Any)[source]

Bases: PdfObject

clone(pdf_dest: PdfWriterProtocol, force_duplicate: bool = False, ignore_fields: Optional[Sequence[Union[str, int]]] = ()) BooleanObject[source]

Clone object into pdf_dest.

write_to_stream(stream: IO[Any], encryption_key: Union[None, str, bytes] = None) None[source]
static read_from_stream(stream: IO[Any]) BooleanObject[source]
class pypdf.generic.FloatObject(value: Union[str, Any] = '0.0', context: Optional[Any] = None)[source]

Bases: float, PdfObject

clone(pdf_dest: Any, force_duplicate: bool = False, ignore_fields: Optional[Sequence[Union[str, int]]] = ()) FloatObject[source]

Clone object into pdf_dest.

myrepr() str[source]
as_numeric() float[source]
write_to_stream(stream: IO[Any], encryption_key: Union[None, str, bytes] = None) None[source]
class pypdf.generic.NumberObject(value: Any)[source]

Bases: int, PdfObject

NumberPattern = re.compile(b'[^+-.0-9]')
clone(pdf_dest: Any, force_duplicate: bool = False, ignore_fields: Optional[Sequence[Union[str, int]]] = ()) NumberObject[source]

Clone object into pdf_dest.

as_numeric() int[source]
write_to_stream(stream: IO[Any], encryption_key: Union[None, str, bytes] = None) None[source]
static read_from_stream(stream: IO[Any]) Union[NumberObject, FloatObject][source]
class pypdf.generic.NameObject[source]

Bases: str, PdfObject

delimiter_pattern = re.compile(b'\\s+|[\\(\\)<>\\[\\]{}/%]')
surfix = b'/'
renumber_table: ClassVar[Dict[str, bytes]] = {'\x00': b'#00', '\x01': b'#01', '\x02': b'#02', '\x03': b'#03', '\x04': b'#04', '\x05': b'#05', '\x06': b'#06', '\x07': b'#07', '\x08': b'#08', '\t': b'#09', '\n': b'#0A', '\x0b': b'#0B', '\x0c': b'#0C', '\r': b'#0D', '\x0e': b'#0E', '\x0f': b'#0F', '\x10': b'#10', '\x11': b'#11', '\x12': b'#12', '\x13': b'#13', '\x14': b'#14', '\x15': b'#15', '\x16': b'#16', '\x17': b'#17', '\x18': b'#18', '\x19': b'#19', '\x1a': b'#1A', '\x1b': b'#1B', '\x1c': b'#1C', '\x1d': b'#1D', '\x1e': b'#1E', '\x1f': b'#1F', ' ': b'#20', '#': b'#23', '%': b'#25', '(': b'#28', ')': b'#29', '/': b'#2F'}
clone(pdf_dest: Any, force_duplicate: bool = False, ignore_fields: Optional[Sequence[Union[str, int]]] = ()) NameObject[source]

Clone object into pdf_dest.

write_to_stream(stream: IO[Any], encryption_key: Union[None, str, bytes] = None) None[source]
renumber() bytes[source]
static unnumber(sin: bytes) bytes[source]
static read_from_stream(stream: IO[Any], pdf: Any) NameObject[source]
class pypdf.generic.IndirectObject(idnum: int, generation: int, pdf: Any)[source]

Bases: PdfObject

clone(pdf_dest: PdfWriterProtocol, force_duplicate: bool = False, ignore_fields: Optional[Sequence[Union[str, int]]] = ()) IndirectObject[source]

Clone object into pdf_dest.

property indirect_reference: IndirectObject
get_object() Optional[PdfObject][source]
write_to_stream(stream: IO[Any], encryption_key: Union[None, str, bytes] = None) None[source]
static read_from_stream(stream: IO[Any], pdf: Any) IndirectObject[source]
class pypdf.generic.NullObject(*args, **kwargs)[source]

Bases: PdfObject

clone(pdf_dest: PdfWriterProtocol, force_duplicate: bool = False, ignore_fields: Optional[Sequence[Union[str, int]]] = ()) NullObject[source]

Clone object into pdf_dest.

write_to_stream(stream: IO[Any], encryption_key: Union[None, str, bytes] = None) None[source]
static read_from_stream(stream: IO[Any]) NullObject[source]
class pypdf.generic.PdfObject(*args, **kwargs)[source]

Bases: PdfObjectProtocol

hash_func(*, usedforsecurity=True)

Returns a sha1 hash object; optionally initialized with a string

indirect_reference: Optional[IndirectObject]
hash_value_data() bytes[source]
hash_value() bytes[source]
clone(pdf_dest: PdfWriterProtocol, force_duplicate: bool = False, ignore_fields: Optional[Sequence[Union[str, int]]] = ()) PdfObject[source]

Clone object into pdf_dest (PdfWriterProtocol which is an interface for PdfWriter).

By default, this method will call _reference_clone (see _reference).

Parameters
  • pdf_dest – Target to clone to.

  • force_duplicate – By default, if the object has already been cloned and referenced, the copy will be returned; when True, a new copy will be created. (Default value = False)

  • ignore_fields – List/tuple of field names (for dictionaries) that will be ignored during cloning (applies to children duplication as well). To ignore a field for only a limited number of levels, add that number as an integer before the field name; for example, [1,"/B","/TOTO"] means that "/B" will be ignored at the first level only, whereas "/TOTO" will be ignored on all levels.

Returns

The cloned PdfObject

get_object() Optional[PdfObject][source]

Resolve indirect references.

write_to_stream(stream: IO[Any], encryption_key: Union[None, str, bytes] = None) None[source]
class pypdf.generic.TextStringObject(*args, **kwargs)[source]

Bases: str, PdfObject

A string object that has been decoded into a real unicode string.

If read from a PDF document, this string appeared to match the PDFDocEncoding, or contained a UTF-16BE byte order mark (BOM) that caused UTF-16 decoding to occur.

clone(pdf_dest: Any, force_duplicate: bool = False, ignore_fields: Optional[Sequence[Union[str, int]]] = ()) TextStringObject[source]

Clone object into pdf_dest.

autodetect_pdfdocencoding = False
autodetect_utf16 = False
property original_bytes: bytes

It is occasionally possible that a text string object gets created where a byte string object was expected due to the autodetection mechanism – if that occurs, this “original_bytes” property can be used to back-calculate what the original encoded bytes were.

get_original_bytes() bytes[source]
get_encoded_bytes() bytes[source]
write_to_stream(stream: IO[Any], encryption_key: Union[None, str, bytes] = None) None[source]
class pypdf.generic.ByteStringObject(*args, **kwargs)[source]

Bases: bytes, PdfObject

Represents a string object where the text encoding could not be determined.

This occurs quite often, as the PDF spec doesn’t provide an alternate way to represent strings – for example, the encryption data stored in files (like /O) is clearly not text, but is still stored in a “String” object.

clone(pdf_dest: Any, force_duplicate: bool = False, ignore_fields: Optional[Sequence[Union[str, int]]] = ()) ByteStringObject[source]

Clone object into pdf_dest.

property original_bytes: bytes

For compatibility with TextStringObject.original_bytes.

write_to_stream(stream: IO[Any], encryption_key: Union[None, str, bytes] = None) None[source]
class pypdf.generic.AnnotationBuilder[source]

Bases: object

The AnnotationBuilder is deprecated.

Instead, use the annotation classes in pypdf.annotations.

See adding PDF annotations for its usage combined with PdfWriter.

static text(rect: Union[RectangleObject, Tuple[float, float, float, float]], text: str, open: bool = False, flags: int = 0) DictionaryObject[source]

Add text annotation.

Parameters
  • rect – array of four integers [xLL, yLL, xUR, yUR] specifying the clickable rectangular area

  • text – The text that is added to the document

  • open

  • flags

Returns

A dictionary object representing the annotation.

static free_text(text: str, rect: Union[RectangleObject, Tuple[float, float, float, float]], font: str = 'Helvetica', bold: bool = False, italic: bool = False, font_size: str = '14pt', font_color: str = '000000', border_color: Optional[str] = '000000', background_color: Optional[str] = 'ffffff') DictionaryObject[source]

Add text in a rectangle to a page.

Parameters
  • text – Text to be added

  • rect – array of four integers [xLL, yLL, xUR, yUR] specifying the clickable rectangular area

  • font – Name of the Font, e.g. ‘Helvetica’

  • bold – Print the text in bold

  • italic – Print the text in italic

  • font_size – How big the text will be, e.g. ‘14pt’

  • font_color – Hex-string for the color, e.g. cdcdcd

  • border_color – Hex-string for the border color, e.g. cdcdcd. Use None for no border.

  • background_color – Hex-string for the background of the annotation, e.g. cdcdcd. Use None for transparent background.

Returns

A dictionary object representing the annotation.

static popup(*, rect: Union[RectangleObject, Tuple[float, float, float, float]], flags: int = 0, parent: Optional[DictionaryObject] = None, open: bool = False) DictionaryObject[source]

Add a popup to the document.

Parameters
  • rect – Specifies the clickable rectangular area as [xLL, yLL, xUR, yUR]

  • flags – 1 - invisible, 2 - hidden, 3 - print, 4 - no zoom, 5 - no rotate, 6 - no view, 7 - read only, 8 - locked, 9 - toggle no view, 10 - locked contents

  • open – Whether the popup should be shown directly (default is False).

  • parent – The contents of the popup. Create this via the AnnotationBuilder.

Returns

A dictionary object representing the annotation.

static line(p1: Tuple[float, float], p2: Tuple[float, float], rect: Union[RectangleObject, Tuple[float, float, float, float]], text: str = '', title_bar: Optional[str] = None) DictionaryObject[source]

Draw a line on the PDF.

Parameters
  • p1 – First point

  • p2 – Second point

  • rect – array of four integers [xLL, yLL, xUR, yUR] specifying the clickable rectangular area

  • text – Text to be displayed as the line annotation

  • title_bar – Text to be displayed in the title bar of the annotation; by convention this is the name of the author

Returns

A dictionary object representing the annotation.

static polyline(vertices: List[Tuple[float, float]]) DictionaryObject[source]

Draw a polyline on the PDF.

Parameters

vertices – Array specifying the vertices (x, y) coordinates of the poly-line.

Returns

A dictionary object representing the annotation.

static rectangle(rect: Union[RectangleObject, Tuple[float, float, float, float]], interiour_color: Optional[str] = None) DictionaryObject[source]

Draw a rectangle on the PDF.

This method uses the /Square annotation type of the PDF format.

Parameters
  • rect – array of four integers [xLL, yLL, xUR, yUR] specifying the clickable rectangular area

  • interiour_color – None or hex-string for the color, e.g. cdcdcd. If None is used, the interior is transparent.

Returns

A dictionary object representing the annotation.

static highlight(*, rect: Union[RectangleObject, Tuple[float, float, float, float]], quad_points: ArrayObject, highlight_color: str = 'ff0000') DictionaryObject[source]

Add a highlight annotation to the document.

Parameters
  • rect – Array of four integers [xLL, yLL, xUR, yUR] specifying the highlighted area

  • quad_points – An ArrayObject of 8 FloatObjects. Must match a word or a group of words, otherwise no highlight will be shown.

  • highlight_color – The color used for the highlight

Returns

A dictionary object representing the annotation.

static ellipse(rect: Union[RectangleObject, Tuple[float, float, float, float]], interiour_color: Optional[str] = None) DictionaryObject[source]

Draw an ellipse on the PDF.

This method uses the /Circle annotation type of the PDF format.

Parameters
  • rect – array of four integers [xLL, yLL, xUR, yUR] specifying the bounding box of the ellipse

  • interiour_color – None or hex-string for the color, e.g. cdcdcd. If None is used, the interior is transparent.

Returns

A dictionary object representing the annotation.

static polygon(vertices: List[Tuple[float, float]]) DictionaryObject[source]
DEFAULT_FIT = <pypdf.generic._fit.Fit object>

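static link(rect: Union[RectangleObject, Tuple[float, float, float, float]], border: Optional[ArrayObject] = None, url: Optional[str] = None, target_page_index: Optional[int] = None, fit: Fit = DEFAULT_FIT) DictionaryObject[source]

Add a link to the document.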

The link can either be an external link or an internal link.

An external link requires the url parameter. An internal link requires the target_page_index and fit arguments.

Parameters
  • rect – array of four integers [xLL, yLL, xUR, yUR] specifying the clickable rectangular area

  • border – if provided, an array describing border-drawing properties. See the PDF spec for details. No border will be drawn if this argument is omitted. The array consists of the horizontal corner radius, the vertical corner radius, and the border width, optionally followed by a dash pattern.

  • url – Link to a website (if you want to make an external link)

  • target_page_index – index of the page to which the link should go (if you want to make an internal link)

  • fit – Page fit or ‘zoom’ option.

Returns

A dictionary object representing the annotation.

class pypdf.generic.ArrayObject(iterable=(), /)[source]

Bases: List[Any], PdfObject

clone(pdf_dest: PdfWriterProtocol, force_duplicate: bool = False, ignore_fields: Optional[Sequence[Union[str, int]]] = ()) ArrayObject[source]

Clone object into pdf_dest.

items() Iterable[Any][source]

Emulate DictionaryObject.items for a list (index, object).

write_to_stream(stream: IO[Any], encryption_key: Union[None, str, bytes] = None) None[source]
static read_from_stream(stream: IO[Any], pdf: Optional[PdfReaderProtocol], forced_encoding: Union[None, str, List[str], Dict[int, str]] = None) ArrayObject[source]
class pypdf.generic.DictionaryObject[source]

Bases: Dict[Any, Any], PdfObject

clone(pdf_dest: PdfWriterProtocol, force_duplicate: bool = False, ignore_fields: Optional[Sequence[Union[str, int]]] = ()) DictionaryObject[source]

Clone object into pdf_dest.

raw_get(key: Any) Any[source]
setdefault(key: Any, value: Optional[Any] = None) Any[source]
property xmp_metadata: Optional[PdfObject]

Retrieve XMP (Extensible Metadata Platform) data relevant to this object, if available.

Stability: Added in v1.12, will exist for all future v1.x releases. See Table 315 – Additional entries in a metadata stream dictionary

Returns

An xmp.XmpInformation instance that can be used to access XMP metadata from the document, or None if no metadata was found on the document root.

write_to_stream(stream: IO[Any], encryption_key: Union[None, str, bytes] = None) None[source]
static read_from_stream(stream: IO[Any], pdf: Optional[PdfReaderProtocol], forced_encoding: Union[None, str, List[str], Dict[int, str]] = None) DictionaryObject[source]
class pypdf.generic.TreeObject(dct: Optional[DictionaryObject] = None)[source]

Bases: DictionaryObject

hasChildren() bool[source]
has_children() bool[source]
children() Iterable[Any][source]
add_child(child: Any, pdf: PdfWriterProtocol) None[source]
inc_parent_counter_default(parent: Union[None, IndirectObject, TreeObject], n: int) None[source]
inc_parent_counter_outline(parent: Union[None, IndirectObject, TreeObject], n: int) None[source]
insert_child(child: Any, before: Any, pdf: PdfWriterProtocol, inc_parent_counter: Optional[Callable[[...], Any]] = None) IndirectObject[source]
remove_child(child: Any) None[source]
remove_from_tree() None[source]

Remove the object from the tree it is in.

emptyTree() None[source]
empty_tree() None[source]
class pypdf.generic.StreamObject[source]

Bases: DictionaryObject

get_data() Union[bytes, str][source]
set_data(data: bytes) None[source]
hash_value_data() bytes[source]
write_to_stream(stream: IO[Any], encryption_key: Union[None, str, bytes] = None) None[source]
static initializeFromDictionary(data: Dict[str, Any]) Union[EncodedStreamObject, DecodedStreamObject][source]
static initialize_from_dictionary(data: Dict[str, Any]) Union[EncodedStreamObject, DecodedStreamObject][source]
flate_encode(level: int = - 1) EncodedStreamObject[source]
class pypdf.generic.DecodedStreamObject[source]

Bases: StreamObject

class pypdf.generic.EncodedStreamObject[source]

Bases: StreamObject

get_data() Union[bytes, str][source]
set_data(data: bytes) None[source]
class pypdf.generic.ContentStream(stream: Any, pdf: Any, forced_encoding: Union[None, str, List[str], Dict[int, str]] = None)[source]

Bases: DecodedStreamObject

In order to be fast, this data structure can contain either:
  • raw data in ._data

  • parsed stream operations in ._operations

At any time, a ContentStream object can have either one or both of those fields defined, and zero or one of those fields set to None.

Those fields are “rebuilt” lazily, when accessed:
  • when .get_data() is called, if ._data is None, it is rebuilt from ._operations

  • when .operations is called, if ._operations is None, it is rebuilt from ._data

Conversely, those fields can be invalidated:
  • when .set_data() is called, ._operations is set to None

  • when .operations is set, ._data is set to None

clone(pdf_dest: Any, force_duplicate: bool = False, ignore_fields: Optional[Sequence[Union[str, int]]] = ()) ContentStream[source]

Clone object into pdf_dest.

Parameters
  • pdf_dest

  • force_duplicate

  • ignore_fields

Returns

The cloned ContentStream

get_data() bytes[source]
set_data(data: bytes) None[source]
property operations: List[Tuple[Any, Any]]
isolate_graphics_state() None[source]
write_to_stream(stream: IO[Any], encryption_key: Union[None, str, bytes] = None) None[source]
class pypdf.generic.ViewerPreferences(value: Any = None)[source]

Bases: DictionaryObject

class pypdf.generic.OutlineItem(title: str, page: Union[NumberObject, IndirectObject, NullObject, DictionaryObject], fit: Fit)[source]

Bases: Destination

write_to_stream(stream: IO[Any], encryption_key: Union[None, str, bytes] = None) None[source]
class pypdf.generic.OutlineFontFlag(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)[source]

Bases: IntFlag

A class used as an enumerable flag for formatting an outline font.

italic = 1
bold = 2
pypdf.generic.read_object(stream: IO[Any], pdf: Optional[PdfReaderProtocol], forced_encoding: Union[None, str, List[str], Dict[int, str]] = None) Union[PdfObject, int, str, ContentStream][source]
pypdf.generic.create_string_object(string: Union[str, bytes], forced_encoding: Union[None, str, List[str], Dict[int, str]] = None) Union[TextStringObject, ByteStringObject][source]

Create a ByteStringObject or a TextStringObject from a string to represent the string.

Parameters
  • string – The data being used

  • forced_encoding – Typically None, or an encoding string

Returns

A ByteStringObject or a TextStringObject

Raises

TypeError – If string is not of type str or bytes.

pypdf.generic.encode_pdfdocencoding(unicode_string: str) bytes[source]
pypdf.generic.decode_pdfdocencoding(byte_array: bytes) str[source]
pypdf.generic.hex_to_rgb(value: str) Tuple[float, float, float][source]
pypdf.generic.read_hex_string_from_stream(stream: IO[Any], forced_encoding: Union[None, str, List[str], Dict[int, str]] = None) Union[TextStringObject, ByteStringObject][source]
pypdf.generic.read_string_from_stream(stream: IO[Any], forced_encoding: Union[None, str, List[str], Dict[int, str]] = None) Union[TextStringObject, ByteStringObject][source]
class pypdf._protocols.PdfObjectProtocol(*args, **kwargs)[source]

Bases: Protocol

indirect_reference: Any
clone(pdf_dest: Any, force_duplicate: bool = False, ignore_fields: Optional[Union[Tuple[str, ...], List[str]]] = ()) Any[source]
get_object() Optional[PdfObjectProtocol][source]
hash_value() bytes[source]
write_to_stream(stream: IO[Any], encryption_key: Union[None, str, bytes] = None) None[source]
class pypdf._protocols.PdfReaderProtocol(*args, **kwargs)[source]

Bases: Protocol

property pdf_header: str
property strict: bool
property xref: Dict[int, Dict[int, Any]]
property pages: List[Any]
property trailer: Dict[str, Any]
get_object(indirect_reference: Any) Optional[PdfObjectProtocol][source]
class pypdf._protocols.PdfWriterProtocol(*args, **kwargs)[source]

Bases: Protocol

get_object(indirect_reference: Any) Optional[PdfObjectProtocol][source]
write(stream: Union[Path, str, IO[Any]]) Tuple[bool, IO[Any]][source]
property pages: List[Any]
property pdf_header: bytes