Skip to content

Commit 79cbaab

Browse files
committed
MAINT: Turn the appearance stream code into a class
This patch introduces the TextStreamAppearance class, with from_text_annotation as a class method to instantiate it from a text annotation. It incorporates the code from generate_appearance_stream and update_field_annotation.
1 parent 623c380 commit 79cbaab

File tree

2 files changed

+169
-163
lines changed

2 files changed

+169
-163
lines changed

pypdf/_writer.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@
105105
hex_to_rgb,
106106
is_null_or_none,
107107
)
108-
from .generic._appearance_stream import update_field_annotation
108+
from .generic._appearance_stream import TextStreamAppearance
109109
from .pagerange import PageRange, PageRangeSpec
110110
from .types import (
111111
AnnotationSubtype,
@@ -1020,12 +1020,12 @@ def update_page_form_field_values(
10201020
):
10211021
# Textbox; we need to generate the appearance stream object
10221022
if isinstance(value, tuple):
1023-
appearance_stream_obj = update_field_annotation(
1024-
af, page, parent_annotation, annotation, value[1], value[2]
1023+
appearance_stream_obj = TextStreamAppearance.from_text_annotation(
1024+
af, parent_annotation, annotation, value[1], value[2]
10251025
)
10261026
else:
1027-
appearance_stream_obj = update_field_annotation(
1028-
af, page, parent_annotation, annotation
1027+
appearance_stream_obj = TextStreamAppearance.from_text_annotation(
1028+
af, parent_annotation, annotation
10291029
)
10301030
# Add the appearance stream object
10311031
if AA.AP not in annotation:
pypdf/generic/_appearance_stream.py

Lines changed: 164 additions & 158 deletions
Original file line numberDiff line numberDiff line change
@@ -1,186 +1,192 @@
1-
from typing import Any, cast
1+
from typing import Any, Optional, Union, cast
22

33
from .._cmap import _default_fonts_space_width, build_char_map_from_dict
4-
from .._page import PageObject
54
from .._utils import logger_warning
65
from ..constants import AnnotationDictionaryAttributes as AA
76
from ..constants import FieldDictionaryAttributes as FA
87
from ..generic import (
98
DecodedStreamObject,
109
DictionaryObject,
1110
NameObject,
11+
NumberObject,
1212
RectangleObject,
13-
StreamObject,
1413
)
1514
from ..generic._base import ByteStringObject, TextStringObject, is_null_or_none
1615

1716
DEFAULT_FONT_HEIGHT_IN_MULTILINE = 12
1817

1918

20-
def generate_appearance_stream(
21-
txt: str,
22-
sel: list[str],
23-
da: str,
24-
font_full_rev: dict[str, bytes],
25-
rct: RectangleObject,
26-
font_height: float,
27-
y_offset: float,
28-
) -> bytes:
29-
ap_stream = f"q\n/Tx BMC \nq\n1 1 {rct.width - 1} {rct.height - 1} re\nW\nBT\n{da}\n".encode()
30-
for line_number, line in enumerate(txt.replace("\n", "\r").split("\r")):
31-
if line in sel:
32-
# may be improved but cannot find how to get fill working => replaced with lined box
33-
ap_stream += (
34-
f"1 {y_offset - (line_number * font_height * 1.4) - 1} {rct.width - 2} {font_height + 2} re\n"
35-
f"0.5 0.5 0.5 rg s\n{da}\n"
36-
).encode()
37-
if line_number == 0:
38-
ap_stream += f"2 {y_offset} Td\n".encode()
39-
else:
40-
# Td is a relative translation
41-
ap_stream += f"0 {- font_height * 1.4} Td\n".encode()
42-
enc_line: list[bytes] = [
43-
font_full_rev.get(c, c.encode("utf-16-be")) for c in line
44-
]
45-
if any(len(c) >= 2 for c in enc_line):
46-
ap_stream += b"<" + (b"".join(enc_line)).hex().encode() + b"> Tj\n"
47-
else:
48-
ap_stream += b"(" + b"".join(enc_line) + b") Tj\n"
49-
ap_stream += b"ET\nQ\nEMC\nQ\n"
50-
return ap_stream
19+
class TextStreamAppearance(DecodedStreamObject):
    """
    Appearance stream (a Form XObject) for a text-based form field.

    The constructor renders already-prepared text into PDF content-stream
    operators; :meth:`from_text_annotation` derives all constructor inputs
    (text, selection, default appearance, font mapping, geometry) from a
    field/annotation pair.
    """

    def __init__(
        self,
        txt: str = "",
        sel: Optional[list[str]] = None,
        da: str = "",
        font_full_rev: Optional[dict[str, bytes]] = None,
        rct: Union[RectangleObject, tuple[float, float, float, float]] = (0.0, 0.0, 0.0, 0.0),
        font_height: float = 0,
        y_offset: float = 0,
    ) -> None:
        """
        Build the content stream and populate the XObject dictionary.

        Args:
            txt: Text to draw. ``"\\n"`` and ``"\\r"`` both separate lines.
                Characters are emitted as given, so literal-string escaping
                must already have been applied by the caller.
            sel: Lines that are drawn with a selection box. ``None`` or an
                empty list means that no line is selected.
            da: Default-appearance operators, emitted right after ``BT`` and
                again after every selection box.
            font_full_rev: Mapping from a character to the bytes emitted for
                it. Characters missing from the mapping are encoded as
                UTF-16-BE.
            rct: Bounding box, stored as ``/BBox``; its width and height size
                the clipping rectangle and the selection boxes.
            font_height: Font size; consecutive lines are ``1.4 * font_height``
                apart.
            y_offset: Vertical position of the first line.
        """
        # An absent selection must stay empty. Defaulting to [""] would make
        # every empty line (including an empty field value) match ``line in sel``
        # and get a selection box drawn around it.
        selected_lines = sel or []
        glyph_bytes = font_full_rev or {}
        if isinstance(rct, tuple):
            rct = RectangleObject(rct)

        # Collect the pieces and join once instead of growing a bytes object.
        chunks: list[bytes] = [
            f"q\n/Tx BMC \nq\n1 1 {rct.width - 1} {rct.height - 1} re\nW\nBT\n{da}\n".encode()
        ]
        for line_number, line in enumerate(txt.replace("\n", "\r").split("\r")):
            if line in selected_lines:
                # may be improved but cannot find how to get fill working => replaced with lined box
                chunks.append(
                    (
                        f"1 {y_offset - (line_number * font_height * 1.4) - 1} {rct.width - 2} {font_height + 2} re\n"
                        f"0.5 0.5 0.5 rg s\n{da}\n"
                    ).encode()
                )
            if line_number == 0:
                chunks.append(f"2 {y_offset} Td\n".encode())
            else:
                # Td is a relative translation
                chunks.append(f"0 {-font_height * 1.4} Td\n".encode())
            enc_line: list[bytes] = [
                glyph_bytes.get(c, c.encode("utf-16-be")) for c in line
            ]
            # As soon as one character needs two or more bytes, the whole line
            # is written as a hex string; otherwise as a literal string.
            if any(len(c) >= 2 for c in enc_line):
                chunks.append(b"<" + b"".join(enc_line).hex().encode() + b"> Tj\n")
            else:
                chunks.append(b"(" + b"".join(enc_line) + b") Tj\n")
        chunks.append(b"ET\nQ\nEMC\nQ\n")
        ap_stream = b"".join(chunks)

        super().__init__()
        self[NameObject("/Type")] = NameObject("/XObject")
        self[NameObject("/Subtype")] = NameObject("/Form")
        self[NameObject("/BBox")] = rct
        self.set_data(ByteStringObject(ap_stream))
        self[NameObject("/Length")] = NumberObject(len(ap_stream))

    @classmethod
    def from_text_annotation(
        cls,
        af: DictionaryObject,  # _root_object[CatalogDictionary.ACRO_FORM])
        field: DictionaryObject,
        annotation: DictionaryObject,
        font_name: str = "",
        font_size: float = -1,
    ) -> "TextStreamAppearance":
        """
        Create a TextStreamAppearance from a text or choice field annotation.

        Args:
            af: The AcroForm dictionary; used as fallback for ``/DA`` and ``/DR``.
            field: The field dictionary; supplies ``/Ff``, ``/FT`` and ``/V``.
            annotation: The widget annotation; supplies ``/Rect``, inherited
                ``/DA``, ``/DR``, ``/Opt`` and an existing ``/AP``.
            font_name: Font resource name overriding the one in ``/DA``;
                an empty string keeps the ``/DA`` font.
            font_size: Font size overriding the one in ``/DA``; a negative
                value keeps the ``/DA`` size. A resulting size of 0 is replaced
                by ``DEFAULT_FONT_HEIGHT_IN_MULTILINE`` for multiline fields
                and by the rectangle height minus 2 otherwise.

        Returns:
            The new appearance stream. Entries of an existing normal
            appearance (``/AP`` ``/N``) other than ``/BBox``, ``/Length``,
            ``/Subtype``, ``/Type`` and ``/Filter`` are copied into it.

        Raises:
            KeyError: If the annotation has no ``/Rect`` entry.
            ValueError: If the default appearance string has no ``Tf`` operator.
        """
        # Calculate rectangle dimensions
        _rct = cast(RectangleObject, annotation[AA.Rect])
        rct = RectangleObject((0, 0, abs(_rct[2] - _rct[0]), abs(_rct[3] - _rct[1])))

        # Extract font information
        da = annotation.get_inherited(
            AA.DA,
            af.get(AA.DA, None),
        )
        if da is None:
            da = TextStringObject("/Helv 0 Tf 0 g")
        else:
            da = da.get_object()
        font_properties = [
            token
            for token in da.replace("\n", " ").replace("\r", " ").split(" ")
            if token != ""
        ]
        # The two tokens in front of "Tf" are the font name and the font size.
        tf_index = font_properties.index("Tf")
        if font_name:
            font_properties[tf_index - 2] = font_name
        else:
            font_name = font_properties[tf_index - 2]
        font_height = (
            font_size
            if font_size >= 0
            else float(font_properties[tf_index - 1])
        )
        if font_height == 0:
            if field.get(FA.Ff, 0) & FA.FfBits.Multiline:
                font_height = DEFAULT_FONT_HEIGHT_IN_MULTILINE
            else:
                font_height = rct.height - 2
        font_properties[tf_index - 1] = str(font_height)
        da = " ".join(font_properties)
        y_offset = rct.height - 1 - font_height

        # Retrieve font information from local DR ...
        dr: Any = cast(
            DictionaryObject,
            cast(
                DictionaryObject,
                annotation.get_inherited(
                    "/DR",
                    af.get("/DR", DictionaryObject()),
                ),
            ).get_object(),
        )
        dr = dr.get("/Font", DictionaryObject()).get_object()
        # _default_fonts_space_width keys is the list of Standard fonts
        if font_name not in dr and font_name not in _default_fonts_space_width:
            # ...or AcroForm dictionary
            dr = cast(
                dict[Any, Any],
                af.get("/DR", {}),
            )
            dr = dr.get_object().get("/Font", DictionaryObject()).get_object()
        font_res = dr.get(font_name, None)
        has_font_resource = not is_null_or_none(font_res)
        font_full_rev: dict[str, bytes]
        if has_font_resource:
            font_res = cast(DictionaryObject, font_res.get_object())
            # NOTE(review): the first argument is presumably a default space
            # width - confirm against build_char_map_from_dict.
            _font_subtype, _, font_encoding, font_map = build_char_map_from_dict(
                200, font_res
            )
            font_map.pop(-1, None)  # remove width stored in -1 key
            if isinstance(font_encoding, str):
                font_full_rev = {
                    v: k.encode(font_encoding) for k, v in font_map.items()
                }
            else:
                font_encoding_rev = {v: bytes((k,)) for k, v in font_encoding.items()}
                font_full_rev = dict(font_encoding_rev)
                # NOTE(review): the ``key`` fallback stores the font_map key
                # itself; verify that it is bytes whenever this path is hit.
                for key, value in font_map.items():
                    font_full_rev[value] = font_encoding_rev.get(key, key)
        else:
            logger_warning(f"Font dictionary for {font_name} not found.", __name__)
            font_full_rev = {}

        # Retrieve field text and selected values
        field_flags = field.get(FA.Ff, 0)
        if field.get(FA.FT, "/Tx") == "/Ch" and field_flags & FA.FfBits.Combo == 0:
            # List box: draw every option, box the selected ones.
            txt = "\n".join(annotation.get_inherited(FA.Opt, []))
            sel = field.get("/V", [])
            if not isinstance(sel, list):
                sel = [sel]
        else:  # /Tx
            txt = field.get("/V", "")
            sel = []
        # Escape parentheses (PDF 1.7 reference, table 3.2, Literal Strings)
        txt = txt.replace("\\", "\\\\").replace("(", r"\(").replace(")", r"\)")

        # Create the TextStreamAppearance instance
        new_appearance_stream = cls(
            txt, sel, da, font_full_rev, rct, font_height, y_offset
        )

        # Carry over the remaining entries of an existing normal appearance;
        # the keys set by the constructor (and /Filter) are kept as generated.
        if AA.AP in annotation:
            for k, v in cast(DictionaryObject, annotation[AA.AP]).get("/N", {}).items():
                if k not in {"/BBox", "/Length", "/Subtype", "/Type", "/Filter"}:
                    new_appearance_stream[k] = v

        # Update Resources with font information if necessary. Use the same
        # null-aware test as the lookup above, so that an explicit null font
        # entry is not written out as the font resource.
        if has_font_resource:
            new_appearance_stream[NameObject("/Resources")] = DictionaryObject(
                {
                    NameObject("/Font"): DictionaryObject(
                        {
                            NameObject(font_name): getattr(
                                font_res, "indirect_reference", font_res
                            )
                        }
                    )
                }
            )

        return new_appearance_stream

0 commit comments

Comments
 (0)