Skip to content

Commit dd22a58

Browse files
committed
MAINT: Turn the appearance stream code into a class
This patch introduces the TextStreamAppearance class, with .from_text_annotation as a class method to instantiate it from a text annotation. It includes the code from generate_appearance_stream and update_field_annotation.
1 parent 490113d commit dd22a58

File tree

2 files changed

+164
-162
lines changed

2 files changed

+164
-162
lines changed

pypdf/_writer.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@
105105
hex_to_rgb,
106106
is_null_or_none,
107107
)
108-
from .generic._appearance_stream import update_field_annotation
108+
from .generic._appearance_stream import TextStreamAppearance
109109
from .pagerange import PageRange, PageRangeSpec
110110
from .types import (
111111
AnnotationSubtype,
@@ -1020,12 +1020,12 @@ def update_page_form_field_values(
10201020
):
10211021
# Textbox; we need to generate the appearance stream object
10221022
if isinstance(value, tuple):
1023-
appearance_stream_obj = update_field_annotation(
1024-
af, page, parent_annotation, annotation, value[1], value[2]
1023+
appearance_stream_obj = TextStreamAppearance.from_text_annotation(
1024+
af, parent_annotation, annotation, value[1], value[2]
10251025
)
10261026
else:
1027-
appearance_stream_obj = update_field_annotation(
1028-
af, page, parent_annotation, annotation
1027+
appearance_stream_obj = TextStreamAppearance.from_text_annotation(
1028+
af, parent_annotation, annotation
10291029
)
10301030
# Add the appearance stream object
10311031
if AA.AP not in annotation:
pypdf/generic/_appearance_stream.py

Lines changed: 159 additions & 157 deletions
Original file line numberDiff line numberDiff line change
@@ -1,186 +1,188 @@
11
from typing import Any, cast
22

33
from .._cmap import _default_fonts_space_width, build_char_map_from_dict
4-
from .._page import PageObject
54
from .._utils import logger_warning
65
from ..constants import AnnotationDictionaryAttributes as AA
76
from ..constants import FieldDictionaryAttributes as FA
87
from ..generic import (
98
DecodedStreamObject,
109
DictionaryObject,
1110
NameObject,
11+
NumberObject,
1212
RectangleObject,
13-
StreamObject,
1413
)
1514
from ..generic._base import ByteStringObject, TextStringObject, is_null_or_none
1615

1716
DEFAULT_FONT_HEIGHT_IN_MULTILINE = 12
1817

1918

20-
def generate_appearance_stream(
21-
txt: str,
22-
sel: list[str],
23-
da: str,
24-
font_full_rev: dict[str, bytes],
25-
rct: RectangleObject,
26-
font_height: float,
27-
y_offset: float,
28-
) -> bytes:
29-
ap_stream = f"q\n/Tx BMC \nq\n1 1 {rct.width - 1} {rct.height - 1} re\nW\nBT\n{da}\n".encode()
30-
for line_number, line in enumerate(txt.replace("\n", "\r").split("\r")):
31-
if line in sel:
32-
# may be improved but cannot find how to get fill working => replaced with lined box
33-
ap_stream += (
34-
f"1 {y_offset - (line_number * font_height * 1.4) - 1} {rct.width - 2} {font_height + 2} re\n"
35-
f"0.5 0.5 0.5 rg s\n{da}\n"
36-
).encode()
37-
if line_number == 0:
38-
ap_stream += f"2 {y_offset} Td\n".encode()
39-
else:
40-
# Td is a relative translation
41-
ap_stream += f"0 {- font_height * 1.4} Td\n".encode()
42-
enc_line: list[bytes] = [
43-
font_full_rev.get(c, c.encode("utf-16-be")) for c in line
44-
]
45-
if any(len(c) >= 2 for c in enc_line):
46-
ap_stream += b"<" + (b"".join(enc_line)).hex().encode() + b"> Tj\n"
47-
else:
48-
ap_stream += b"(" + b"".join(enc_line) + b") Tj\n"
49-
ap_stream += b"ET\nQ\nEMC\nQ\n"
50-
return ap_stream
19+
class TextStreamAppearance(DecodedStreamObject):
20+
"""
21+
A class representing the appearance stream for a text-based form field.
22+
This class is similar in form to the FreeText class in pypdf.
23+
"""
24+
25+
def __init__(
26+
self,
27+
txt: str = "",
28+
sel: list[str] = [],
29+
da: str = "",
30+
font_full_rev: dict[str, bytes] = {},
31+
rct: RectangleObject = RectangleObject((0, 0, 0, 0)),
32+
font_height: float = 0,
33+
y_offset: float = 0,
34+
) -> None:
35+
ap_stream = f"q\n/Tx BMC \nq\n1 1 {rct.width - 1} {rct.height - 1} re\nW\nBT\n{da}\n".encode()
36+
for line_number, line in enumerate(txt.replace("\n", "\r").split("\r")):
37+
if line in sel:
38+
# may be improved but cannot find how to get fill working => replaced with lined box
39+
ap_stream += (
40+
f"1 {y_offset - (line_number * font_height * 1.4) - 1} {rct.width - 2} {font_height + 2} re\n"
41+
f"0.5 0.5 0.5 rg s\n{da}\n"
42+
).encode()
43+
if line_number == 0:
44+
ap_stream += f"2 {y_offset} Td\n".encode()
45+
else:
46+
# Td is a relative translation
47+
ap_stream += f"0 {-font_height * 1.4} Td\n".encode()
48+
enc_line: list[bytes] = [
49+
font_full_rev.get(c, c.encode("utf-16-be")) for c in line
50+
]
51+
if any(len(c) >= 2 for c in enc_line):
52+
ap_stream += b"<" + (b"".join(enc_line)).hex().encode() + b"> Tj\n"
53+
else:
54+
ap_stream += b"(" + b"".join(enc_line) + b") Tj\n"
55+
ap_stream += b"ET\nQ\nEMC\nQ\n"
5156

57+
super().__init__()
58+
self[NameObject("/Type")] = NameObject("/XObject")
59+
self[NameObject("/Subtype")] = NameObject("/Form")
60+
self[NameObject("/BBox")] = rct
61+
self.set_data(ByteStringObject(ap_stream))
62+
self[NameObject("/Length")] = NumberObject(len(ap_stream))
5263

53-
def update_field_annotation(
54-
af: DictionaryObject, # _root_object[CatalogDictionary.ACRO_FORM])
55-
page: PageObject,
56-
field: DictionaryObject,
57-
annotation: DictionaryObject,
58-
font_name: str = "",
59-
font_size: float = -1,
60-
) -> StreamObject:
61-
# Calculate rectangle dimensions
62-
_rct = cast(RectangleObject, annotation[AA.Rect])
63-
rct = RectangleObject((0, 0, abs(_rct[2] - _rct[0]), abs(_rct[3] - _rct[1])))
64+
@classmethod
65+
def from_text_annotation(
66+
cls,
67+
af: DictionaryObject, # _root_object[CatalogDictionary.ACRO_FORM])
68+
field: DictionaryObject,
69+
annotation: DictionaryObject,
70+
font_name: str = "",
71+
font_size: float = -1,
72+
) -> "TextStreamAppearance":
73+
"""Creates a TextStreamAppearance object from a given text field annotation."""
74+
# Calculate rectangle dimensions
75+
_rct = cast(RectangleObject, annotation[AA.Rect])
76+
rct = RectangleObject((0, 0, abs(_rct[2] - _rct[0]), abs(_rct[3] - _rct[1])))
6477

65-
# Extract font information
66-
da = annotation.get_inherited(
67-
AA.DA,
68-
af.get(
69-
AA.DA, None
70-
),
71-
)
72-
if da is None:
73-
da = TextStringObject("/Helv 0 Tf 0 g")
74-
else:
75-
da = da.get_object()
76-
font_properties = da.replace("\n", " ").replace("\r", " ").split(" ")
77-
font_properties = [x for x in font_properties if x != ""]
78-
if font_name:
79-
font_properties[font_properties.index("Tf") - 2] = font_name
80-
else:
81-
font_name = font_properties[font_properties.index("Tf") - 2]
82-
font_height = (
83-
font_size
84-
if font_size >= 0
85-
else float(font_properties[font_properties.index("Tf") - 1])
86-
)
87-
if font_height == 0:
88-
if field.get(FA.Ff, 0) & FA.FfBits.Multiline:
89-
font_height = DEFAULT_FONT_HEIGHT_IN_MULTILINE
78+
# Extract font information
79+
da = annotation.get_inherited(
80+
AA.DA,
81+
af.get(AA.DA, None),
82+
)
83+
if da is None:
84+
da = TextStringObject("/Helv 0 Tf 0 g")
85+
else:
86+
da = da.get_object()
87+
font_properties = da.replace("\n", " ").replace("\r", " ").split(" ")
88+
font_properties = [x for x in font_properties if x != ""]
89+
if font_name:
90+
font_properties[font_properties.index("Tf") - 2] = font_name
9091
else:
91-
font_height = rct.height - 2
92-
font_properties[font_properties.index("Tf") - 1] = str(font_height)
93-
da = " ".join(font_properties)
94-
y_offset = rct.height - 1 - font_height
92+
font_name = font_properties[font_properties.index("Tf") - 2]
93+
font_height = (
94+
font_size
95+
if font_size >= 0
96+
else float(font_properties[font_properties.index("Tf") - 1])
97+
)
98+
if font_height == 0:
99+
if field.get(FA.Ff, 0) & FA.FfBits.Multiline:
100+
font_height = DEFAULT_FONT_HEIGHT_IN_MULTILINE
101+
else:
102+
font_height = rct.height - 2
103+
font_properties[font_properties.index("Tf") - 1] = str(font_height)
104+
da = " ".join(font_properties)
105+
y_offset = rct.height - 1 - font_height
95106

96-
# Retrieve font information from local DR ...
97-
dr: Any = cast(
98-
DictionaryObject,
99-
cast(
107+
# Retrieve font information from local DR ...
108+
dr: Any = cast(
100109
DictionaryObject,
101-
annotation.get_inherited(
102-
"/DR",
103-
af.get("/DR", DictionaryObject()),
104-
),
105-
).get_object(),
106-
)
107-
dr = dr.get("/Font", DictionaryObject()).get_object()
108-
# _default_fonts_space_width keys is the list of Standard fonts
109-
if font_name not in dr and font_name not in _default_fonts_space_width:
110-
# ...or AcroForm dictionary
111-
dr = cast(
112-
dict[Any, Any],
113-
af.get("/DR", {}),
114-
)
115-
dr = dr.get_object().get("/Font", DictionaryObject()).get_object()
116-
font_res = dr.get(font_name, None)
117-
if not is_null_or_none(font_res):
118-
font_res = cast(DictionaryObject, font_res.get_object())
119-
_font_subtype, _, font_encoding, font_map = build_char_map_from_dict(
120-
200, font_res
110+
cast(
111+
DictionaryObject,
112+
annotation.get_inherited(
113+
"/DR",
114+
af.get("/DR", DictionaryObject()),
115+
),
116+
).get_object(),
121117
)
122-
try: # remove width stored in -1 key
123-
del font_map[-1]
124-
except KeyError:
125-
pass
126-
font_full_rev: dict[str, bytes]
127-
if isinstance(font_encoding, str):
128-
font_full_rev = {
129-
v: k.encode(font_encoding) for k, v in font_map.items()
130-
}
118+
dr = dr.get("/Font", DictionaryObject()).get_object()
119+
# _default_fonts_space_width keys is the list of Standard fonts
120+
if font_name not in dr and font_name not in _default_fonts_space_width:
121+
# ...or AcroForm dictionary
122+
dr = cast(
123+
dict[Any, Any],
124+
af.get("/DR", {}),
125+
)
126+
dr = dr.get_object().get("/Font", DictionaryObject()).get_object()
127+
font_res = dr.get(font_name, None)
128+
if not is_null_or_none(font_res):
129+
font_res = cast(DictionaryObject, font_res.get_object())
130+
_font_subtype, _, font_encoding, font_map = build_char_map_from_dict(
131+
200, font_res
132+
)
133+
try: # remove width stored in -1 key
134+
del font_map[-1]
135+
except KeyError:
136+
pass
137+
font_full_rev: dict[str, bytes]
138+
if isinstance(font_encoding, str):
139+
font_full_rev = {
140+
v: k.encode(font_encoding) for k, v in font_map.items()
141+
}
142+
else:
143+
font_full_rev = {v: bytes((k,)) for k, v in font_encoding.items()}
144+
font_encoding_rev = {v: bytes((k,)) for k, v in font_encoding.items()}
145+
for key, value in font_map.items():
146+
font_full_rev[value] = font_encoding_rev.get(key, key)
131147
else:
132-
font_full_rev = {v: bytes((k,)) for k, v in font_encoding.items()}
133-
font_encoding_rev = {v: bytes((k,)) for k, v in font_encoding.items()}
134-
for key, value in font_map.items():
135-
font_full_rev[value] = font_encoding_rev.get(key, key)
136-
else:
137-
logger_warning(f"Font dictionary for {font_name} not found.", __name__)
138-
font_full_rev = {}
139-
140-
# Retrieve field text and selected values
141-
field_flags = field.get(FA.Ff, 0)
142-
if field.get(FA.FT, "/Tx") == "/Ch" and field_flags & FA.FfBits.Combo == 0:
143-
txt = "\n".join(annotation.get_inherited(FA.Opt, []))
144-
sel = field.get("/V", [])
145-
if not isinstance(sel, list):
146-
sel = [sel]
147-
else: # /Tx
148-
txt = field.get("/V", "")
149-
sel = []
150-
# Escape parentheses (PDF 1.7 reference, table 3.2, Literal Strings)
151-
txt = txt.replace("\\", "\\\\").replace("(", r"\(").replace(")", r"\)")
152-
# Generate appearance stream
153-
ap_stream = generate_appearance_stream(
154-
txt, sel, da, font_full_rev, rct, font_height, y_offset
155-
)
148+
logger_warning(f"Font dictionary for {font_name} not found.", __name__)
149+
font_full_rev = {}
156150

157-
# Create appearance dictionary
158-
dct = DecodedStreamObject.initialize_from_dictionary(
159-
{
160-
NameObject("/Type"): NameObject("/XObject"),
161-
NameObject("/Subtype"): NameObject("/Form"),
162-
NameObject("/BBox"): rct,
163-
"__streamdata__": ByteStringObject(ap_stream),
164-
"/Length": 0,
165-
}
166-
)
167-
if AA.AP in annotation:
168-
for k, v in cast(DictionaryObject, annotation[AA.AP]).get("/N", {}).items():
169-
if k not in {"/BBox", "/Length", "/Subtype", "/Type", "/Filter"}:
170-
dct[k] = v
151+
# Retrieve field text and selected values
152+
field_flags = field.get(FA.Ff, 0)
153+
if field.get(FA.FT, "/Tx") == "/Ch" and field_flags & FA.FfBits.Combo == 0:
154+
txt = "\n".join(annotation.get_inherited(FA.Opt, []))
155+
sel = field.get("/V", [])
156+
if not isinstance(sel, list):
157+
sel = [sel]
158+
else: # /Tx
159+
txt = field.get("/V", "")
160+
sel = []
161+
# Escape parentheses (PDF 1.7 reference, table 3.2, Literal Strings)
162+
txt = txt.replace("\\", "\\\\").replace("(", r"\(").replace(")", r"\)")
171163

172-
# Update Resources with font information if necessary
173-
if font_res is not None:
174-
dct[NameObject("/Resources")] = DictionaryObject(
175-
{
176-
NameObject("/Font"): DictionaryObject(
177-
{
178-
NameObject(font_name): getattr(
179-
font_res, "indirect_reference", font_res
180-
)
181-
}
182-
)
183-
}
164+
# Create the TextStreamAppearance instance
165+
new_appearance_stream = cls(
166+
txt, sel, da, font_full_rev, rct, font_height, y_offset
184167
)
185168

186-
return dct
169+
if AA.AP in annotation:
170+
for k, v in cast(DictionaryObject, annotation[AA.AP]).get("/N", {}).items():
171+
if k not in {"/BBox", "/Length", "/Subtype", "/Type", "/Filter"}:
172+
new_appearance_stream[k] = v
173+
174+
# Update Resources with font information if necessary
175+
if font_res is not None:
176+
new_appearance_stream[NameObject("/Resources")] = DictionaryObject(
177+
{
178+
NameObject("/Font"): DictionaryObject(
179+
{
180+
NameObject(font_name): getattr(
181+
font_res, "indirect_reference", font_res
182+
)
183+
}
184+
)
185+
}
186+
)
187+
188+
return new_appearance_stream

0 commit comments

Comments
 (0)