|
1 | | -from typing import Any, cast |
| 1 | +from typing import Any, Optional, Union, cast |
2 | 2 |
|
3 | 3 | from .._cmap import _default_fonts_space_width, build_char_map_from_dict |
4 | | -from .._page import PageObject |
5 | 4 | from .._utils import logger_warning |
6 | 5 | from ..constants import AnnotationDictionaryAttributes as AA |
7 | 6 | from ..constants import FieldDictionaryAttributes as FA |
8 | 7 | from ..generic import ( |
9 | 8 | DecodedStreamObject, |
10 | 9 | DictionaryObject, |
11 | 10 | NameObject, |
| 11 | + NumberObject, |
12 | 12 | RectangleObject, |
13 | | - StreamObject, |
14 | 13 | ) |
15 | 14 | from ..generic._base import ByteStringObject, TextStringObject, is_null_or_none |
16 | 15 |
|
17 | 16 | DEFAULT_FONT_HEIGHT_IN_MULTILINE = 12 |
18 | 17 |
|
19 | 18 |
|
def generate_appearance_stream(
    txt: str,
    sel: list[str],
    da: str,
    font_full_rev: dict[str, bytes],
    rct: "RectangleObject",
    font_height: float,
    y_offset: float,
) -> bytes:
    """
    Build the content stream that draws the text of a form field.

    Args:
        txt: Text to draw. Lines may be separated by CR, LF or CRLF; each
            separator counts as exactly one line break.
        sel: Lines that are considered selected. A line whose full text is
            contained in this list gets a gray box drawn around it.
        da: Default appearance string (font selection, size, color). It is
            written once at the start of the text object and again after
            every selection box.
        font_full_rev: Maps a character to the bytes emitted for it.
            Characters missing from the map are emitted as UTF-16-BE.
        rct: Rectangle of the field; only ``width`` and ``height`` are read.
        font_height: Font size. The line pitch is ``font_height * 1.4``.
        y_offset: Vertical position of the first line.

    Returns:
        The content stream as bytes.

    """
    chunks: list[bytes] = [
        f"q\n/Tx BMC \nq\n1 1 {rct.width - 1} {rct.height - 1} re\nW\nBT\n{da}\n".encode()
    ]
    # Normalize CRLF first: replacing "\n" with "\r" directly would turn one
    # CRLF break into two breaks and insert a spurious empty line.
    lines = txt.replace("\r\n", "\n").replace("\r", "\n").split("\n")
    for line_number, line in enumerate(lines):
        if line in sel:
            # A filled highlight would be nicer, but no working fill was
            # found, so a stroked gray box is drawn instead. The default
            # appearance is emitted again afterwards (presumably to restore
            # the color changed by "rg").
            chunks.append(
                (
                    f"1 {y_offset - (line_number * font_height * 1.4) - 1} {rct.width - 2} {font_height + 2} re\n"
                    f"0.5 0.5 0.5 rg s\n{da}\n"
                ).encode()
            )
        if line_number == 0:
            chunks.append(f"2 {y_offset} Td\n".encode())
        else:
            # Td is a relative translation
            chunks.append(f"0 {-font_height * 1.4} Td\n".encode())
        enc_line: list[bytes] = [
            font_full_rev.get(c, c.encode("utf-16-be")) for c in line
        ]
        if any(len(c) >= 2 for c in enc_line):
            # At least one multi-byte code: write the line as a hex string.
            chunks.append(b"<" + b"".join(enc_line).hex().encode() + b"> Tj\n")
        else:
            chunks.append(b"(" + b"".join(enc_line) + b") Tj\n")
    chunks.append(b"ET\nQ\nEMC\nQ\n")
    return b"".join(chunks)
class TextStreamAppearance(DecodedStreamObject):
    """
    A class representing the appearance stream for a text-based form field.
    This class is similar in form to the FreeText class in pypdf.

    The instance is a stream object typed as ``/XObject`` with subtype
    ``/Form``; its data is the content stream that draws the field text.
    """

    def __init__(
        self,
        txt: str = "",
        sel: Optional[list[str]] = None,
        da: str = "",
        font_full_rev: Optional[dict[str, bytes]] = None,
        rct: Union[RectangleObject, tuple[float, float, float, float]] = (0.0, 0.0, 0.0, 0.0),
        font_height: float = 0,
        y_offset: float = 0,
    ) -> None:
        """
        Build the content stream for the given text and store it in the object.

        Args:
            txt: Text to draw. Lines may be separated by CR, LF or CRLF; each
                separator counts as exactly one line break.
            sel: Lines that are considered selected. A line whose full text
                is contained in this list gets a gray box drawn around it.
                ``None`` or an empty list selects nothing.
            da: Default appearance string (font selection, size, color).
            font_full_rev: Maps a character to the bytes emitted for it.
                Characters missing from the map are emitted as UTF-16-BE.
            rct: Rectangle used as ``/BBox``; a 4-tuple is converted to a
                ``RectangleObject``.
            font_height: Font size. The line pitch is ``font_height * 1.4``.
            y_offset: Vertical position of the first line.

        """
        # An absent selection must select nothing. Defaulting to [""] would
        # mark every empty line as selected and box it.
        sel = sel or []
        font_full_rev = font_full_rev or {}
        if isinstance(rct, tuple):
            rct = RectangleObject(rct)

        chunks: list[bytes] = [
            f"q\n/Tx BMC \nq\n1 1 {rct.width - 1} {rct.height - 1} re\nW\nBT\n{da}\n".encode()
        ]
        # Normalize CRLF first: replacing "\n" with "\r" directly would turn
        # one CRLF break into two breaks and insert a spurious empty line.
        lines = txt.replace("\r\n", "\n").replace("\r", "\n").split("\n")
        for line_number, line in enumerate(lines):
            if line in sel:
                # A filled highlight would be nicer, but no working fill was
                # found, so a stroked gray box is drawn instead. The default
                # appearance is emitted again afterwards (presumably to
                # restore the color changed by "rg").
                chunks.append(
                    (
                        f"1 {y_offset - (line_number * font_height * 1.4) - 1} {rct.width - 2} {font_height + 2} re\n"
                        f"0.5 0.5 0.5 rg s\n{da}\n"
                    ).encode()
                )
            if line_number == 0:
                chunks.append(f"2 {y_offset} Td\n".encode())
            else:
                # Td is a relative translation
                chunks.append(f"0 {-font_height * 1.4} Td\n".encode())
            enc_line: list[bytes] = [
                font_full_rev.get(c, c.encode("utf-16-be")) for c in line
            ]
            if any(len(c) >= 2 for c in enc_line):
                # At least one multi-byte code: write the line as a hex string.
                chunks.append(b"<" + b"".join(enc_line).hex().encode() + b"> Tj\n")
            else:
                chunks.append(b"(" + b"".join(enc_line) + b") Tj\n")
        chunks.append(b"ET\nQ\nEMC\nQ\n")
        ap_stream = b"".join(chunks)

        super().__init__()
        self[NameObject("/Type")] = NameObject("/XObject")
        self[NameObject("/Subtype")] = NameObject("/Form")
        self[NameObject("/BBox")] = rct
        self.set_data(ByteStringObject(ap_stream))
        self[NameObject("/Length")] = NumberObject(len(ap_stream))

    @classmethod
    def from_text_annotation(
        cls,
        af: DictionaryObject,  # _root_object[CatalogDictionary.ACRO_FORM])
        field: DictionaryObject,
        annotation: DictionaryObject,
        font_name: str = "",
        font_size: float = -1,
    ) -> "TextStreamAppearance":
        """
        Creates a TextStreamAppearance object from a given text field annotation.

        Args:
            af: The AcroForm dictionary; used as fallback for ``/DA`` and ``/DR``.
            field: The field dictionary; ``/Ff``, ``/FT`` and ``/V`` are read.
            annotation: The widget annotation; its rectangle gives the size
                of the appearance, and inherited ``/DA``, ``/DR`` and
                ``/Opt`` entries are looked up through it.
            font_name: Font resource name that replaces the one in the
                default appearance. Empty keeps the name from ``/DA``.
            font_size: Font size to use. A negative value keeps the size
                from ``/DA``. A resulting size of 0 is replaced by
                ``DEFAULT_FONT_HEIGHT_IN_MULTILINE`` for multiline fields
                and by the rectangle height minus 2 otherwise.

        Returns:
            The new appearance stream. Entries of an existing normal
            appearance (``/AP`` ``/N``) are copied over, except ``/BBox``,
            ``/Length``, ``/Subtype``, ``/Type`` and ``/Filter``.

        """
        # Calculate rectangle dimensions
        _rct = cast(RectangleObject, annotation[AA.Rect])
        rct = RectangleObject((0, 0, abs(_rct[2] - _rct[0]), abs(_rct[3] - _rct[1])))

        # Extract font information
        da = annotation.get_inherited(
            AA.DA,
            af.get(AA.DA, None),
        )
        if da is None:
            da = TextStringObject("/Helv 0 Tf 0 g")
        else:
            da = da.get_object()
        font_properties = da.replace("\n", " ").replace("\r", " ").split(" ")
        font_properties = [x for x in font_properties if x != ""]
        # The two operands in front of the "Tf" operator are font name and size.
        tf_index = font_properties.index("Tf")
        if font_name:
            font_properties[tf_index - 2] = font_name
        else:
            font_name = font_properties[tf_index - 2]
        font_height = (
            font_size
            if font_size >= 0
            else float(font_properties[tf_index - 1])
        )
        if font_height == 0:
            if field.get(FA.Ff, 0) & FA.FfBits.Multiline:
                font_height = DEFAULT_FONT_HEIGHT_IN_MULTILINE
            else:
                font_height = rct.height - 2
        font_properties[tf_index - 1] = str(font_height)
        da = " ".join(font_properties)
        y_offset = rct.height - 1 - font_height

        # Retrieve font information from local DR ...
        dr: Any = cast(
            DictionaryObject,
            cast(
                DictionaryObject,
                annotation.get_inherited(
                    "/DR",
                    af.get("/DR", DictionaryObject()),
                ),
            ).get_object(),
        )
        dr = dr.get("/Font", DictionaryObject()).get_object()
        # _default_fonts_space_width keys is the list of Standard fonts
        if font_name not in dr and font_name not in _default_fonts_space_width:
            # ...or AcroForm dictionary
            # The fallback must be a DictionaryObject: a plain dict has no
            # get_object() and would raise when the AcroForm has no /DR.
            dr = cast(
                dict[Any, Any],
                af.get("/DR", DictionaryObject()),
            )
            dr = dr.get_object().get("/Font", DictionaryObject()).get_object()
        font_res = dr.get(font_name, None)
        font_found = not is_null_or_none(font_res)
        font_full_rev: dict[str, bytes]
        if font_found:
            font_res = cast(DictionaryObject, font_res.get_object())
            _font_subtype, _, font_encoding, font_map = build_char_map_from_dict(
                200, font_res
            )
            try:  # remove width stored in -1 key
                del font_map[-1]
            except KeyError:
                pass
            if isinstance(font_encoding, str):
                font_full_rev = {
                    v: k.encode(font_encoding) for k, v in font_map.items()
                }
            else:
                font_full_rev = {v: bytes((k,)) for k, v in font_encoding.items()}
                font_encoding_rev = {v: bytes((k,)) for k, v in font_encoding.items()}
                for key, value in font_map.items():
                    font_full_rev[value] = font_encoding_rev.get(key, key)
        else:
            logger_warning(f"Font dictionary for {font_name} not found.", __name__)
            font_full_rev = {}

        # Retrieve field text and selected values
        field_flags = field.get(FA.Ff, 0)
        if field.get(FA.FT, "/Tx") == "/Ch" and field_flags & FA.FfBits.Combo == 0:
            txt = "\n".join(annotation.get_inherited(FA.Opt, []))
            sel = field.get("/V", [])
            if not isinstance(sel, list):
                sel = [sel]
        else:  # /Tx
            txt = field.get("/V", "")
            sel = []
        # Escape parentheses (PDF 1.7 reference, table 3.2, Literal Strings)
        txt = txt.replace("\\", "\\\\").replace("(", r"\(").replace(")", r"\)")

        # Create the TextStreamAppearance instance
        new_appearance_stream = cls(
            txt, sel, da, font_full_rev, rct, font_height, y_offset
        )

        if AA.AP in annotation:
            for k, v in cast(DictionaryObject, annotation[AA.AP]).get("/N", {}).items():
                if k not in {"/BBox", "/Length", "/Subtype", "/Type", "/Filter"}:
                    new_appearance_stream[k] = v

        # Update Resources with font information if necessary.
        # Same null check as above, so a null font entry is never written.
        if font_found:
            new_appearance_stream[NameObject("/Resources")] = DictionaryObject(
                {
                    NameObject("/Font"): DictionaryObject(
                        {
                            NameObject(font_name): getattr(
                                font_res, "indirect_reference", font_res
                            )
                        }
                    )
                }
            )

        return new_appearance_stream
0 commit comments