1
1
import hashlib
2
+ import json
2
3
import pathlib
3
4
import subprocess
4
5
import sqlite_utils
5
6
import tempfile
7
+ import zlib
6
8
7
9
root = pathlib .Path (__file__ ).parent .resolve ()
8
10
TMP_PATH = pathlib .Path (tempfile .gettempdir ())
9
- SHOT_HASH_PATHS = [
10
- (root / "templates" / "row.html" ),
11
- (root / "templates" / "til_base.html" ),
12
- ]
13
11
12
+ # Change the following tuple manually any time the templates have changed
13
+ # to a point that all of the screenshots need to be re-taken
14
+ # https://github.com/simonw/til/issues/82
15
+ _decompress = lambda compressed : zlib .decompress (compressed ).decode ("utf-8" )
16
+ SHOT_HASH_ELEMENTS = (
17
+ # Compressed HTML from the last time this ran against the actual templates
18
+ # Delete this entirely - and the import zlib line - the first time
19
+ # SHOT_HASH_ELEMENTS needs to be manually invalidated.
20
+ _decompress (
21
+ b"x\x9c \xb5 VQo\xdb 6\x10 ~\xf7 \xaf \xb8 *\xc8 d\xaf \xae \x94 \xd6 \xd9 V\xb8 \xb6 "
22
+ b"\xb0 \xa0 \r \xb0 \x01 \xc5 \x1e \x96 \x02 {(\x06 \x83 \x16 i\x93 +E\xaa $\xd5 \xcc u\x0c "
23
+ b"\xec o\xec \xef \xed \x97 \xec HJ\x8e \xed \xc4 \xee \x86 ay\x88 \xa4 \xe3 w"
24
+ b"\xdf \x1d \xef \x8e \x1f \xbd >\x07 \xf6 \xbb c\x8a ZH\x9c \x90 \xb3 9\xb1 ,\xe3 \xae \x92 \t "
25
+ b"\x9c oz\xbd \xf5 9X\xe6 \x00 W,L\xc1 ~\x94 \xfd $Iz\x80 \x7f \x96 IV:\xf8 \x1a \x16 "
26
+ b"FW\x1e \x00 \xb7 \x9c \x19 \x06 5q\x1c \xa1 c\xa7 kQ\xc2 \xdd \x1d \xa4 \xb3 \xd4 ?\xc6 "
27
+ b"V6\xcb \xf0 \x9d U4\xed !\xcd \x10 \xd6 I@%c\x08 \xcf !$\x1e \x84 \x9f \xfe \xb1 \x19 \x02 %"
28
+ b"\x8e \xf8 \x84 \xa6 >5\x9b \x0c z\x98 \xd2 }F\x18 \xc5 \x9b \xdf _\xfc \n \xd1 .\x16 \xa0 "
29
+ b"t\x9b ,Z|\x96 -\xda \x10 a\x19 E\x87 \xf0 2\xbb \xbc \xb8 \xec '\xef ~|\x1b \xe0 \x0b \xdd ("
30
+ b"\x9a \x0c Z\x0e \xac \x04 \xd2 \xb4 [\x9f K]~@>'\x19 \x9a \xd6 kO\x9d \xc5 \xcf "
31
+ b"\xcd \x06 \xee \xe0 FTZ\xc1 /BJa\xb5 \xfa \xeb \x8f ?- \xab \x8d 4\xd1 y\x8f \t +m"
32
+ b'\xc8 \x8c 3B\xbd }"\x85 \xfa \x00 \x86 \xc9 ib\xdd J2\xcb \x19 s\t p\xc3 \x16 \xd3 \x84 ;W'
33
+ b"\xdb q\x9e \xfb \x88 \xd6 \x87 \xb9 m\xa3 d\x8a \xb9 \xdc :\xe2 D\x99 /\x85 "
34
+ b"\xe3 \xcd \xfc \x99 \x14 K\xee \xb2 \xd2 \xda \xa4 \xe8 M*\xe6 \x08 (R\xf9 \xa2 "
35
+ b"\xdd \n \xe7 \x98 \x19 \x97 \xc4 \xd0 \x04 J\xad \xb0 \xcf \x0e \x83 5UE\xcc j&\x89 Y\xb2 "
36
+ b"\x99 \xa8 \xc8 \x92 \x1d s4\x8c 8mv|\xbf \x8f \xb9 \x1c \xc1 \x87 \xd2 \xec \xa0 \x0f *v"
37
+ b"\xc4 \x8b 2[\x1a Q;\xa1 \xd5 C\xdf 6\xd9 \xe3 \xde 1\xff {\xbf \xae pv\x94 \x91 "
38
+ b"\x8a |\xd6 \x8a \xdc \xda \xac \xd4 \xd5 \x91 Jva\xb8 v3N,\xc7 @\xd9 o\xf5 \xf2 T\xb0 1\x91 "
39
+ b"n'\xe0 \r V\x89 )O\x00 z\x01 \x8e \x0b \x0b \xc4 `w\xe4 }Uk\xa3 kf\xdc j\x9a \xe8 "
40
+ b"\xe5 \xb8 1\xf2 \x91 tOf\x17 \xcf \xd2 f\xb3 \xcd \xd6 \x1f \xa5 \x9d \x8a \xec "
41
+ b"\xf1 \xbb U\xbd [\x8f \x93 \xb9 \xfc \xd3 \x86 \xed 9\xfd \xdb ~\xed 9\xff \xcf "
42
+ b"\xed z\x18 \xeb ?v+r\xdc \n \xea \xf8 \x0e \xcb \xcb \x8b \x8b Sp\xce \xfc \x89 \xdc \xc1 "
43
+ b"_\x06 \xfc Q]\x98 k\xba \n \x8a \xc0 \x9f \x17 \x07 -\x98 \xe4 hCh\xb4 za\xbe \xb3 d\xe1 W"
44
+ b"\xb6 \xe2 \x8c \n B\\ T\xb7 \xf8 6\xf3 \n \xd8 \xc7 \x7f \x83 {a\xec @A\x17 '|T\xfc \x1c \r "
45
+ b"\xc8 ?*\xbc \xa9 \x91 PJb\xed 4i\xa1 I\xd1 )\xe8 B\x9b \xa0 \xb7 B\xed \xd3 "
46
+ b"\xa0 \x97 \x14 \xc5 \xc4 \xd6 Du\xbe Q\xcf \x8b \x83 \xb9 \x9d \xe4 \x1e S\xc0 \x84 "
47
+ b"\xb4 \xea \xf6 \xc5 \xc1 ~P\x05 R\xc0 3h\xad A\x96 \x18 }?~\x8e \xd2 \xef \x17 1\x8b .W,"
48
+ b"\xb0 O7n3od\xb1 /\xea \x93 \xba \xcb \xb4 %I\x8a \xd7 \xf1 \xe5 \x80 \x1c yc\xe1 vmO"
49
+ b"\xc2 \x8d \x93 55m\x8b 0\x84 \xee \xbd \xf5 \xee >\xf1 z\xf8 \xaa \x12 \x94 j\xf7 \xea ~"
50
+ b"\xd7 \x1d \xc6 \xc8 ;\xc3 jIJ\xd6 \xdf \xce \x7f \x94 \xf2 0\xf9 q\xea \xfd \x01 \xc8 q<"
51
+ b"\xe6 9^\x94 _\x80 \xe1 g%\x9c \xcd \xf1 \x16 \xf3 \xc5 \xfb AXT\xed \x95 /\xda \xce "
52
+ b"\xe6 O&\x14 \xfc \xae \xa9 p\xde i\x92 \xd7 8r\x93 x\xca \x8b ^\x9e \xc3 \x15 \xa5 \xf0 "
53
+ b"IX1\xc7 n\x9c \x81 \xbf \xb7 ,^\xd9 @\xa4 \x04 \xfe \xe2 )\xf8 \x1b M\xa8 \xa5 \x05 \xe8 Q]6"
54
+ b"\x15 \xce |\xf6 \xb1 afu\x13 ~$hs%e?\xe5 /\x86 |4\xe4 \x97 C\xfe \xcd \x90 \x7f "
55
+ b"\x9b \x0e 2l\xd4 5)y\x9f \xe1 U^\xc0 \x1a ;&q\x9c \x89 *9vp\n L\xee \x93 \xf4 "
56
+ b"S\x92 \xc5 \xc5 t\xf0 \n \xc1 \xb8 \xab \xfe \x93 h\x18 \x04 o\xc0 \xf1 t\x8d Q~q\xd3 "
57
+ b'\xb2 \t \x7f ."([2w\xe5 \x9c \x11 \xf3 \xc6 \xb1 ~*h\xa4 \xc1 0\x86 U\xfa \x13 {\xcd \x85 '
58
+ b"\xa4 \xfd \x96 \xb0 ]\xb1 \x87 .Cd\x0c \x8b \x9e \xdc +\xd0 [\x7f \x89 Oa\xbb \xef 8"
59
+ b"+\xd7 \x92 \xf9 /L9\x06 \xe9 \x90 Y\xb8 \xe9 3\x87 \xbf \x04 \xde \xb0 R\x1b \xe2 U\x14 "
60
+ b"\xdd S\xa5 \x15 K\x1f A\x96 Z\x86 b\xa4 g\xf3 \xef \xe6 \xa3 \xf9 \xe8 1\xcc \x02 e\xe6 "
61
+ b"F|f\x1e v\x91 \xbd d\xd5 \x01 ho\x0f \xbe \xf3 \xb8 \x8b \xf4 ,\x85 \xa7 \xdd ^\xb6 "
62
+ b"P\xa1 \x14 3\xef 0\xb9 \x10 1m\x8b @\xea \x1a \x87 (\x96 \xe7 `\x9f \x1e \xfb \x93 \xa6 "
63
+ b"\xc8 \x0b \xe9 p\xb5 \xd3 \xe2 8qaP\x1e Rt\xd1 \x10 \xbe \x19 \xf4 P\x1b \xda 9"
64
+ b";\x90 \xc7 \xbf \x01 *r\x94 d"
65
+ ),
66
+ _decompress (
67
+ b"x\x9c }U\xeb \x8e \xe3 4\x14 \xfe \xdf \xa7 0A\xd5 NE.\xed t\xa6 \x9d \xc9 4\x15 \x88 E,"
68
+ b"\x12 \x02 \x04 #!~!\xd7 >m\xbc \xe3 \xd8 \xc1 vo\xbb \xaa \xc4 k\xf0 z<\t \xc7 q\xd2 M"
69
+ b"\x99 \xd9 i\xd5 \xa6 >=\xe7 ;\xdf \xb9 z\xf1 \xc5 \xdb \x9f \xbf }\xfc \xe3 \x97 \xef H\xe9 "
70
+ b"*\xb9 \x1c ,\xfc \x83 H\xaa 6E\x04 *\xf2 \x02 \xa0 \x1c \x1f \x15 8JXI\x8d \x05 WD[\xb7 "
71
+ b"N\xee \xa2 N\xac h\x05 E\xb4 \x13 \xb0 \xaf \xb5 q\x11 aZ9P\xa8 \xb6 \x17 \xdc \x95 "
72
+ b"\x05 \x87 \x9d `\x90 4\x87 \x98 \x08 %\x9c \xa0 2\xb1 \x8c J(&\x1e \xc4 \t 'a\xf9 qHVR"
73
+ b"\xb3 '\xd2 \x1c \xc9 \xf0 \x84 \x02 P<\xc8 \x86 \xa7 E\x16 \xd4 \x06 \x0b \xcb \x8c "
74
+ b"\xa8 \x1d \xe1 \xb0 \x06 C8u4\xe1 \xba \xa2 B\x15 \x91 \x13 2\xb5 \xa2 \xd2 j/\xa4 \x14 "
75
+ b"V\xab T\x01 \xf2 \xb1 \x86 \x15 Q\xe9 \\ m\xf3 ,\xab %\xdd Z\xb1 \x92 \x90 \n \x9d \xbd "
76
+ b"\xb7 \xbd \xf3 {\x1b -\x17 Y@G7R\xa8 'b@\x16 \x11 \x95 \x0e \x8c \xa2 \x0e \" "
77
+ b'\xe2 \x8e 5\x86 J\xeb Z\n F\x9d \xd0 *\xa3 NW_\x1d *\x19 \x05 \xde E\xf4 \r \n "R\x1a '
78
+ b"X\x17 \x11 r\x96 6[\x03 \xf0 \xd4 \xeb a\xb0 \xe7 (\xe1 \xe0 \x0c \xfd \xd3 g\xf7 Y\xa8 "
79
+ b"\x18 \xa2 ;\xfa PW\x9a \x1f \xc9 \xc7 \x01 \xc1 \xd7 \x1a s\x9a \xac i%\xe4 1'\xd1 ;\x90 ;pH"
80
+ b"\x81 \xfc \x04 [\x88 bRv\x82 \x98 X\xaa lb\xc1 \x88 \xf5 Cc\x89 q@R\x82 \xd8 \x94 .'"
81
+ b"\x93 \xf4 &H+j6B\xe5 d\x1c \x8e 5\xe5 \\ \xa8 Ms>\r \xca \xc9 K^\xbf \x07 \x8d 6"
82
+ b"\xe8 \xe1 \xcd \xa3 \xa8 \xc0 \xa2 \xeb =\xf9 \x15 3\xaf \xde \xc4 \xa4 \x91 \xa0 \xef \xe0 "
83
+ b"\xf6 4H\x99 \x01 L\x18 oqV\xda p0\x89 \xd3 5R\xa8 \x0f \xc4 j)8\xf9 \x92 1v\xe1 "
84
+ b"\xbe U\x80 \xea \xe1 \x93 w+>\x00 \xd2 J\xef \xbc \x14 \x81 QE\xb0 \x0e \x96 \xb2 \xa7 \x8d "
85
+ b"\xd1 [\xc5 \x13 \xa6 \xa5 6\xf9 s\xc8 \x9c L\xeb \xc3 C\x8f \xc4 \x05 \x81 \xf9 "
86
+ b"|\xfe \x82 \xab \xd9 m\xc7 \xa0 \xd6 V\xf8 *\xe7 \xbe \x11 \xb0 \xde ;\x08 \xf2 \x86 g2\xf1 "
87
+ b"\xc8 gJ\xb4 %\xe5 \xb0 \xb2 \t \x07 \xa6 \r \r \xa6 J\xab \xd6 \xac %\xb9 \x92 \xc8 \xdb ["
88
+ b"\xd6 \x06 Z\xa3 })\x1c $\xb6 \xa6 \x0c \x19 \xa0 8\xd9 \x1b Z{\x15 Ew}\\ *\xc5 \x06 !%"
89
+ b"\xac \xdd \xc3 \xff R\x90 7\x95 \xa6 &\xd9 \x18 \xca \x05 \xce \xdf \x95 \xd3 \x18 \xb3 "
90
+ b"\xc3 \xbe \x8b \x89 \xd9 \xac \xae &\xb7 71\x99 \x8c \xa7 \xf8 5\xbf \x1d \x91 \xf1 "
91
+ b"0\x88 \xef g1\x99 _\xa3 \xf0 \xfa ~Dn\xee [\xe9 d<\x8e \xc9 l\x8e \xe2 \xe9 x\x84 V\xe3 "
92
+ b'\xe1 \xe8 "\x86 \x86 pG\xb0 n)raq\x9c \x8e yh\xf1 \xd7 \x1b \xed \x06 \xab 0\xc6 \x8f \x7f ^'
93
+ b"\x87 \xda z(\x9a \xfb \xb9 \x8b \xdb \xdf ;\x81 \xf9 \x07 \xde \x1d K\xbd \x03 \xd3 "
94
+ b"\x1d \xd6 \x9a mmw\xa0 \xcc \x17 \xa7 \xa5 qI\xf1 \x95 \x9a \x9c \x06 \x16 \x98 "
95
+ b"\x17 \xa4 \xbd I\xfb 4\n )\xf6 A\xe0 \x16 \x02 9\x84 \x1d \x96 \x93 \xbb \xf1 8\x94 \xfe \xeb "
96
+ b"\n \xb8 \xa0 \xe4 \xaa \xf7 \xdf \xcc \xff 7j\xb1 ^\x80 \xbf t\x81 \x0e \xce \xed ~j\xbe \xfb "
97
+ b"\xf9 \xfc l\xc2 z6\xa7 \x01 N$vL\x8c a\xf3 .\x03 >Ok\xa9 \xf7 M\x1b a9p\x18 "
98
+ b"\x9f \x92 =\xf6 \xbf '\xed (n\xbb \xcb \xc1 \xc4 \x8c IZ[\xec \xbd \xee W\xa3 \x88 \xeb "
99
+ b"\xda \xf1 \xe7 i\x99 v\xde _\x98 \xa8 \xb7 S\xff \x0e \xd6 \x9f \x9f R\x80 \xc6 "
100
+ b"\xc1 V\xa6 \xcd \\ \x9d \xf7 \x04 .m\x1c C\xbf \xfc \x12 \xbf i\xfb \xb3 \xf3 "
101
+ b"\xea \xc2 \xea \x01 I\xd1 b\x05 \x83 $L\xc0 \x99 \xf6 i\xd0 \xf4 \xe6 _[\xed \xe0 B\xb1 "
102
+ b"\xd9 ;\x1d z\x9b \x16 ?f\xde r\xee \x1b \xa1 \r \xf0 \x1e f\xab \xd5 \xed 5\\ .\xae "
103
+ b'\xbe \xa6 w\x82 \xf7 HX\xe1 \x8b \xac \xbd @}\x0b \xe0 \x03 \xcb \xbb l,\x17 \xf5 rA\xbb k"Z'
104
+ b"\xfe \xe6 \xaf -\xf2 {{o\xfd \xfb \xf7 ?\x96 <\xfe \xf0 \xa3 ]d\x14 \xef \xa4 \xda \xe3 "
105
+ b'4\x96 \x8b \xb6 \xa3 \x08 \x93 \xd4 \xda "\xf2 \xa8 \xfd [\xa5 i\xb4 \xe7 \xf7 I'
106
+ b"\xd6 \x9a y\x9c \x96 H\x16 .\xfc \xff \x00 W\x90 y)"
107
+ ),
108
+ )
14
109
15
- def png_for_path (path ):
110
+
111
def s3_contents():
    """Return the object keys currently stored in the screenshots bucket.

    Shells out to ``s3-credentials list-bucket til.simonwillison.net`` and
    parses its stdout as a JSON array of objects, collecting each ``"Key"``.

    Returns:
        list[str]: one entry per object reported by the command.

    Raises:
        subprocess.CalledProcessError: if the command exits non-zero. Without
            this check a failed listing would only surface later as a
            confusing JSON decode error on empty output.
    """
    proc = subprocess.run(
        ["s3-credentials", "list-bucket", "til.simonwillison.net"],
        capture_output=True,
        check=True,
    )
    return [item["Key"] for item in json.loads(proc.stdout)]
116
+
117
+
118
def jpeg_for_path(path):
    """Render a site page to HTML and return a screenshot of it as bytes.

    Args:
        path: URL path handed to ``datasette . --get``, e.g. ``"/topic/slug"``.

    Returns:
        bytes: whatever ``shot-scraper shot`` wrote to stdout (``-o -``).
        NOTE(review): ``--quality`` is expected to select JPEG output -
        confirm against the shot-scraper documentation.
    """
    page_html = str(TMP_PATH / "generate-screenshots-page.html")
    # Use datasette to generate HTML
    proc = subprocess.run(["datasette", ".", "--get", path], capture_output=True)
    # Close the file explicitly so the HTML is fully flushed to disk before
    # shot-scraper is asked to read it.
    with open(page_html, "wb") as html_file:
        html_file.write(proc.stdout)
    # Now use shot-scraper to generate a JPEG of an 800x400 viewport
    proc2 = subprocess.run(
        [
            "shot-scraper",
            "shot",
            page_html,
            "-w",
            "800",
            "-h",
            "400",
            "--retina",
            "--quality",
            "60",
            "-o",
            "-",
        ],
        capture_output=True,
    )
    return proc2.stdout
34
142
35
143
36
144
def generate_screenshots(root):
    """Take and upload a screenshot for every TIL whose content has changed.

    For each row of the ``til`` table in ``root / "tils.db"`` a ``shot_hash``
    is derived from SHOT_HASH_ELEMENTS plus the row's ``html``. A new
    screenshot is generated and uploaded as ``<shot_hash>.jpg`` when that hash
    differs from the one stored on the row, or when the file is missing from
    the bucket listing.

    Args:
        root: directory (``pathlib.Path``) containing ``tils.db``.

    Raises:
        subprocess.CalledProcessError: if listing the bucket or uploading a
            screenshot fails.
    """
    db = sqlite_utils.Database(root / "tils.db")
    til_table = db["til"]

    # If the old 'shot' column exists, drop it
    if "shot" in til_table.columns_dict:
        til_table.transform(drop=["shot"])

    # shot_hash incorporates a hash of key templates
    template_hasher = hashlib.md5()
    for element in SHOT_HASH_ELEMENTS:
        template_hasher.update(element.encode("utf-8"))
    template_hash = template_hasher.hexdigest()

    # A set gives constant-time membership tests inside the loop below
    s3_keys = set(s3_contents())

    for row in til_table.rows:
        path = row["path"]
        html = row["html"]
        shot_hash = hashlib.md5((template_hash + html).encode("utf-8")).hexdigest()
        shot_filename = "{}.jpg".format(shot_hash)
        if shot_hash != row.get("shot_hash") or shot_filename not in s3_keys:
            jpeg = jpeg_for_path("/{}/{}".format(row["topic"], row["slug"]))
            # Store it to S3 first; check=True stops us recording the hash
            # (and reporting success) for an upload that did not happen.
            subprocess.run(
                [
                    "s3-credentials",
                    "put-object",
                    "til.simonwillison.net",
                    shot_filename,
                    "-",
                    "--content-type",
                    "image/jpeg",
                    "--silent",
                ],
                input=jpeg,
                check=True,
            )
            til_table.update(path, {"shot_hash": shot_hash}, alter=True)
            print(
                "Stored {} byte JPEG for {} shot hash {}".format(
                    len(jpeg), path, shot_hash
                )
            )
        else:
            print("Skipped {} with shot hash {}".format(path, shot_hash))
58
188
59
189
60
190
if __name__ == "__main__" :
61
- generate_screenshots (root )
191
+ generate_screenshots (root )
0 commit comments