@@ -98,6 +98,48 @@ def mkdir_with_dot_tmp(folder: Path)->Tuple[Path, Path]:
9898 shutil .rmtree (str (tmpdir ))
9999 tmpdir .mkdir (parents = True , exist_ok = True )
100100 return (folder , tmpdir )
101+
def flush_package_info(content: str, package_info=None, new_or_old=True):
    """Merge the package stanzas found in *content* into *package_info*.

    *content* is the decoded text of a package index; stanzas are separated
    by a blank line. For every stanza the file name, size and SHA-256
    checksum are extracted with the module-level ``pattern_package_*``
    regular expressions and recorded under the file name as::

        {'size': int | None, 'sha256': {'new': str | None, 'old': str | None}}

    Args:
        content: Text of the package index to parse.
        package_info: Mapping to update in place. A fresh dict is created
            when omitted, so separate calls never share state.
        new_or_old: ``True`` when *content* is the new index (fills ``size``
            and ``sha256['new']``); ``False`` when it is the old index
            (fills only ``sha256['old']``).

    Returns:
        The updated mapping (the same object as *package_info* when one was
        supplied).
    """
    if package_info is None:
        package_info = {}

    for pkg in content.split('\n\n'):
        if len(pkg) < 10:  # ignore blanks
            continue
        try:
            pkg_filename = pattern_package_name.search(pkg).group(1)
            pkg_size = int(pattern_package_size.search(pkg).group(1))
            pkg_checksum = pattern_package_sha256.search(pkg).group(1)
        except Exception:
            print("Failed to parse one package description", flush=True)
            traceback.print_exc()
            # Skip only the malformed stanza; returning here would silently
            # drop every package that follows it in the index.
            continue

        # First sighting of a file starts from an all-unknown record, then
        # the side (new/old) described by this index is filled in.
        pkg_info = package_info.setdefault(
            pkg_filename,
            {'size': None, 'sha256': {'new': None, 'old': None}},
        )
        if new_or_old:
            pkg_info['size'] = pkg_size
            pkg_info['sha256']['new'] = pkg_checksum
        else:
            pkg_info['sha256']['old'] = pkg_checksum

    return package_info
101143
102144def move_files_in (src : Path , dst : Path ):
103145 empty = True
@@ -110,6 +152,9 @@ def move_files_in(src: Path, dst: Path):
110152 print (f"{ src } is empty" )
111153
112154def apt_mirror (base_url : str , dist : str , repo : str , arch : str , dest_base_dir : Path , deb_set : Dict [str , int ])-> int :
155+
156+ package_info = {}
157+
113158 if not dest_base_dir .is_dir ():
114159 print ("Destination directory is empty, cannot continue" )
115160 return 1
@@ -134,6 +179,7 @@ def apt_mirror(base_url: str, dist: str, repo: str, arch: str, dest_base_dir: Pa
134179 pkgidx_dir ,pkgidx_tmp_dir = mkdir_with_dot_tmp (comp_dir / arch_dir )
135180 with open (release_file , "r" ) as fd :
136181 pkgidx_content = None
182+ pkgidx_content_old = None
137183 cnt_start = False
138184 for line in fd :
139185 if cnt_start :
@@ -146,6 +192,8 @@ def apt_mirror(base_url: str, dist: str, repo: str, arch: str, dest_base_dir: Pa
146192 filename .startswith (f"Contents-{ arch } " ):
147193 fn = Path (filename )
148194 pkgidx_file = dist_dir / fn .parent / ".tmp" / fn .name
195+ if pkgidx_file .stem == 'Packages' :
196+ pkgidx_file_old = Path (f'{ dist_dir } /{ filename } ' )
149197 else :
150198 print (f"Ignore the file { filename } " )
151199 continue
@@ -176,12 +224,38 @@ def apt_mirror(base_url: str, dist: str, repo: str, arch: str, dest_base_dir: Pa
176224 pkgidx_content = content .decode ('utf-8' )
177225 else :
178226 print ("unsupported format" )
227+ continue
228+
229+ package_info = flush_package_info (pkgidx_content , package_info = package_info , new_or_old = True )
230+
231+ if not os .path .exists (pkgidx_file_old ):
232+ continue
233+
234+ with pkgidx_file_old .open ('rb' ) as t : content = t .read ()
235+ if pkgidx_content_old is None and pkgidx_file_old .stem == 'Packages' :
236+ print (f"getting packages index content from { pkgidx_file_old .name } " , flush = True )
237+ suffix = pkgidx_file .suffix
238+ if suffix == '.xz' :
239+ pkgidx_content_old = lzma .decompress (content ).decode ('utf-8' )
240+ elif suffix == '.bz2' :
241+ pkgidx_content_old = bz2 .decompress (content ).decode ('utf-8' )
242+ elif suffix == '.gz' :
243+ pkgidx_content_old = gzip .decompress (content ).decode ('utf-8' )
244+ elif suffix == '' :
245+ pkgidx_content_old = content .decode ('utf-8' )
246+ else :
247+ print ("unsupported format" )
248+ continue
249+
250+ package_info = flush_package_info (pkgidx_content_old , package_info = package_info , new_or_old = False )
251+
179252
180253 # Currently only support SHA-256 checksum, because
181254 # "Clients may not use the MD5Sum and SHA1 fields for security purposes, and must require a SHA256 or a SHA512 field."
182255 # from https://wiki.debian.org/DebianRepository/Format#A.22Release.22_files
183256 if line .startswith ('SHA256:' ):
184257 cnt_start = True
258+
185259 if not cnt_start :
186260 print ("Cannot find SHA-256 checksum" )
187261 return 1
@@ -216,18 +290,13 @@ def collect_tmp_dir():
216290 err = 0
217291 deb_count = 0
218292 deb_size = 0
219- for pkg in pkgidx_content .split ('\n \n ' ):
220- if len (pkg ) < 10 : # ignore blanks
221- continue
222- try :
223- pkg_filename = pattern_package_name .search (pkg ).group (1 )
224- pkg_size = int (pattern_package_size .search (pkg ).group (1 ))
225- pkg_checksum = pattern_package_sha256 .search (pkg ).group (1 )
226- except :
227- print ("Failed to parse one package description" , flush = True )
228- traceback .print_exc ()
229- err = 1
293+ for pkg_filename , pkg_info in package_info .items ():
294+ pkg_size = pkg_info ['size' ]
295+ pkg_checksum = pkg_info ['sha256' ]
296+
297+ if pkg_checksum ['new' ] is None and pkg_size is None :
230298 continue
299+
231300 deb_count += 1
232301 deb_size += pkg_size
233302
@@ -237,8 +306,8 @@ def collect_tmp_dir():
237306 dest_dir .mkdir (parents = True , exist_ok = True )
238307 if dest_filename .suffix == '.deb' :
239308 deb_set [str (dest_filename .relative_to (dest_base_dir ))] = pkg_size
240- if dest_filename .is_file () and dest_filename .stat ().st_size == pkg_size :
241- print (f"Skipping { pkg_filename } , size { pkg_size } " )
309+ if dest_filename .is_file () and dest_filename .stat ().st_size == pkg_size and pkg_checksum [ 'old' ] == pkg_checksum [ 'new' ] :
310+ print (f"Skipping { pkg_filename } , size { pkg_size } , sha256 { pkg_checksum [ 'new' ] } " )
242311 continue
243312
244313 pkg_url = f"{ base_url } /{ pkg_filename } "
@@ -253,8 +322,8 @@ def collect_tmp_dir():
253322 with dest_tmp_filename .open ("rb" ) as f :
254323 for block in iter (lambda : f .read (1024 ** 2 ), b"" ):
255324 sha .update (block )
256- if sha .hexdigest () != pkg_checksum :
257- print (f"Invalid checksum of { dest_filename } , expected { pkg_checksum } " )
325+ if sha .hexdigest () != pkg_checksum [ 'new' ] :
326+ print (f"Invalid checksum of { dest_filename } , expected { pkg_checksum [ 'new' ] } " )
258327 dest_tmp_filename .unlink ()
259328 continue
260329 dest_tmp_filename .rename (dest_filename )
0 commit comments