88 "os"
99 "os/exec"
1010 "path/filepath"
11+ "regexp"
1112 "strings"
1213 "time"
1314
@@ -106,90 +107,142 @@ func New(target *url.URL, outputDir string) *retriever {
106107}
107108
108109func (r * retriever ) checkVulnerable () error {
109- head , err := r .downloadFile ("HEAD" )
110- if err != nil {
110+ if err := r .downloadFile ("HEAD" ); err != nil {
111111 return fmt .Errorf ("%w: %s" , ErrNotVulnerable , err )
112112 }
113113
114+ filePath := filepath .Join (r .outputDir , ".git" , "HEAD" )
115+ head , err := ioutil .ReadFile (filePath )
116+ if err != nil {
117+ return err
118+ }
119+
114120 if ! strings .HasPrefix (string (head ), "ref: " ) {
115121 return ErrNotVulnerable
116122 }
117123
118124 return nil
119125}
120126
121- func (r * retriever ) downloadFile (path string ) ([]byte , error ) {
127+ func (r * retriever ) parsePackMetadata (meta []byte ) error {
128+ lines := strings .Split (string (meta ), "\n " )
129+ for _ , line := range lines {
130+ parts := strings .Split (strings .TrimSpace (line ), " " )
131+ if parts [0 ] == "P" && len (parts ) == 2 {
132+ if err := r .downloadFile (fmt .Sprintf ("objects/pack/%s" , parts [1 ])); err != nil {
133+ logrus .Debugf ("Failed to retrieve pack file %s: %s" , parts [1 ], err )
134+ }
135+ }
136+ }
137+ return nil
138+ }
139+
140+ func (r * retriever ) parsePackFile (filename string , data []byte ) error {
141+
142+ f , err := os .Open (filepath .Join (r .outputDir , ".git" , filename ))
143+ if err != nil {
144+ return err
145+ }
146+ defer func () { _ = f .Close () }()
147+
148+ cmd := exec .Command ("git" , "unpack-objects" )
149+ cmd .Stdin = f
150+ cmd .Dir = r .outputDir
151+ return cmd .Run ()
152+ }
153+
154+ func (r * retriever ) downloadFile (path string ) error {
122155
123156 path = strings .TrimSpace (path )
124157
125158 filePath := filepath .Join (r .outputDir , ".git" , path )
126159
127160 if r .downloaded [path ] {
128- return ioutil . ReadFile ( filePath )
161+ return nil
129162 }
130163 r .downloaded [path ] = true
131164
132165 relative , err := url .Parse (path )
133166 if err != nil {
134- return nil , err
167+ return err
135168 }
136169
137170 absolute := r .baseURL .ResolveReference (relative )
138171 resp , err := r .http .Get (absolute .String ())
139172 if err != nil {
140- return nil , fmt .Errorf ("failed to retrieve %s: %w" , absolute .String (), err )
173+ return fmt .Errorf ("failed to retrieve %s: %w" , absolute .String (), err )
141174 }
142175 defer func () { _ = resp .Body .Close () }()
143176
144177 if resp .StatusCode != http .StatusOK {
145- return nil , fmt .Errorf ("unexpected status code for url %s : %d" , absolute .String (), resp .StatusCode )
178+ return fmt .Errorf ("unexpected status code for url %s : %d" , absolute .String (), resp .StatusCode )
146179 }
147180
148181 content , err := ioutil .ReadAll (resp .Body )
149182 if err != nil {
150- return nil , err
183+ return err
151184 }
152185
153186 if err := os .MkdirAll (filepath .Dir (filePath ), 0755 ); err != nil {
154- return nil , err
187+ return err
155188 }
156189
157- if err := ioutil .WriteFile (filePath , content , 0640 ); err != nil {
158- return nil , fmt .Errorf ("failed to write %s: %w" , filePath , err )
190+ if ! strings .HasSuffix (path , "/" ) {
191+ if err := ioutil .WriteFile (filePath , content , 0640 ); err != nil {
192+ return fmt .Errorf ("failed to write %s: %w" , filePath , err )
193+ }
159194 }
160195
161- if path == "HEAD" {
196+ switch path {
197+ case "HEAD" :
162198 ref := strings .TrimPrefix (string (content ), "ref: " )
163- if _ , err := r .downloadFile (ref ); err != nil {
164- return nil , err
199+ if err := r .downloadFile (ref ); err != nil {
200+ return err
165201 }
166- return content , nil
202+ return nil
203+ case "config" :
204+ return r .analyseConfig (content )
205+ case "objects/pack/" :
206+ // parse the directory listing
207+ packFiles := packLinkRegex .FindAllStringSubmatch (string (content ), - 1 )
208+ for _ , packFile := range packFiles {
209+ if len (packFile ) <= 1 {
210+ continue
211+ }
212+ if err := r .downloadFile (fmt .Sprintf ("objects/pack/%s" , packFile [1 ])); err != nil {
213+ logrus .Debugf ("Failed to retrieve pack file %s: %s" , packFile [1 ], err )
214+ continue
215+ }
216+ }
217+ return nil
218+ case "objects/info/packs" :
219+ return r .parsePackMetadata (content )
167220 }
168221
169- if path == "config" {
170- return content , r . analyseConfig ( content )
222+ if strings . HasSuffix ( path , ".pack" ) {
223+ return r . parsePackFile ( path , content )
171224 }
172225
173226 if strings .HasPrefix (path , "refs/heads/" ) {
174227 if _ , err := r .downloadObject (string (content )); err != nil {
175- return nil , err
228+ return err
176229 }
177- return content , nil
230+ return nil
178231 }
179232
180233 hash := filepath .Base (filepath .Dir (path )) + filepath .Base (path )
181234
182235 objectType , err := r .getObjectType (hash )
183236 if err != nil {
184- return nil , err
237+ return err
185238 }
186239
187240 switch objectType {
188241 case GitCommitFile :
189242
190243 commit , err := r .readCommit (hash )
191244 if err != nil {
192- return nil , err
245+ return err
193246 }
194247
195248 logrus .Debugf ("Successfully retrieved commit %s." , hash )
@@ -209,7 +262,7 @@ func (r *retriever) downloadFile(path string) ([]byte, error) {
209262
210263 tree , err := r .readTree (hash )
211264 if err != nil {
212- return nil , err
265+ return err
213266 }
214267
215268 logrus .Debugf ("Successfully retrieved tree %s." , hash )
@@ -222,18 +275,18 @@ func (r *retriever) downloadFile(path string) ([]byte, error) {
222275 case GitBlobFile :
223276 logrus .Debugf ("Successfully retrieved blob %s." , hash )
224277 default :
225- return nil , fmt .Errorf ("unknown git file type for %s: %s" , path , objectType )
278+ return fmt .Errorf ("unknown git file type for %s: %s" , path , objectType )
226279 }
227280
228- return content , nil
281+ return nil
229282}
230283
231284func (r * retriever ) downloadObject (hash string ) (string , error ) {
232285
233286 logrus .Debugf ("Requesting hash [%s]\n " , hash )
234287
235288 path := fmt .Sprintf ("objects/%s/%s" , hash [:2 ], hash [2 :40 ])
236- if _ , err := r .downloadFile (path ); err != nil {
289+ if err := r .downloadFile (path ); err != nil {
237290 r .summary .MissingObjects = append (r .summary .MissingObjects , hash )
238291 return "" , err
239292 }
@@ -342,15 +395,33 @@ func (r *retriever) checkout() error {
342395
343396var ErrNoPackInfo = fmt .Errorf ("pack information (.git/objects/info/packs) is missing" )
344397
345- func (r * retriever ) handlePackFiles () error {
346- if _ , err := r .downloadFile ("objects/info/packs" ); err != nil {
398+ // e.g. href="pack-5b89658fae4313c1e25d629bfa95f809c77ff949.pack"
399+ var packLinkRegex = regexp .MustCompile ("href=[\" ']?(pack-[a-z0-9]{40}\\ .pack)" )
400+
401+ func (r * retriever ) locatePackFiles () error {
402+
403+ // first of all let's try a directory listing for all pack files
404+ _ = r .downloadFile ("objects/pack/" )
405+
406+ // otherwise hopefully the pak listing is available...
407+ if err := r .downloadFile ("objects/info/packs" ); err != nil {
347408 return ErrNoPackInfo
348409 }
349410
350- // TODO retrieve and unpack pack files...
351- // anything discovered should be removed from r.summary.MissingObjects and added to r.summary.FoundObjects
411+ // after handling pack files, let's check if anything is still missing...
412+ var newMissing []string
413+ for _ , hash := range r .summary .MissingObjects {
414+ path := filepath .Join (r .outputDir , ".git" , "objects" , hash [:2 ], hash [2 :40 ])
415+ if _ , err := os .Stat (path ); err != nil {
416+ newMissing = append (newMissing , hash )
417+ } else {
418+ r .summary .FoundObjects = append (r .summary .FoundObjects , hash )
419+ }
420+ }
352421
353- return fmt .Errorf ("unpacking pack files is not currently supported" )
422+ r .summary .MissingObjects = newMissing
423+
424+ return nil
354425}
355426
356427func (r * retriever ) Run () (* Summary , error ) {
@@ -359,21 +430,21 @@ func (r *retriever) Run() (*Summary, error) {
359430 return nil , err
360431 }
361432
362- if _ , err := r .downloadFile ("config" ); err != nil {
433+ if err := r .downloadFile ("config" ); err != nil {
363434 return nil , err
364435 }
365436
366- if _ , err := r .downloadFile ("HEAD" ); err != nil {
437+ if err := r .downloadFile ("HEAD" ); err != nil {
367438 return nil , err
368439 }
369440
370441 // common paths to check, not necessarily required
371442 for _ , path := range paths {
372- _ , _ = r .downloadFile (path )
443+ _ = r .downloadFile (path )
373444 }
374445
375446 // grab packed files
376- if err := r .handlePackFiles (); err == ErrNoPackInfo {
447+ if err := r .locatePackFiles (); err == ErrNoPackInfo {
377448 r .summary .PackInformationAvailable = false
378449 logrus .Debugf ("Pack information file is not available - some objects may be missing." )
379450 } else if err == nil {
0 commit comments