Skip to content

Commit 2aac240

Browse files
committed
chore: improve storage size of symbol server data (postgres)
1 parent 58302f0 commit 2aac240

File tree

5 files changed

+328
-70
lines changed

5 files changed

+328
-70
lines changed

.cursorignore

+47
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# ipsw files
2+
*.ipsw
3+
*.zip
4+
*.im4p
5+
*.img4
6+
*_Restore
7+
usr
8+
*.xz
9+
*.bin
10+
*.s
11+
.DS_Store
12+
*.dmg
13+
*.aea
14+
*.signature
15+
16+
# Repo
17+
*.bu
18+
dist/
19+
test-caches/
20+
completions/
21+
manpages/
22+
*.txt
23+
*.swift
24+
*.prof
25+
*.download
26+
checksums.txt.sha1
27+
mg.*
28+
btm.go
29+
30+
# Docs
31+
public
32+
site/
33+
www/docs/static/releases*.json
34+
35+
# Other
36+
*.unstripped
37+
*.bndb
38+
*.lock
39+
internal/commands/ida/dscu/data/dscu.py
40+
venv/
41+
42+
# Unicorn EMU
43+
unicorn2/
44+
45+
.vscode/
46+
test-caches/
47+
dist/

internal/db/postgres.go

+215-4
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"github.com/blacktop/ipsw/internal/model"
88
"gorm.io/driver/postgres"
99
"gorm.io/gorm"
10+
"gorm.io/gorm/clause"
1011
)
1112

1213
// Postgres is a database that stores data in a Postgres database.
@@ -58,14 +59,16 @@ func (p *Postgres) Connect() (err error) {
5859
&model.Kernelcache{},
5960
&model.DyldSharedCache{},
6061
&model.Macho{},
62+
&model.Path{},
6163
&model.Symbol{},
64+
&model.Name{},
6265
)
6366
}
6467

6568
// Create creates a new entry in the database.
6669
// NOTE(review): the body now calls FirstOrCreate, so an entry that already
// exists is loaded into value instead of being reported as ErrAlreadyExists.
6770
func (p *Postgres) Create(value any) error {
68-
if result := p.db.Create(value); result.Error != nil {
71+
if result := p.db.FirstOrCreate(value); result.Error != nil {
6972
return result.Error
7073
}
7174
return nil
@@ -171,15 +174,223 @@ func (p *Postgres) GetSymbols(uuid string) ([]*model.Symbol, error) {
171174
return syms, nil
172175
}
173176

174-
// Set sets the value for the given key.
177+
// Save sets the value for the given key.
175178
// It overwrites any previous value for that key.
176179
func (p *Postgres) Save(value any) error {
177-
if result := p.db.Save(value); result.Error != nil {
178-
return result.Error
180+
if ipsw, ok := value.(*model.Ipsw); ok {
181+
// Start transaction
182+
return p.db.Transaction(func(tx *gorm.DB) error {
183+
// Defer foreign key checks
184+
// if err := tx.Exec("SET CONSTRAINTS ALL DEFERRED").Error; err != nil {
185+
// return err
186+
// }
187+
// Process Paths
188+
if err := p.processPaths(tx, ipsw); err != nil {
189+
return err
190+
}
191+
// Process Names
192+
if err := p.processNames(tx, ipsw); err != nil {
193+
return err
194+
}
195+
// Save the main IPSW entry
196+
if err := tx.Save(ipsw).Error; err != nil {
197+
return fmt.Errorf("failed to save IPSW: %w", err)
198+
}
199+
200+
return nil
201+
})
202+
}
203+
return fmt.Errorf("invalid value type: %T", value)
204+
}
205+
206+
func (p *Postgres) processPaths(tx *gorm.DB, ipsw *model.Ipsw) error {
207+
uniquePaths := make(map[string]struct{})
208+
209+
// Collect unique paths
210+
for _, kernel := range ipsw.Kernels {
211+
for _, kext := range kernel.Kexts {
212+
uniquePaths[kext.Path.Path] = struct{}{}
213+
}
214+
}
215+
for _, dsc := range ipsw.DSCs {
216+
for _, img := range dsc.Images {
217+
uniquePaths[img.Path.Path] = struct{}{}
218+
}
179219
}
220+
for _, fs := range ipsw.FileSystem {
221+
uniquePaths[fs.Path.Path] = struct{}{}
222+
}
223+
224+
// Process paths in batches
225+
paths := make([]string, 0, len(uniquePaths))
226+
for path := range uniquePaths {
227+
paths = append(paths, path)
228+
}
229+
230+
for i := 0; i < len(paths); i += p.BatchSize {
231+
end := i + p.BatchSize
232+
if end > len(paths) {
233+
end = len(paths)
234+
}
235+
batch := paths[i:end]
236+
237+
// Bulk create or get Paths
238+
if err := tx.Clauses(clause.OnConflict{
239+
Columns: []clause.Column{{Name: "path"}},
240+
DoNothing: true,
241+
}).Create(convertToPaths(batch)).Error; err != nil {
242+
return fmt.Errorf("failed to create paths: %w", err)
243+
}
244+
}
245+
246+
// Fetch all created/existing Paths in batches
247+
var allPaths []model.Path
248+
for i := 0; i < len(paths); i += p.BatchSize {
249+
end := i + p.BatchSize
250+
if end > len(paths) {
251+
end = len(paths)
252+
}
253+
batch := paths[i:end]
254+
255+
var batchPaths []model.Path
256+
if err := tx.Where("path IN ?", batch).Find(&batchPaths).Error; err != nil {
257+
return fmt.Errorf("failed to fetch paths: %w", err)
258+
}
259+
allPaths = append(allPaths, batchPaths...)
260+
}
261+
262+
// Create a map for quick Path lookup
263+
pathMap := make(map[string]*model.Path)
264+
for i := range allPaths {
265+
pathMap[allPaths[i].Path] = &allPaths[i]
266+
}
267+
268+
// Update Path IDs
269+
for _, kernel := range ipsw.Kernels {
270+
for _, kext := range kernel.Kexts {
271+
kext.Path.ID = pathMap[kext.GetPath()].ID
272+
}
273+
}
274+
for _, dsc := range ipsw.DSCs {
275+
for _, img := range dsc.Images {
276+
img.Path.ID = pathMap[img.GetPath()].ID
277+
}
278+
}
279+
for _, fs := range ipsw.FileSystem {
280+
fs.Path.ID = pathMap[fs.GetPath()].ID
281+
}
282+
180283
return nil
181284
}
182285

286+
func (p *Postgres) processNames(tx *gorm.DB, ipsw *model.Ipsw) error {
287+
uniqueNames := make(map[string]struct{})
288+
289+
// Collect unique names
290+
for _, kernel := range ipsw.Kernels {
291+
for _, kext := range kernel.Kexts {
292+
for _, sym := range kext.Symbols {
293+
uniqueNames[sym.GetName()] = struct{}{}
294+
}
295+
}
296+
}
297+
for _, dsc := range ipsw.DSCs {
298+
for _, img := range dsc.Images {
299+
for _, sym := range img.Symbols {
300+
uniqueNames[sym.GetName()] = struct{}{}
301+
}
302+
}
303+
}
304+
for _, fs := range ipsw.FileSystem {
305+
for _, sym := range fs.Symbols {
306+
uniqueNames[sym.GetName()] = struct{}{}
307+
}
308+
}
309+
310+
// Process names in batches
311+
names := make([]string, 0, len(uniqueNames))
312+
for name := range uniqueNames {
313+
names = append(names, name)
314+
}
315+
316+
for i := 0; i < len(names); i += p.BatchSize {
317+
end := i + p.BatchSize
318+
if end > len(names) {
319+
end = len(names)
320+
}
321+
batch := names[i:end]
322+
323+
// Bulk create or get Names
324+
if err := tx.Clauses(clause.OnConflict{
325+
Columns: []clause.Column{{Name: "name"}},
326+
DoNothing: true,
327+
}).Create(convertToNames(batch)).Error; err != nil {
328+
return fmt.Errorf("failed to create names: %w", err)
329+
}
330+
}
331+
332+
// Fetch all created/existing Names in batches
333+
var allNames []model.Name
334+
for i := 0; i < len(names); i += p.BatchSize {
335+
end := i + p.BatchSize
336+
if end > len(names) {
337+
end = len(names)
338+
}
339+
batch := names[i:end]
340+
341+
var batchNames []model.Name
342+
if err := tx.Where("name IN ?", batch).Find(&batchNames).Error; err != nil {
343+
return fmt.Errorf("failed to fetch names: %w", err)
344+
}
345+
allNames = append(allNames, batchNames...)
346+
}
347+
348+
// Create a map for quick Name lookup
349+
nameMap := make(map[string]*model.Name)
350+
for i := range allNames {
351+
nameMap[allNames[i].Name] = &allNames[i]
352+
}
353+
354+
// Update Symbols with Name IDs
355+
for _, kernel := range ipsw.Kernels {
356+
for _, kext := range kernel.Kexts {
357+
for _, sym := range kext.Symbols {
358+
sym.Name.ID = nameMap[sym.GetName()].ID
359+
}
360+
}
361+
}
362+
for _, dsc := range ipsw.DSCs {
363+
for _, img := range dsc.Images {
364+
for _, sym := range img.Symbols {
365+
sym.Name.ID = nameMap[sym.GetName()].ID
366+
}
367+
}
368+
}
369+
for _, fs := range ipsw.FileSystem {
370+
for _, sym := range fs.Symbols {
371+
sym.Name.ID = nameMap[sym.GetName()].ID
372+
}
373+
}
374+
375+
return nil
376+
}
377+
378+
func convertToPaths(paths []string) []model.Path {
379+
result := make([]model.Path, len(paths))
380+
for i, path := range paths {
381+
result[i] = model.Path{Path: path}
382+
}
383+
return result
384+
}
385+
386+
func convertToNames(names []string) []model.Name {
387+
result := make([]model.Name, len(names))
388+
for i, name := range names {
389+
result[i] = model.Name{Name: name}
390+
}
391+
return result
392+
}
393+
183394
// Delete removes the given key.
184395
// It returns ErrNotFound if the key does not exist.
185396
func (p *Postgres) Delete(key string) error {

internal/model/model.go

+25-14
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ type Ipsw struct {
2222
Version string `json:"version,omitempty"`
2323
BuildID string `json:"buildid,omitempty"`
2424
Devices []*Device `gorm:"many2many:ipsw_devices;" json:"devices,omitempty"`
25-
Date time.Time `json:"date,omitempty"`
2625
Kernels []*Kernelcache `gorm:"many2many:ipsw_kernels;" json:"kernels,omitempty"`
2726
DSCs []*DyldSharedCache `gorm:"many2many:ipsw_dscs;" json:"dscs,omitempty"`
2827
FileSystem []*Macho `gorm:"many2many:ipsw_files;" json:"file_system,omitempty"`
@@ -34,10 +33,6 @@ type Ipsw struct {
3433

3534
// Device is a device record identified solely by its name, which is the
// primary key.
type Device struct {
3635
Name string `gorm:"primaryKey" json:"name"`
37-
38-
CreatedAt time.Time
39-
UpdatedAt time.Time
40-
DeletedAt gorm.DeletedAt `gorm:"index"`
4136
}
4237

4338
// Kernelcache is the model for a kernelcache.
@@ -62,29 +57,45 @@ type DyldSharedCache struct {
6257
Images []*Macho `gorm:"many2many:dsc_images;" json:"images,omitempty"`
6358
}
6459

60+
// Path is a path string stored in its own table; the unique index on Path
// means each distinct string has a single row that other records reference by ID.
type Path struct {
61+
// ID is the primary key.
ID uint `gorm:"primaryKey"`
62+
// Path is the path string itself.
// NOTE(review): the JSON key is "name" rather than "path" — confirm this is intentional.
Path string `gorm:"uniqueIndex" json:"name,omitempty"`
63+
}
64+
6565
// Macho is keyed by UUID, references its path through PathID/Path, and is
// linked to its symbols through the macho_syms join table.
type Macho struct {
6666
UUID string `gorm:"primaryKey" json:"uuid"`
67-
CreatedAt time.Time
68-
UpdatedAt time.Time
69-
DeletedAt gorm.DeletedAt `gorm:"index"`
70-
71-
Name string `json:"name,omitempty"`
67+
// PathID is the foreign key for the Path association below.
PathID uint
68+
Path Path `gorm:"foreignKey:PathID"`
7269
TextStart uint64 `gorm:"type:bigint" json:"text_start,omitempty"`
7370
TextEnd uint64 `gorm:"type:bigint" json:"text_end,omitempty"`
7471
Symbols []*Symbol `gorm:"many2many:macho_syms;"`
7572
}
7673

74+
// GetPath returns the path string held by the Macho's associated Path record.
func (m Macho) GetPath() string {
75+
return m.Path.Path
76+
}
77+
78+
// Name is a name string stored in its own table; the unique index on Name
// means each distinct string has a single row that other records reference by ID.
type Name struct {
79+
// ID is the primary key.
ID uint `gorm:"primaryKey"`
80+
// Name is the name string itself.
Name string `gorm:"uniqueIndex" json:"name,omitempty"`
81+
}
82+
7783
// swagger:model
7884
// Symbol is a Start/End pair whose name is stored in a separate Name record
// referenced through NameID.
type Symbol struct {
7985
// swagger:ignore
80-
gorm.Model
81-
Symbol string `json:"symbol"`
86+
ID uint `gorm:"primaryKey"`
87+
// NameID is the foreign key for the Name association below.
NameID uint
88+
Name Name `gorm:"foreignKey:NameID"`
8289
Start uint64 `gorm:"type:bigint" json:"start"`
8390
End uint64 `gorm:"type:bigint" json:"end"`
8491
}
8592

93+
// GetName returns the name string held by the Symbol's associated Name record.
func (s Symbol) GetName() string {
94+
return s.Name.Name
95+
}
96+
8697
// String formats the symbol as its start address in 0x-prefixed hex, a colon
// and a space, then the symbol's name.
func (s Symbol) String() string {
87-
return fmt.Sprintf("%#x: %s", s.Start, s.Symbol)
98+
return fmt.Sprintf("%#x: %s", s.Start, s.Name.Name)
8899
}
89100

90101
func (s Symbol) MarshalJSON() ([]byte, error) {
@@ -93,7 +104,7 @@ func (s Symbol) MarshalJSON() ([]byte, error) {
93104
Start uint64 `json:"start"`
94105
End uint64 `json:"end"`
95106
}{
96-
Symbol: s.Symbol,
107+
Symbol: s.Name.Name,
97108
Start: s.Start,
98109
End: s.End,
99110
})

0 commit comments

Comments
 (0)