Skip to content

Commit b2152f4

Browse files
authored
feat: add more videos - update mapping_table.pbtxt (#139)
* feat: add more videos - update mapping_table.pbtxt * chore: update database * fix: only append unique arxivIDs * chore: add new mapping_table.pbtxt * chore: update database
1 parent c1c4c27 commit b2152f4

File tree

5 files changed

+51346
-17367
lines changed

5 files changed

+51346
-17367
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,4 @@
44
/client/test/**/*.mocks.dart
55
.devcontainer/
66
/.env
7+
*.pbtxt.bak

dbctl/internal/transform/transform.go

+8-3
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,9 @@ func ExtractPaperIDsViaProgrammableSearch(title, cx, apiKey string, limiter *rat
118118
return nil, err
119119
}
120120

121-
var paperIDs []string
121+
var arxivIDs []string
122+
123+
seenArxivIDs := map[string]bool{}
122124

123125
for _, item := range do.Items {
124126
arxivID, err := ExtractArxivIDFromURL(item.Link)
@@ -128,11 +130,14 @@ func ExtractPaperIDsViaProgrammableSearch(title, cx, apiKey string, limiter *rat
128130
}
129131

130132
if arxivID != "" {
131-
paperIDs = append(paperIDs, arxivID)
133+
if !seenArxivIDs[arxivID] {
134+
seenArxivIDs[arxivID] = true
135+
arxivIDs = append(arxivIDs, arxivID)
136+
}
132137
}
133138
}
134139

135-
return paperIDs, nil
140+
return arxivIDs, nil
136141
}
137142

138143
// ExtractArxivIDFromURL extracts ArxivID from the URL.

server/internal/data/data_test.go

+23
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,36 @@ package data
22

33
import (
44
"regexp"
5+
"sort"
56
"strconv"
67
"strings"
78
"testing"
89

10+
"github.com/codingpot/pr12er/server/pkg/pr12er"
911
"github.com/stretchr/testify/assert"
12+
"google.golang.org/protobuf/encoding/prototext"
1013
)
1114

15+
func TestValidMappingTable(t *testing.T) {
16+
bs, err := pbFiles.ReadFile("mapping_table.pbtxt")
17+
assert.NoError(t, err)
18+
19+
// Unmarshalled correctly.
20+
var table pr12er.MappingTable
21+
err = prototext.Unmarshal(bs, &table)
22+
assert.NoError(t, err)
23+
24+
// Sorted by PR ID.
25+
sort.SliceIsSorted(table.GetRows(), func(i, j int) bool {
26+
return table.GetRows()[i].GetPrId() < table.GetRows()[j].GetPrId()
27+
})
28+
29+
// PR ID and Video ID must exist.
30+
for _, row := range table.GetRows() {
31+
assert.Greater(t, row.GetPrId(), int32(0))
32+
}
33+
}
34+
1235
// TestValidData validates data integrity of the database.
1336
func TestValidData(t *testing.T) {
1437
r := regexp.MustCompile(`pr-?(\d+)`)

0 commit comments

Comments
 (0)