@@ -23,31 +23,30 @@ pp <- function(percentage, accuracy = 0.01) {
23
23
# Extracting All GNDs of vd17
24
24
``` {r}
25
25
# Distinct GND identifiers referenced in vd17_a: keep the rows whose
# subfield_code is "7", expose their `value` column under the name `GND`,
# drop duplicates, then collect() the result.
all_gnd_vd17 <- vd17_a %>%
  filter(subfield_code == "7") %>%
  select(GND = value) %>%
  distinct() %>%
  collect()
31
-
32
31
```
33
32
34
33
``` {r install_python_packages}
35
34
# Install the Python package "pandas" through reticulate.
# NOTE(review): presumably needed so the Python chunk can read the R data
# frame `all_gnd_vd17` as a pandas DataFrame (it indexes it with ['GND']) —
# confirm.
reticulate::py_install("pandas")
36
35
```
37
36
# Downloading the XML files of the authority records
38
37
``` {python}
39
- import urllib.request
38
+ import urllib.request
40
39
41
40
ids=[]
42
41
43
42
j=0
44
43
for id in r.all_gnd_vd17['GND']:
45
-
44
+
46
45
if j%10000==0:
47
46
print(j)
48
47
try:
49
48
result=urllib.request.urlretrieve("http://d-nb.info/"+id+"/about/marcxml", "data/work/vd17"+id+".xml")
50
-
49
+
51
50
except:
52
51
ids.append(id)
53
52
pass
@@ -56,62 +55,61 @@ for id in r.all_gnd_vd17['GND']:
56
55
```
57
56
# Checking that no files are missing because of connection interruptions
58
57
``` {r}
59
# File names currently present in the download directory.
list <- list.files(path = "data/work/vd17")

list_gnd <- as.list(all_gnd_vd17$GND)

# Print the expected "<GND>.xml" name of every GND that has no file of that
# name in the directory listing.
# NOTE(review): the "gnd/" prefix is only stripped from GND in a later chunk;
# confirm that the names built here match what list.files() returns.
for (gnd in list_gnd) {
  expected_file <- paste0(gnd, ".xml")
  if (!(expected_file %in% list)) {
    print(expected_file)
  }
}
69
-
70
69
```
71
70
# Checking the field_codes and subfield_codes related to GND and gender
72
71
``` {r}
73
# Remove every literal "gnd/" from the identifiers so that they can be
# compared with the `value` column of the authority table.
all_gnd_vd17$GND <- gsub(
  "gnd/",
  "",
  as.character(all_gnd_vd17$GND),
  fixed = TRUE
)

# Authority data, read lazily from the compressed TSV file.
gnd_authority <- read_tsv(here("vd17_auth.tsv.gz"), lazy = TRUE)

# Distinct authority values that also occur among the vd17 GNDs.
authority_gnd_list <- gnd_authority %>%
  filter(value %in% all_gnd_vd17$GND) %>%
  distinct(value)

# Subfield "a" rows of fields "024" and "375" only.
gnd_authority_filter <- gnd_authority %>%
  filter(subfield_code == "a") %>%
  filter(field_code %in% c("024", "375"))
79
78
```
80
79
81
80
82
81
# For some GNDs there is no field_code "375", so the gender is not specified.
83
82
``` {r warning=FALSE}
84
# Build `genders`: one row per value in `authority_gnd_list`, holding the GND
# and the gender found for it, or "" when no gender is found.
#
# Inputs (defined in earlier chunks):
#   authority_gnd_list$value  GNDs to look up
#   gnd_authority_filter      subfield "a" rows of fields "024" and "375"
#
# A GND is located through the `value` of an "024" row; its gender is the
# `value` of a "375" row that shares the same `record_number`.
#
# The lookup is vectorised (one merge plus one match) instead of re-filtering
# the whole table and growing `genders` by one row per GND. When a GND maps
# to several records, or a record carries several "375" values, the first
# match in the joined table is used. The loop version compared
# `record_number` against a multi-element vector (element-wise recycling) in
# the first case and could not store more than one gender per row in the
# second.
columns <- c("GND", "gender")

list_gnd_auth <- as.list(authority_gnd_list$value)
gnd_values <- as.character(authority_gnd_list$value)

# which() drops rows whose codes are NA instead of propagating NA indices.
id_rows <- which(
  gnd_authority_filter$field_code == "024" &
    gnd_authority_filter$subfield_code == "a"
)
gender_rows <- which(
  gnd_authority_filter$field_code == "375" &
    gnd_authority_filter$subfield_code == "a"
)

# GND -> record_number, taken from the "024" rows.
gnd_records <- data.frame(
  GND = as.character(gnd_authority_filter$value[id_rows]),
  record_number = gnd_authority_filter$record_number[id_rows]
)

# record_number -> gender, taken from the "375" rows.
record_genders <- data.frame(
  record_number = gnd_authority_filter$record_number[gender_rows],
  gender = as.character(gnd_authority_filter$value[gender_rows])
)

# Every (GND, gender) pair that shares a record_number.
gnd_gender_pairs <- merge(gnd_records, record_genders, by = "record_number")

# First gender found for each requested GND; "" when there is none.
gender <- gnd_gender_pairs$gender[match(gnd_values, gnd_gender_pairs$GND)]
gender[is.na(gender)] <- ""

genders <- data.frame(GND = gnd_values, gender = gender)
colnames(genders) <- columns
106
-
107
106
```
108
107
109
108
``` {r}
110
109
# Create a Google Sheet named "sheets-gnd_gender", initialised from the
# `genders` data frame, and keep the value returned by gs4_create().
gnd_gender <- gs4_create(name = "sheets-gnd_gender", sheets = genders)

# Show the returned object in the rendered output.
gnd_gender
115
115
```
116
-
117
-
0 commit comments