Skip to content

Commit 0b44549

Browse files
[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
1 parent 600e212 commit 0b44549

File tree

2 files changed

+38
-40
lines changed

2 files changed

+38
-40
lines changed

code/R/gender.Rmd

Lines changed: 31 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -23,31 +23,30 @@ pp <- function(percentage, accuracy = 0.01) {
2323
# Extracting All GNDs of vd17
2424
```{r}
2525
all_gnd_vd17 <- vd17_a %>%
26-
filter(subfield_code=="7")%>%
27-
select(record_number,field_code, value)%>%
28-
mutate(GND=value)%>%
29-
distinct(GND)%>%
26+
filter(subfield_code == "7") %>%
27+
select(record_number, field_code, value) %>%
28+
mutate(GND = value) %>%
29+
distinct(GND) %>%
3030
collect()
31-
3231
```
3332

3433
```{r install_python_packages}
3534
reticulate::py_install("pandas")
3635
```
3736
# Downloading the xml files of authority records
3837
```{python}
39-
import urllib.request
38+
import urllib.request
4039
4140
ids=[]
4241
4342
j=0
4443
for id in r.all_gnd_vd17['GND']:
45-
44+
4645
if j%10000==0:
4746
print(j)
4847
try:
4948
result=urllib.request.urlretrieve("http://d-nb.info/"+id+"/about/marcxml", "data/work/vd17"+id+".xml")
50-
49+
5150
except:
5251
ids.append(id)
5352
pass
@@ -56,62 +55,61 @@ for id in r.all_gnd_vd17['GND']:
5655
```
5756
# Checking that no files are missing because of connection interruptions
5857
```{r}
59-
list <- list.files(path="data/work/vd17")
58+
list <- list.files(path = "data/work/vd17")
6059
61-
list_gnd=as.list(all_gnd_vd17$GND)
60+
list_gnd <- as.list(all_gnd_vd17$GND)
6261
for (i in list_gnd)
6362
{
64-
j <- paste(i,".xml",sep="")
63+
j <- paste(i, ".xml", sep = "")
6564
result <- j %in% list
66-
if (result==FALSE)
67-
{print(j)}
65+
if (result == FALSE) {
66+
print(j)
67+
}
6868
}
69-
7069
```
7170
# Checking the field_codes and sub_field_codes regarding GND and gender
7271
```{r}
73-
all_gnd_vd17$GND<-gsub("gnd/","",as.character(all_gnd_vd17$GND))
72+
all_gnd_vd17$GND <- gsub("gnd/", "", as.character(all_gnd_vd17$GND))
7473
gnd_authority <- read_tsv(here("vd17_auth.tsv.gz"), lazy = TRUE)
7574
authority_gnd_list <- gnd_authority[gnd_authority$value %in% all_gnd_vd17$GND, , drop = FALSE] %>%
7675
distinct(value)
7776
gnd_authority_filter <- gnd_authority %>%
78-
filter(subfield_code=="a",field_code %in% c("024","375"))
77+
filter(subfield_code == "a", field_code %in% c("024", "375"))
7978
```
8079

8180

8281
# For some GNDs, there is no field_code "375", so gender is not specified.
8382
```{r warning=FALSE}
84-
columns= c("GND","gender")
83+
columns <- c("GND", "gender")
84+
85+
genders <- data.frame(matrix(nrow = 0, ncol = length(columns)))
86+
87+
colnames(genders) <- columns
8588
86-
genders = data.frame(matrix(nrow = 0, ncol = length(columns)))
87-
88-
colnames(genders) = columns
89-
9089
9190
j <- 1
9291
list_gnd_auth <- as.list(authority_gnd_list$value)
9392
for (i in list_gnd_auth)
9493
{
95-
gen=""
96-
record1 <- gnd_authority_filter[(gnd_authority_filter$value==i&gnd_authority_filter$field_code=="024"&gnd_authority_filter$subfield_code=="a"),]
97-
if (length(rownames(record1))>0)
98-
{rn <- record1$record_number
99-
record2 <- gnd_authority_filter[(gnd_authority_filter$record_number==rn&gnd_authority_filter$field_code=="375"&gnd_authority_filter$subfield_code=="a"),]
100-
if (length(rownames(record2))>0)
101-
{gen <- record2$value}
94+
gen <- ""
95+
record1 <- gnd_authority_filter[(gnd_authority_filter$value == i & gnd_authority_filter$field_code == "024" & gnd_authority_filter$subfield_code == "a"), ]
96+
if (length(rownames(record1)) > 0) {
97+
rn <- record1$record_number
98+
record2 <- gnd_authority_filter[(gnd_authority_filter$record_number == rn & gnd_authority_filter$field_code == "375" & gnd_authority_filter$subfield_code == "a"), ]
99+
if (length(rownames(record2)) > 0) {
100+
gen <- record2$value
101+
}
102102
}
103103
genders[j, ] <- c(i, gen)
104-
j <- j+1
104+
j <- j + 1
105105
}
106-
107106
```
108107

109108
```{r}
110109
gnd_gender <- gs4_create(
111110
"sheets-gnd_gender",
112-
sheets = genders)
111+
sheets = genders
112+
)
113113
114114
gnd_gender
115115
```
116-
117-

code/R/gender.nb.html

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1809,18 +1809,18 @@ <h1>Downloading the xml files of authority records</h1>
18091809
<!-- rnb-text-end -->
18101810
<!-- rnb-chunk-begin -->
18111811
<!-- rnb-source-begin eyJkYXRhIjoiYGBgcHl0aG9uXG5pbXBvcnQgdXJsbGliLnJlcXVlc3QgXG5cbmlkcz1bXVxuXG5qPTBcbmZvciBpZCBpbiByLmFsbF9nbmRfdmQxN1snR05EJ106XG4gIFxuICAgIGlmIGolMTAwMDA9PTA6XG4gICAgICAgIHByaW50KGopXG4gICAgdHJ5OlxuICAgICAgICByZXN1bHQ9dXJsbGliLnJlcXVlc3QudXJscmV0cmlldmUoXCJodHRwOi8vZC1uYi5pbmZvL1wiK2lkK1wiL2Fib3V0L21hcmN4bWxcIiwgXCJkYXRhL3dvcmsvdmQxN1wiK2lkK1wiLnhtbFwiKVxuICAgICAgICBcbiAgICBleGNlcHQ6XG4gICAgICAgIGlkcy5hcHBlbmQoaWQpXG4gICAgICAgIHBhc3NcbiAgICBqKz0xXG5cbmBgYCJ9 -->
1812-
<pre class="python"><code>import urllib.request
1812+
<pre class="python"><code>import urllib.request
18131813

18141814
ids=[]
18151815

18161816
j=0
18171817
for id in r.all_gnd_vd17[&#39;GND&#39;]:
1818-
1818+
18191819
if j%10000==0:
18201820
print(j)
18211821
try:
18221822
result=urllib.request.urlretrieve(&quot;http://d-nb.info/&quot;+id+&quot;/about/marcxml&quot;, &quot;data/work/vd17&quot;+id+&quot;.xml&quot;)
1823-
1823+
18241824
except:
18251825
ids.append(id)
18261826
pass
@@ -1888,12 +1888,12 @@ <h1>For some GNDs, there is not field_code “375”, so gender is not
18881888
<!-- rnb-text-end -->
18891889
<!-- rnb-chunk-begin -->
18901890
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuY29sdW1ucz0gYyhcIkdORFwiLFwiZ2VuZGVyXCIpIFxuXG5nZW5kZXJzID0gZGF0YS5mcmFtZShtYXRyaXgobnJvdyA9IDAsIG5jb2wgPSBsZW5ndGgoY29sdW1ucykpKSBcbiAgXG5jb2xuYW1lcyhnZW5kZXJzKSA9IGNvbHVtbnNcbiAgXG5cbmogPC0gMVxubGlzdF9nbmRfYXV0aCA8LSBhcy5saXN0KGF1dGhvcml0eV9nbmRfbGlzdCR2YWx1ZSlcbmZvciAoaSBpbiBsaXN0X2duZF9hdXRoKVxue1xuICBnZW49XCJcIlxuICByZWNvcmQxIDwtIGduZF9hdXRob3JpdHlfZmlsdGVyWyhnbmRfYXV0aG9yaXR5X2ZpbHRlciR2YWx1ZT09aSZnbmRfYXV0aG9yaXR5X2ZpbHRlciRmaWVsZF9jb2RlPT1cIjAyNFwiJmduZF9hdXRob3JpdHlfZmlsdGVyJHN1YmZpZWxkX2NvZGU9PVwiYVwiKSxdXG4gIGlmIChsZW5ndGgocm93bmFtZXMocmVjb3JkMSkpPjApXG4gIHtybiA8LSByZWNvcmQxJHJlY29yZF9udW1iZXJcbiAgcmVjb3JkMiA8LSBnbmRfYXV0aG9yaXR5X2ZpbHRlclsoZ25kX2F1dGhvcml0eV9maWx0ZXIkcmVjb3JkX251bWJlcj09cm4mZ25kX2F1dGhvcml0eV9maWx0ZXIkZmllbGRfY29kZT09XCIzNzVcIiZnbmRfYXV0aG9yaXR5X2ZpbHRlciRzdWJmaWVsZF9jb2RlPT1cImFcIiksXVxuICBpZiAobGVuZ3RoKHJvd25hbWVzKHJlY29yZDIpKT4wKVxuICB7Z2VuIDwtIHJlY29yZDIkdmFsdWV9XG4gIH1cbiAgZ2VuZGVyc1tqLCBdIDwtIGMoaSwgZ2VuKVxuICBqIDwtIGorMVxufVxuXG5gYGAifQ== -->
1891-
<pre class="r"><code>columns= c(&quot;GND&quot;,&quot;gender&quot;)
1891+
<pre class="r"><code>columns= c(&quot;GND&quot;,&quot;gender&quot;)
1892+
1893+
genders = data.frame(matrix(nrow = 0, ncol = length(columns)))
18921894

1893-
genders = data.frame(matrix(nrow = 0, ncol = length(columns)))
1894-
18951895
colnames(genders) = columns
1896-
1896+
18971897

18981898
j &lt;- 1
18991899
list_gnd_auth &lt;- as.list(authority_gnd_list$value)

0 commit comments

Comments
 (0)