Skip to content

Bump certifi from 2022.6.15 to 2022.12.7 #5

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 31 additions & 33 deletions code/R/gender.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -23,31 +23,30 @@ pp <- function(percentage, accuracy = 0.01) {
# Extracting All GNDs of vd17
```{r}
all_gnd_vd17 <- vd17_a %>%
filter(subfield_code=="7")%>%
select(record_number,field_code, value)%>%
mutate(GND=value)%>%
distinct(GND)%>%
filter(subfield_code == "7") %>%
select(record_number, field_code, value) %>%
mutate(GND = value) %>%
distinct(GND) %>%
collect()

```

```{r install_python_packages}
reticulate::py_install("pandas")
```
# Downloading the xml files of authority records
```{python}
import urllib.request
import urllib.request

ids=[]

j=0
for id in r.all_gnd_vd17['GND']:

if j%10000==0:
print(j)
try:
result=urllib.request.urlretrieve("http://d-nb.info/"+id+"/about/marcxml", "data/work/vd17"+id+".xml")

except:
ids.append(id)
pass
Expand All @@ -56,62 +55,61 @@ for id in r.all_gnd_vd17['GND']:
```
# Checking that no files are missing because of connection interruptions
```{r}
list <- list.files(path="data/work/vd17")
list <- list.files(path = "data/work/vd17")

list_gnd=as.list(all_gnd_vd17$GND)
list_gnd <- as.list(all_gnd_vd17$GND)
for (i in list_gnd)
{
j <- paste(i,".xml",sep="")
j <- paste(i, ".xml", sep = "")
result <- j %in% list
if (result==FALSE)
{print(j)}
if (result == FALSE) {
print(j)
}
}

```
# Checking the field_codes and sub_field_codes regarding GND and gender
```{r}
all_gnd_vd17$GND<-gsub("gnd/","",as.character(all_gnd_vd17$GND))
all_gnd_vd17$GND <- gsub("gnd/", "", as.character(all_gnd_vd17$GND))
gnd_authority <- read_tsv(here("vd17_auth.tsv.gz"), lazy = TRUE)
authority_gnd_list <- gnd_authority[gnd_authority$value %in% all_gnd_vd17$GND, , drop = FALSE] %>%
distinct(value)
gnd_authority_filter <- gnd_authority %>%
filter(subfield_code=="a",field_code %in% c("024","375"))
filter(subfield_code == "a", field_code %in% c("024", "375"))
```


# For some GNDs, there is no field_code "375", so gender is not specified.
```{r warning=FALSE}
columns= c("GND","gender")
columns <- c("GND", "gender")

genders <- data.frame(matrix(nrow = 0, ncol = length(columns)))

colnames(genders) <- columns

genders = data.frame(matrix(nrow = 0, ncol = length(columns)))

colnames(genders) = columns


j <- 1
list_gnd_auth <- as.list(authority_gnd_list$value)
for (i in list_gnd_auth)
{
gen=""
record1 <- gnd_authority_filter[(gnd_authority_filter$value==i&gnd_authority_filter$field_code=="024"&gnd_authority_filter$subfield_code=="a"),]
if (length(rownames(record1))>0)
{rn <- record1$record_number
record2 <- gnd_authority_filter[(gnd_authority_filter$record_number==rn&gnd_authority_filter$field_code=="375"&gnd_authority_filter$subfield_code=="a"),]
if (length(rownames(record2))>0)
{gen <- record2$value}
gen <- ""
record1 <- gnd_authority_filter[(gnd_authority_filter$value == i & gnd_authority_filter$field_code == "024" & gnd_authority_filter$subfield_code == "a"), ]
if (length(rownames(record1)) > 0) {
rn <- record1$record_number
record2 <- gnd_authority_filter[(gnd_authority_filter$record_number == rn & gnd_authority_filter$field_code == "375" & gnd_authority_filter$subfield_code == "a"), ]
if (length(rownames(record2)) > 0) {
gen <- record2$value
}
}
genders[j, ] <- c(i, gen)
j <- j+1
j <- j + 1
}

```

```{r}
gnd_gender <- gs4_create(
"sheets-gnd_gender",
sheets = genders)
sheets = genders
)

gnd_gender
```


14 changes: 7 additions & 7 deletions code/R/gender.nb.html
Original file line number Diff line number Diff line change
Expand Up @@ -1809,18 +1809,18 @@ <h1>Downloading the xml files of authority records</h1>
<!-- rnb-text-end -->
<!-- rnb-chunk-begin -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgcHl0aG9uXG5pbXBvcnQgdXJsbGliLnJlcXVlc3QgXG5cbmlkcz1bXVxuXG5qPTBcbmZvciBpZCBpbiByLmFsbF9nbmRfdmQxN1snR05EJ106XG4gIFxuICAgIGlmIGolMTAwMDA9PTA6XG4gICAgICAgIHByaW50KGopXG4gICAgdHJ5OlxuICAgICAgICByZXN1bHQ9dXJsbGliLnJlcXVlc3QudXJscmV0cmlldmUoXCJodHRwOi8vZC1uYi5pbmZvL1wiK2lkK1wiL2Fib3V0L21hcmN4bWxcIiwgXCJkYXRhL3dvcmsvdmQxN1wiK2lkK1wiLnhtbFwiKVxuICAgICAgICBcbiAgICBleGNlcHQ6XG4gICAgICAgIGlkcy5hcHBlbmQoaWQpXG4gICAgICAgIHBhc3NcbiAgICBqKz0xXG5cbmBgYCJ9 -->
<pre class="python"><code>import urllib.request
<pre class="python"><code>import urllib.request

ids=[]

j=0
for id in r.all_gnd_vd17[&#39;GND&#39;]:

if j%10000==0:
print(j)
try:
result=urllib.request.urlretrieve(&quot;http://d-nb.info/&quot;+id+&quot;/about/marcxml&quot;, &quot;data/work/vd17&quot;+id+&quot;.xml&quot;)

except:
ids.append(id)
pass
Expand Down Expand Up @@ -1888,12 +1888,12 @@ <h1>For some GNDs, there is not field_code “375”, so gender is not
<!-- rnb-text-end -->
<!-- rnb-chunk-begin -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuY29sdW1ucz0gYyhcIkdORFwiLFwiZ2VuZGVyXCIpIFxuXG5nZW5kZXJzID0gZGF0YS5mcmFtZShtYXRyaXgobnJvdyA9IDAsIG5jb2wgPSBsZW5ndGgoY29sdW1ucykpKSBcbiAgXG5jb2xuYW1lcyhnZW5kZXJzKSA9IGNvbHVtbnNcbiAgXG5cbmogPC0gMVxubGlzdF9nbmRfYXV0aCA8LSBhcy5saXN0KGF1dGhvcml0eV9nbmRfbGlzdCR2YWx1ZSlcbmZvciAoaSBpbiBsaXN0X2duZF9hdXRoKVxue1xuICBnZW49XCJcIlxuICByZWNvcmQxIDwtIGduZF9hdXRob3JpdHlfZmlsdGVyWyhnbmRfYXV0aG9yaXR5X2ZpbHRlciR2YWx1ZT09aSZnbmRfYXV0aG9yaXR5X2ZpbHRlciRmaWVsZF9jb2RlPT1cIjAyNFwiJmduZF9hdXRob3JpdHlfZmlsdGVyJHN1YmZpZWxkX2NvZGU9PVwiYVwiKSxdXG4gIGlmIChsZW5ndGgocm93bmFtZXMocmVjb3JkMSkpPjApXG4gIHtybiA8LSByZWNvcmQxJHJlY29yZF9udW1iZXJcbiAgcmVjb3JkMiA8LSBnbmRfYXV0aG9yaXR5X2ZpbHRlclsoZ25kX2F1dGhvcml0eV9maWx0ZXIkcmVjb3JkX251bWJlcj09cm4mZ25kX2F1dGhvcml0eV9maWx0ZXIkZmllbGRfY29kZT09XCIzNzVcIiZnbmRfYXV0aG9yaXR5X2ZpbHRlciRzdWJmaWVsZF9jb2RlPT1cImFcIiksXVxuICBpZiAobGVuZ3RoKHJvd25hbWVzKHJlY29yZDIpKT4wKVxuICB7Z2VuIDwtIHJlY29yZDIkdmFsdWV9XG4gIH1cbiAgZ2VuZGVyc1tqLCBdIDwtIGMoaSwgZ2VuKVxuICBqIDwtIGorMVxufVxuXG5gYGAifQ== -->
<pre class="r"><code>columns= c(&quot;GND&quot;,&quot;gender&quot;)
<pre class="r"><code>columns= c(&quot;GND&quot;,&quot;gender&quot;)

genders = data.frame(matrix(nrow = 0, ncol = length(columns)))

genders = data.frame(matrix(nrow = 0, ncol = length(columns)))

colnames(genders) = columns


j &lt;- 1
list_gnd_auth &lt;- as.list(authority_gnd_list$value)
Expand Down
12 changes: 6 additions & 6 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.