You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

48 lines
1.3 KiB

require(XML)
require(stringr)
# Parse the raw politician list into a two-column CSV (name, party).
#
# Reads "politiker2.txt" (one record per line). For each line the text up to
# and including the second comma is taken as the name, and the trailing run of
# letters, digits, spaces and slashes is taken as the party. Party labels are
# mapped to short lowercase keys and the result is written to "politiker2.csv".
#
# NOTE(review): the working directory below is machine-specific; every
# relative path in this script resolves against it.
setwd("~/Dokumente/Uni/Aktuell/BA-Arbeit/uni-ba-issuecomp")
mdb <- readLines("politiker2.txt")

# str_extract() returns exactly one value per input line (NA when nothing
# matches), so names and parties stay aligned row by row. Flattening the
# output of str_extract_all() would shift the two vectors against each other
# as soon as one line has zero or several matches.
mdb_names <- str_extract(mdb, ".+?,.+?,")
mdb_party <- str_extract(mdb, "[[:alnum:] /]+$")

# Keep only lines where both fields were found; report non-blank lines that
# had to be skipped so malformed records do not disappear silently.
parsed <- !is.na(mdb_names) & !is.na(mdb_party)
skipped <- which(!parsed & nzchar(trimws(mdb)))
if (length(skipped) > 0) {
  warning(
    "Skipping unparsable line(s) in politiker2.txt: ",
    paste(skipped, collapse = ", "),
    call. = FALSE
  )
}
mdb_names <- mdb_names[parsed]
mdb_party <- mdb_party[parsed]

# Strip the trailing comma captured by the name pattern and the single
# leading space captured by the party pattern.
mdb_names <- str_replace(mdb_names, ",$", "")
mdb_party <- str_replace(mdb_party, "^ ", "")

# Map party labels to short keys; the named vector is applied in order.
party_keys <- c(
  "CDU/CSU" = "cducsu",
  "Bündnis 90/Die Grünen" = "gruene",
  "Die Linke" = "linke",
  "SPD" = "spd"
)
mdb_party <- str_replace_all(mdb_party, party_keys)

mdb_list <- data.frame(name = mdb_names, party = mdb_party)
write.csv(mdb_list, "politiker2.csv", row.names = FALSE)
############
# Cross-reference the parsed politician list with the account list.
#
# For every entry in "politiker2.csv" the first run of word characters in the
# name is used as a search pattern against all names in "politiker.csv". Each
# hit is appended to "merge.txt" as
#   <name1> --> <screenname> (<name2>) <verified>
# and every group of hits for one politician is followed by a blank line.
# Politicians without any hit produce no output.
list1 <- read.csv("politiker2.csv")
list2 <- read.csv("politiker.csv")

# Convert the columns once instead of once per loop iteration.
names1 <- as.character(list1$name)
names2 <- as.character(list2$name)
accounts <- as.character(list2$screenname)
verified <- as.character(list2$verified)
lastnames <- str_extract(names1, "\\w+")

# seq_along() yields an empty sequence for an empty table, whereas
# 1:nrow(x) would iterate over c(1, 0) and fail on NA names.
merge_blocks <- lapply(seq_along(names1), function(i) {
  lastname1 <- lastnames[i]
  if (is.na(lastname1)) {
    # No usable search term (empty or missing name).
    return(NULL)
  }
  # which() drops NA results, so missing names in list2 cannot break the test.
  hits <- which(str_detect(names2, lastname1))
  if (length(hits) == 0) {
    return(NULL)
  }
  found <- str_c(
    names1[i], " --> ", accounts[hits], " (", names2[hits], ") ",
    verified[hits]
  )
  # Trailing empty string becomes the blank separator line.
  c(found, "")
})

# Append everything in one call rather than reopening the file per line.
merge_lines <- unlist(merge_blocks)
if (length(merge_lines) > 0) {
  write(merge_lines, "merge.txt", append = TRUE)
}