48 lines
1.3 KiB
R
48 lines
1.3 KiB
R
|
require(XML)
|
||
|
require(stringr)
|
||
|
|
||
|
# Parse the member list into a name/party table ----
#
# Reads "politiker2.txt" line by line, takes the leading "<field>,<field>"
# part of each line as the name and the trailing run of letters, digits,
# spaces and slashes as the party, recodes the party labels to short
# lowercase keys and writes the result to "politiker2.csv".

# NOTE(review): hard-coded, machine-specific working directory. It is kept
# unchanged because every relative path in this script depends on it.
setwd("~/Dokumente/Uni/Aktuell/BA-Arbeit/uni-ba-issuecomp")

mdb <- readLines("politiker2.txt")

# str_extract() returns exactly one value per input line (NA when nothing
# matches), so names and parties stay aligned row by row. Extracting with
# str_extract_all() + unlist() shifts the two vectors against each other as
# soon as a line has zero or several matches for one of the patterns.
mdb_names <- str_extract(mdb, ".+?,.+?,")
mdb_names <- str_replace(mdb_names, ",$", "")

mdb_party <- str_extract(mdb, "[[:alnum:] /]+$")
# Trim all surrounding whitespace, not just a single leading space, so the
# recoded party keys are clean.
mdb_party <- str_trim(mdb_party)

mdb_party <- str_replace_all(mdb_party, "CDU/CSU", "cducsu")
mdb_party <- str_replace_all(mdb_party, "Bündnis 90/Die Grünen", "gruene")
mdb_party <- str_replace_all(mdb_party, "Die Linke", "linke")
mdb_party <- str_replace_all(mdb_party, "SPD", "spd")

# Keep only lines for which both a name and a party were found, and say so
# instead of silently producing a misaligned table.
parsed <- !is.na(mdb_names) & !is.na(mdb_party)
if (!all(parsed)) {
  warning(
    sum(!parsed), " line(s) of politiker2.txt could not be parsed and were ",
    "skipped: ", paste(which(!parsed), collapse = ", "),
    call. = FALSE
  )
}
mdb_names <- mdb_names[parsed]
mdb_party <- mdb_party[parsed]

mdb_list <- data.frame(name = mdb_names, party = mdb_party)

write.csv(mdb_list, "politiker2.csv", row.names = FALSE)
|
||
|
|
||
|
# Match parsed members against the account list ----
#
# For every row of "politiker2.csv", find the rows of "politiker.csv" whose
# `name` contains the first word of the member's name. One line per hit is
# appended to "merge.txt" in the form
#   <member name> --> <screenname> (<account name>) <verified>
# and the hits of each member are followed by one empty line. Members
# without any hit produce no output.

list1 <- read.csv("politiker2.csv")
list2 <- read.csv("politiker.csv")

# Convert the lookup columns once instead of on every loop iteration.
names2 <- as.character(list2$name)
accounts <- as.character(list2$screenname)
verified <- as.character(list2$verified)

# One slot per member; filled only when there is at least one hit.
merge_lines <- vector("list", nrow(list1))

# seq_len() yields an empty sequence for an empty table, whereas 1:nrow()
# would iterate over c(1, 0).
for (i in seq_len(nrow(list1))) {
  name1 <- as.character(list1$name[i])
  # First run of word characters in the name, used as the search pattern.
  lastname1 <- str_extract(name1, "\\w+")

  # An empty or missing name has no search pattern; skip it instead of
  # letting an NA condition abort the whole run.
  if (is.na(lastname1)) {
    next
  }

  # which() ignores NA results, so missing account names are simply not hits.
  hits <- which(str_detect(names2, lastname1))

  if (length(hits) > 0) {
    merge_lines[[i]] <- c(
      str_c(
        name1, " --> ", accounts[hits],
        " (", names2[hits], ") ", verified[hits]
      ),
      ""
    )
  }
}

# Append everything with a single write; do not touch the file when there is
# nothing to report.
merge_lines <- unlist(merge_lines)
if (length(merge_lines) > 0) {
  write(merge_lines, "merge.txt", append = TRUE)
}
|