# Builds a name/party table from the raw politician list (politiker2.txt),
# saves it as politiker2.csv, and then cross-references it against the account
# list in politiker.csv. Every candidate match is appended to merge.txt so the
# pairs can be checked by hand.

# library() instead of require(): a missing package stops the script right
# here instead of failing later with a "could not find function" error.
# NOTE(review): XML is not used in this section; it is still loaded because
# other parts of the file may depend on it.
library(XML)
library(stringr)

# NOTE(review): the hard-coded working directory ties the script to one
# machine. It is kept because every path below is relative to it.
setwd("~/Dokumente/Uni/Aktuell/BA-Arbeit/uni-ba-issuecomp")

# Step 1: parse politiker2.txt into a name/party table ----

mdb <- readLines("politiker2.txt")

# Extract name and party once per input line. str_extract() returns exactly
# one value (or NA) per line, so both vectors stay aligned with each other
# even if a line matches only one of the patterns or matches the name pattern
# more than once. Flattening all matches with unlist(str_extract_all()) could
# yield vectors of different lengths, which data.frame() either rejects or
# silently recycles into wrong name/party pairs.
mdb_names <- str_extract(mdb, ".+?,.+?,")
mdb_party <- str_extract(mdb, "[[:alnum:] /]+$")

# Keep only the lines for which both fields were found.
parsed <- !is.na(mdb_names) & !is.na(mdb_party)
skipped <- sum(!parsed & str_trim(mdb) != "")
if (skipped > 0) {
  warning(
    skipped, " non-empty line(s) in politiker2.txt could not be parsed ",
    "and were skipped",
    call. = FALSE
  )
}

# Drop the trailing comma that the name pattern captures.
mdb_names <- str_replace_all(mdb_names[parsed], ",$", "")

# Strip the single leading blank left over from the separator, then map the
# party labels to short lower-case keys. The named vector is applied in the
# order given.
mdb_party <- str_replace_all(mdb_party[parsed], "^ ", "")
mdb_party <- str_replace_all(
  mdb_party,
  c(
    "CDU/CSU" = "cducsu",
    "Bündnis 90/Die Grünen" = "gruene",
    "Die Linke" = "linke",
    "SPD" = "spd"
  )
)

mdb_list <- data.frame(name = mdb_names, party = mdb_party)
write.csv(mdb_list, "politiker2.csv", row.names = FALSE)

# Step 2: match politicians against the account list ----

list1 <- read.csv("politiker2.csv")
list2 <- read.csv("politiker.csv")

# Pulled out of the loop: these columns do not change between iterations.
account_names <- as.character(list2$name)
account_screennames <- as.character(list2$screenname)
account_verified <- as.character(list2$verified)

# One slot per politician; each slot holds that politician's match lines
# followed by an empty separator line, or stays NULL when nothing matched.
merge_blocks <- vector("list", nrow(list1))

# seq_len() instead of 1:nrow(): with an empty table 1:0 would iterate over
# c(1, 0) and index rows that do not exist.
for (i in seq_len(nrow(list1))) {
  name1 <- as.character(list1$name[i])

  # The first run of word characters is used as the search key (the variable
  # name suggests the last name comes first in the file).
  lastname1 <- str_extract(name1, "\\w+")
  if (is.na(lastname1)) {
    # No usable key (missing name or no word characters); str_detect() would
    # only return NA here.
    next
  }

  # The key is looked up anywhere inside the account name, so partial matches
  # are reported as well. which() ignores NA results caused by missing
  # account names.
  hits <- which(str_detect(account_names, lastname1))
  if (length(hits) == 0) {
    next
  }

  result <- str_c(
    name1, " --> ", account_screennames[hits],
    " (", account_names[hits], ") ", account_verified[hits]
  )
  merge_blocks[[i]] <- c(result, "")
}

merge_lines <- unlist(merge_blocks)

# NOTE(review): the output is appended, so running the script twice
# duplicates every entry in merge.txt. Append mode is kept to preserve the
# existing behaviour; delete merge.txt before a fresh run.
if (length(merge_lines) > 0) {
  write(merge_lines, "merge.txt", append = TRUE)
}