better matching, now with plural forms and less distance
This commit is contained in:
+34
-3
@@ -29,6 +29,8 @@ issues[issueheads] <- 0
|
||||
tweets$issue <- ""
|
||||
tweets$tags <- ""
|
||||
|
||||
# Suffixes appended to a tag so that possible plural forms are matched as well
# (the empty string keeps the unmodified tag itself)
tagexpand <- c("", "s", "n", "en")
|
||||
|
||||
for(d in 1:nrow(issues)) {
|
||||
# Go through every day
|
||||
curdate <- issues$date[d]
|
||||
@@ -61,10 +63,23 @@ for(d in 1:nrow(issues)) {
|
||||
} else {
|
||||
curacro <- FALSE
|
||||
}
|
||||
|
||||
# Now expand the current tag by possible suffixes that may be plural forms
|
||||
if(!curacro) {
|
||||
for(e in 1:length(tagexpand)) {
|
||||
curtag[e] <- str_c(curtag[1], tagexpand[e])
|
||||
}
|
||||
}
|
||||
|
||||
# Match current tweet with tag. If >= 5 letters allow 1 changed letter, if >=8 letters allow 2 (Levenshtein distance)
|
||||
tags_found <- smartPatternMatch(curtext, curtag, curchars, curacro)
|
||||
if(tags_found == 1) {
|
||||
# Match current tweet with each expanded form of the tag. Tags with >= 5 letters may differ by 1 changed letter (Levenshtein distance); tags with >= 8 letters are likewise limited to 1.
|
||||
tags_found <- NULL
|
||||
for(e in 1:length(curtag)) {
|
||||
tags_found[e] <- smartPatternMatch(curtext, curtag[e], curchars, curacro)
|
||||
}
|
||||
tags_found <- any(tags_found)
|
||||
curtag <- curtag[1]
|
||||
|
||||
if(tags_found == TRUE) {
|
||||
# Raise number of findings on this day for this issue by 1
|
||||
issues[d,curissue] <- issues[d,curissue] + 1
|
||||
|
||||
@@ -117,6 +132,22 @@ g1
|
||||
|
||||
rm(g1, r)
|
||||
|
||||
|
||||
# Show party percentage of twitter users
#
# Builds a small per-party table and draws two pie charts from it:
#   1. the share of seats each party holds in the parliament (btw13), and
#   2. the share of the Twitter accounts in acc_df that belong to each party.
# Colours and labels are stored as columns of acc_parties so both charts use
# the same values and nothing extra is left behind in the workspace.
acc_parties <- data.frame(party = c("cducsu", "spd", "linke", "gruene"))
acc_parties$btw13 <- c(49.3, 30.6, 10.1, 10.0) # seats of party / 631 seats
acc_parties$colour <- c("black", "red", "purple", "green")
acc_parties$label <- c("CDU/CSU", "SPD", "Die LINKE", "Bündnis 90/Grüne")
acc_parties$twitter <- 0

for (p in seq_len(nrow(acc_parties))) {
  party_name <- as.character(acc_parties$party[p])
  # Count matching accounts with sum() instead of nrow(df[mask, ]): a logical
  # row index containing NA yields an NA row, so accounts without a party
  # would otherwise be counted towards every party.
  party_accounts <- sum(acc_df$party == party_name, na.rm = TRUE)
  # NOTE(review): 280 is presumably the total number of accounts in acc_df --
  # confirm; nrow(acc_df) would avoid the hard-coded value.
  acc_parties$twitter[p] <- round(party_accounts / 280 * 100)
}

pie(
  acc_parties$btw13,
  col = acc_parties$colour,
  labels = acc_parties$label,
  clockwise = TRUE,
  main = "Seats of parties in the parliament"
)
pie(
  acc_parties$twitter,
  col = acc_parties$colour,
  labels = acc_parties$label,
  clockwise = TRUE,
  main = "Percentage of parties' MdBs of all Twitter accounts"
)

# Clean up the temporaries created by this section
rm(acc_parties, p, party_name, party_accounts)
|
||||
|
||||
|
||||
# VISUALS -----------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user