fixed some pattern detection bugs
This commit is contained in:
+21
-24
@@ -9,8 +9,11 @@ date_start <- as.Date("2014-01-01")
|
||||
date_end <- as.Date("2014-12-31")
|
||||
drange <- as.integer(date_end - date_start)
|
||||
drange <- date_start + days(0:drange)
|
||||
issues <- data.frame(date = drange)
|
||||
|
||||
|
||||
# MATCH TWEETS ------------------------------------------------------------
|
||||
|
||||
issues <- data.frame(date = drange)
|
||||
issuelist <- xmlToList("issues.xml")
|
||||
issueheads <- names(issuelist)
|
||||
issues[issueheads] <- 0
|
||||
@@ -28,44 +31,38 @@ for(d in 1:nrow(issues)) {
|
||||
curtext <- as.character(tweets_curday$text[t])
|
||||
curtext <- str_replace_all(curtext, "#", "")
|
||||
|
||||
# Now test each issue individually (per issue, not per tag!)
|
||||
for(i in 1:length(issuelist)) {
|
||||
curtags <- as.character(issuelist[[i]])
|
||||
curissue <- names(issuelist)[i]
|
||||
curtags <- str_c("\\W", curtags, "\\W")
|
||||
tags_found <- str_detect(curtext, sprintf("%s", curtags))
|
||||
tags_found <- any(tags_found)
|
||||
|
||||
######
|
||||
|
||||
# Test all tags in ONE issue
|
||||
|
||||
# Now test all tags of a single issue
|
||||
for(t in 1:length(curtags)) {
|
||||
curtag <- curtags[t]
|
||||
curchars <- nchar(curtag, type = "chars")
|
||||
curtag <- str_c("\\W", curtags[t], "\\W")
|
||||
curchars <- nchar(curtag, type = "chars") - 4
|
||||
|
||||
# Match the current tweet against the tag. Tags with >= 5 letters may differ by 1 letter, tags with >= 8 letters by 2 (Levenshtein distance).
|
||||
tags_found <- smartPatternMatch(curtext, curtag, curchars)
|
||||
|
||||
if(tags_found == 1) {
|
||||
cat("Text contains at least the tag:", curtag, "\n")
|
||||
#cat("Matched", curtag, "with", curtext,"\n")
|
||||
issues[d,curissue] <- issues[d,curissue] + 1
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
######
|
||||
|
||||
if(tags_found) {
|
||||
#cat("Positive in", curissue,"from",as.character(drange[d]),"\n")
|
||||
issues[d,curissue] <- issues[d,curissue] + 1
|
||||
}
|
||||
else {
|
||||
#cat("Nothing found\n")
|
||||
}
|
||||
else {
|
||||
#cat("Nothing found\n")
|
||||
}
|
||||
} # /for curtags
|
||||
|
||||
} # /for issuelist
|
||||
} # /for tweets_curday
|
||||
} # /for drange
|
||||
|
||||
|
||||
|
||||
# WEEKLY INTERVALS --------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
## Use weekly intervals instead of daily ones
|
||||
|
||||
wrange <- (as.integer(date_end - date_start) / 7)
|
||||
|
||||
Reference in New Issue
Block a user