diff --git a/issuecomp-analysis.R b/issuecomp-analysis.R
index e010780..c99dcc5 100644
--- a/issuecomp-analysis.R
+++ b/issuecomp-analysis.R
@@ -25,7 +25,7 @@ for(d in 1:nrow(issues)) {
 
   for(t in 1:nrow(tweets_curday)){
     # Select tweet's text, make it lowercase and remove hashtag indicators (#)
-    curtext <- tolower(as.character(tweets_curday$text[t]))
+    curtext <- as.character(tweets_curday$text[t])
     curtext <- str_replace_all(curtext, "#", "")
 
     for(i in 1:length(issuelist)) {
@@ -35,6 +35,26 @@ for(d in 1:nrow(issues)) {
       tags_found <- str_detect(curtext, sprintf("%s", curtags))
       tags_found <- any(tags_found)
 
+      ######
+
+      # Test all tags in ONE issue and stop at the first fuzzy match.
+      # A dedicated index (k) is used so the tweet index t of the enclosing
+      # loop is not overwritten; seq_along() skips the loop for an empty tag list.
+      for(k in seq_along(curtags)) {
+        curtag <- curtags[k]
+        curchars <- nchar(curtag, type = "chars")
+
+        tags_found <- smartPatternMatch(curtext, curtag, curchars)
+
+        if(tags_found == 1) {
+          cat("Text contains at least the tag:", curtag, "\n")
+          break
+        }
+      }
+
+
+      ######
+
       if(tags_found) {
         #cat("Positive in", curissue,"from",as.character(drange[d]),"\n")
         issues[d,curissue] <- issues[d,curissue] + 1
diff --git a/issuecomp-functions.R b/issuecomp-functions.R
index 1100fef..d97496f 100644
--- a/issuecomp-functions.R
+++ b/issuecomp-functions.R
@@ -19,6 +19,35 @@ insertRow <- function(existingDF, newrow, r) {
   return(existingDF)
 }
 
+# Turn an empty integer vector (e.g. agrep() without any hit) into 0 so the
+# result can be used directly in if() conditions; other values pass through.
+convertLogical0 <- function(var) {
+  if(is.integer(var) && length(var) == 0) {
+    var <- 0
+  }
+  return(var)
+}
+
+# Fuzzy, case-insensitive search for `pattern` in `string` using agrep().
+# `chars` is the number of characters in `pattern` as passed by the caller; it
+# selects the maximum edit distance (max.distance "all"): 0 below 5 chars,
+# 1 for 5 to 7 chars, 2 above 7 chars.
+# Returns agrep()'s result (indices of matching elements of `string`), or 0
+# when nothing matched.
+smartPatternMatch <- function(string, pattern, chars) {
+  # One if / else if / else chain: exactly one branch runs, so the exact-match
+  # result for short patterns is not overwritten by a more lenient search.
+  if(chars < 5) {
+    found <- agrep(pattern, string, max.distance = list(all = 0), ignore.case = TRUE)
+  } else if(chars > 7) {
+    found <- agrep(pattern, string, max.distance = list(all = 2), ignore.case = TRUE)
+  } else {
+    found <- agrep(pattern, string, max.distance = list(all = 1), ignore.case = TRUE)
+  }
+  found <- convertLogical0(found)
+  return(found)
+}
+
 ## ERROR HANDLING
 
 # Check for empty API returns (0 or 1 or 2)