better matching, fewer bugs, more fun

This commit is contained in:
2015-01-12 23:52:24 +01:00
parent efcdfae80e
commit 9e29a03d80
5 changed files with 561 additions and 557 deletions
+33 -6
View File
@@ -26,20 +26,47 @@ convertLogical0 <- function(var) {
return(var)
}
# Approximately match `pattern` as a whole word inside `string`.
#
# Arguments:
#   string  - character vector to search in.
#   pattern - search term; it is wrapped in "\\b" word-boundary anchors and
#             handed to agrep() as a regular expression (fixed = FALSE).
#   chars   - length of the search term; selects how many edits are tolerated.
#   acronym - TRUE if the term is an acronym; matching is then case-sensitive.
#
# Returns the result of agrep() after it has been passed through
# convertLogical0() (defined elsewhere in this file; presumably it normalises
# an empty result - confirm against that helper).
smartPatternMatch <- function(string, pattern, chars, acronym) {
  pattern <- str_c("\\b", pattern, "\\b")

  # Short terms must match exactly; longer terms tolerate more edits.
  if (chars <= 4) {
    max_edits <- 0
  } else if (chars >= 8) {
    max_edits <- 2
  } else {
    max_edits <- 1
  }

  found <- agrep(
    pattern, string,
    max.distance = list(all = max_edits),
    ignore.case = !acronym,
    fixed = FALSE
  )
  found <- convertLogical0(found)
  return(found)
}
# Print the text and matched tag of every tweet recorded for `issue` on `date`.
#
# Arguments:
#   date   - date string compared against the first column of the issue file.
#   issue  - issue name; the file "<folder>/<issue>.csv" is read.
#   folder - directory containing the issue files.
#
# The issue file is read as a headerless, semicolon-separated table whose
# columns are used as: 1 = date, 2 = tweet id, 3 = tag. Tweet texts are looked
# up in the global data frame `tweets` (columns `id_str` and `text`).
# Output is written with cat(); the function returns NULL invisibly.
viewMatchingTweets <- function(date, issue, folder) {
  file <- file.path(folder, paste0(issue, ".csv"))
  df <- read.csv(file, sep = ";", colClasses = "character", header = FALSE)

  # which() drops NA comparisons and yields an empty vector for a zero-row
  # table, so neither missing dates nor empty input can break the loop.
  hits <- which(df[[1]] == date)
  for (r in hits) {
    curid <- df[r, 2]
    curtag <- df[r, 3]
    cat(tweets$text[tweets$id_str == curid], " - ", curtag, "\n")
  }
  invisible(NULL)
}
# Decide whether `string` is written without any lower-case letters.
#
# Arguments:
#   string - a single character string (the tag to inspect).
#   chars  - the number of characters in `string`.
#
# Returns TRUE when removing all lower-case letters leaves exactly `chars`
# characters (i.e. nothing was removed), FALSE otherwise. A missing `string`
# yields FALSE rather than an error.
checkAcronym <- function(string, chars) {
  # Use the arguments, not variables from the calling environment.
  upper_only <- gsub("[[:lower:]]", "", string)
  isTRUE(nchar(upper_only, type = "chars") == chars)
}
## ERROR HANDLING
# Check for empty API returns (0 or 1 or 2)