better pattern matching logic

This commit is contained in:
2015-01-21 13:17:24 +01:00
parent a8987936c4
commit 54d1cd79aa
3 changed files with 228 additions and 215 deletions
+9 -1
View File
@@ -70,11 +70,19 @@ for(d in 1:nrow(issues)) {
curtag[e] <- str_c(curtag[1], tagexpand[e])
}
}
# Set Levenshtein distance depending on char length
if(curchars <= 4) {
curdistance <- 0
} else {
curdistance <- 1
}
# Match current tweet with tag. With 4 characters or fewer an exact match is required (Levenshtein distance 0); with 5 or more characters 1 changed letter is allowed (Levenshtein distance 1)
tags_found <- NULL
# Match the tweet with each variation of tagexpand
for(e in 1:length(curtag)) {
tags_found[e] <- smartPatternMatch(curtext, curtag[e], curchars, curacro)
tags_found[e] <- smartPatternMatch(curtext, curtag[e], curdistance, curacro)
}
tags_found <- any(tags_found)
curtag <- curtag[1]