better pattern matching logic
This commit is contained in:
@@ -70,11 +70,19 @@ for(d in 1:nrow(issues)) {
|
||||
curtag[e] <- str_c(curtag[1], tagexpand[e])
|
||||
}
|
||||
}
|
||||
|
||||
# Set Levenshtein distance depending on char length
|
||||
if(curchars <= 4) {
|
||||
curdistance <- 0
|
||||
} else {
|
||||
curdistance <- 1
|
||||
}
|
||||
|
||||
# Match current tweet with tag. Up to 4 characters: exact match only; 5 or more characters: allow 1 changed letter (Levenshtein distance)
|
||||
tags_found <- NULL
|
||||
# Match the tweet with each variation of tagexpand
|
||||
for(e in 1:length(curtag)) {
|
||||
tags_found[e] <- smartPatternMatch(curtext, curtag[e], curchars, curacro)
|
||||
tags_found[e] <- smartPatternMatch(curtext, curtag[e], curdistance, curacro)
|
||||
}
|
||||
tags_found <- any(tags_found)
|
||||
curtag <- curtag[1]
|
||||
|
||||
Reference in New Issue
Block a user