You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
155 lines
4.2 KiB
155 lines
4.2 KiB
require(stringr)
|
|
|
|
# Replace characters messing up JSON validation (\, \n, ^).
#
# The clean-up steps run in this order, each on the result of the previous one:
#   1. two literal backslashes + double quote directly before a word
#      character are dropped, keeping the word character
#   2. two literal backslashes + double quote + space become a single space
#   3. every run of two or more backslashes is deleted
#   4. every non-printable character becomes a space
#   5. every four-character "&..;" sequence (such as "&lt;") becomes a space
#   6. every remaining backslash that is not followed by t, n or a double
#      quote becomes a space
#
# string: character vector to clean.
# Returns the cleaned character vector.
correctJSON <- function(string) {
  string <- gsub('\\\\\\\\\\"(\\w)', '\\1', string)
  string <- gsub('\\\\\\\\\\" ', ' ', string)
  string <- gsub("\\\\{2,}", "", string)
  string <- str_replace_all(string, pattern = "[^[:print:]]", replacement = " ")
  string <- str_replace_all(string, pattern = "&..;", replacement = " ")
  # The negative lookahead needs a Perl-compatible engine. stringr's perl()
  # modifier has been deprecated since stringr 1.0.0, so base gsub() with
  # perl = TRUE is used instead, like the first three steps.
  string <- gsub('\\\\(?![tn"])', " ", string, perl = TRUE)
  string
}
|
|
|
|
# Append `newrow` to the end of `existingDF` and renumber the row names.
#
# existingDF: data frame to extend; may have zero rows.
# newrow:     row(s) to append, compatible with rbind(existingDF, newrow).
#             Several rows are appended in the order given.
# r:          ignored. The original implementation overwrote it with
#             nrow(existingDF) + 1, so the row has always been appended at the
#             end; the argument is kept so existing calls keep working.
#
# Returns the combined data frame with row names 1..n.
insertRow <- function(existingDF, newrow, r) {
  combined <- rbind(existingDF, newrow)
  # seq_len() stays correct for an empty result, where 1:nrow() would yield
  # c(1, 0) and either fail or index NA rows.
  row.names(combined) <- seq_len(nrow(combined))
  combined
}
|
|
|
|
# Turn a zero-length integer vector into the number 0.
#
# var: any value.
# Returns 0 when `var` is an integer vector of length zero; every other input
# is handed back unchanged.
convertLogical0 <- function(var) {
  if (!is.integer(var) || length(var) != 0) {
    return(var)
  }
  0
}
|
|
|
|
# Approximate, word-bounded search for `pattern` inside `string`.
#
# string:  text to search. Intended to be a single string; when a longer
#          vector is passed, only a match in its first element counts.
# pattern: search term. It is treated as a regular expression and wrapped in
#          \b word boundaries before matching.
# dist:    allowed distance, forwarded to agrep() as
#          max.distance = list(all = dist).
# acronym: TRUE matches case-sensitively, FALSE ignores case.
#
# Returns TRUE or FALSE.
#
# An earlier, commented-out draft picked the distance from the pattern length
# (0 for up to four characters, 1 otherwise); that decision is now left to
# the caller through `dist`.
smartPatternMatch <- function(string, pattern, dist, acronym) {
  bounded <- paste0("\\b", pattern, "\\b")
  hits <- agrep(bounded, string,
                max.distance = list(all = dist),
                ignore.case = !acronym,
                fixed = FALSE)
  # agrep() returns the indices of the matching elements (integer(0) for no
  # match). Testing membership gives a single TRUE/FALSE for every length of
  # `hits`, whereas `if (hits == 1)` fails on a zero-length or multi-element
  # condition.
  1L %in% hits
}
|
|
|
|
# Print the tweets recorded for one issue on one date.
#
# Reads "<folder>/<issue>.csv" (semicolon-separated, no header, all columns
# as text). For every row whose first column equals `date`, the second column
# is looked up in `tweets_df$id_str` and the matching text is printed together
# with the third column (the tag).
#
# date:      value compared with the first CSV column.
# issue:     file name without the ".csv" extension.
# folder:    directory that contains the file.
# tweets_df: table with `id_str` and `text` columns. Defaults to the global
#            `tweets` object, which the function previously read implicitly.
#
# Returns NULL invisibly; the function is called for its printed output.
viewMatchingTweets <- function(date, issue, folder, tweets_df = tweets) {
  path <- paste0(folder, "/", issue, ".csv")
  entries <- read.csv(path, sep = ";", colClasses = "character", header = FALSE)
  # which() ignores NA comparisons, so a row whose date field was read as NA
  # is skipped instead of aborting a scalar if().
  for (row in which(entries[[1]] == date)) {
    tweet_id <- entries[row, 2]
    tag <- entries[row, 3]
    cat(tweets_df$text[tweets_df$id_str == tweet_id], " - ", tag, "\n")
  }
  invisible(NULL)
}
|
|
|
|
|
|
# Decide whether a tag is written without any lower-case letters.
#
# string: the tag to inspect (a single string).
# Returns TRUE when stripping all lower-case letters leaves the string
# unchanged, FALSE otherwise.
checkAcronym <- function(string) {
  without_lower <- str_replace_all(string, "[[:lower:]]", "")
  if (without_lower != string) {
    return(FALSE)
  }
  TRUE
}
|
|
|
|
## ERROR HANDLING
|
|
|
|
# Check for empty API returns.
#
# df: the API result; it counts as empty when length(df) is 0.
#
# Returns 0 for a non-empty result, 1 for an empty result while the free
# variable `error` is below 3 (a warning asks for a retry), and 2 for an empty
# result once `error` has reached 3 (a warning announces the abort).
# `error` is not a parameter; it is looked up in the calling environment chain
# (presumably the caller's retry counter).
errorEmptyAPI <- function(df) {
  if (length(df) != 0) {
    return(0)
  }
  if (error < 3) {
    cat("[WARNING] Empty API result. Trying again.\n")
    return(1)
  }
  cat("[WARNING] 3x empty API result. Aborting now.\n")
  2
}
|
|
|
|
# Check if API output contains an error field.
#
# df: the API result (anything with names()).
#
# Returns 2 and prints a warning with the first entry of df$error when an
# element named "error" is present; returns 0 otherwise.
errorErrorColumn <- function(df) {
  has_error_field <- is.element("error", names(df))
  if (!has_error_field) {
    return(0)
  }
  cat("[WARNING] Error in API request:", df$error[1],"\n")
  2
}
|
|
|
|
# Check if error codes exist (i.e. 34 or 88).
#
# df: the API result (anything with names()).
#
# When an element named "errors" is present, prints a warning with
# df$errors[1, 1] and returns df$errors[1, 2] as the code; returns 0 when no
# such element exists.
errorCheckCode <- function(df) {
  if (!("errors" %in% names(df))) {
    return(0)
  }
  cat("[WARNING] Error in API request:", df$errors[1,1],"\n")
  df$errors[1, 2]
}
|
|
|
|
# Handle code 88: rate limit exceeded (wait time).
#
# Asks the rate-limit status endpoint for the "statuses" resources and reads
# the `reset` value of /statuses/user_timeline, which is treated as seconds
# since the epoch. Relies on twitter_api_call(), fromJSON() and `api_params`
# being available from elsewhere.
#
# Returns the rounded number of seconds between now and that reset time plus
# a 10 second margin, after printing an info line with the same number.
errorCode88 <- function() {
  status <- fromJSON(twitter_api_call(
    "https://api.twitter.com/1.1/application/rate_limit_status.json",
    c(resources = "statuses"),
    api_params
  ))
  reset_at <- status$resources$statuses$`/statuses/user_timeline`$reset
  now <- as.numeric(Sys.time())
  wait <- round(reset_at - now + 10)
  cat("[INFO] Rate limit is exceeded. Now waiting",wait,"seconds.\n")
  wait
}
|
|
|
|
# Handle code 34: page does not exist.
#
# Takes no arguments; reads the free variable `error` (presumably the
# caller's retry counter).
#
# Returns 2 and prints an abort warning when `error` is greater than 2;
# returns 1 otherwise, without printing.
errorCode34 <- function() {
  if (!(error > 2)) {
    return(1)
  }
  cat("[WARNING] 3x Not existing page. Aborting now.\n")
  2
}
|