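# NOTE: the lines below are residue from the repository web viewer, not part
# of the script. They are kept as comments so the file parses as R.
# "You can not select more than 25 topics. Topics must start with a letter or
#  number, can include dashes ('-') and can be up to 35 characters long."
#
# 155 lines
# 4.2 KiB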

require(stringr)
# Replace characters messing up JSON validation (\,\n,^)
#
# The substitutions are applied in this order to every element of `string`:
#   1. two backslashes + '"' directly followed by a word character are
#      dropped, keeping only that character
#   2. two backslashes + '"' followed by a space become a single space
#   3. every run of two or more backslashes is deleted
#   4. every non-printable character becomes a space
#   5. every "&" + two arbitrary characters + ";" sequence becomes a space
#   6. every remaining backslash that is not followed by t, n or '"'
#      becomes a space
#
# Args:
#   string: character vector with the raw JSON text.
# Returns:
#   The cleaned character vector (same length as the input).
correctJSON <- function(string) {
  string <- gsub('\\\\\\\\\\"(\\w)', '\\1', string)
  string <- gsub('\\\\\\\\\\" ', ' ', string)
  string <- gsub("\\\\{2,}", "", string)
  string <- str_replace_all(string, pattern = "[^[:print:]]", replacement = " ")
  string <- str_replace_all(string, pattern = "&..;", replacement = " ")
  # The negative lookahead needs a Perl-compatible engine. Base gsub() with
  # perl = TRUE is used instead of stringr's perl() wrapper, which was
  # deprecated in stringr 1.0 in favour of regex(); the pattern is unchanged.
  string <- gsub('\\\\(?![tn"])', " ", string, perl = TRUE)
  return(string)
}
# Append `newrow` to `existingDF` and renumber the row names 1..n.
#
# Args:
#   existingDF: data frame to extend (may have zero rows).
#   newrow:     row to add; anything rbind() accepts for `existingDF`.
#   r:          accepted for backward compatibility but not used. The
#               previous implementation overwrote it with nrow + 1, so the
#               new row has always been placed last; that is kept here.
# Returns:
#   The extended data frame with row names reset to 1..nrow.
insertRow <- function(existingDF, newrow, r) {
  # rbind() already puts the new row last, so no re-ordering is needed.
  # Skipping the former `existingDF[order(...), ]` step also avoids two
  # failures: NA rows when the input had zero rows (1:0 counts down), and
  # the drop to a plain vector for one-column data frames.
  existingDF <- rbind(existingDF, newrow)
  row.names(existingDF) <- seq_len(nrow(existingDF))
  return(existingDF)
}
# Map an empty integer vector (e.g. the "no match" result of an index
# search) to 0; every other value is handed back untouched.
#
# Args:
#   var: any R object.
# Returns:
#   0 when `var` is integer(0), otherwise `var` itself.
convertLogical0 <- function(var) {
  is_empty_integer <- is.integer(var) && length(var) == 0
  if (!is_empty_integer) {
    return(var)
  }
  0
}
# Approximate whole-word search for `pattern` inside `string`.
#
# The pattern is wrapped in word boundaries (\b ... \b) and matched as a
# regular expression with approximate matching.
#
# Args:
#   string:  character vector to search in.
#   pattern: word (regex fragment) to look for.
#   dist:    maximum allowed distance, passed on as max.distance = list(all = dist).
#   acronym: TRUE makes the match case-sensitive; FALSE ignores case.
# Returns:
#   TRUE if at least one element of `string` matches, otherwise FALSE.
smartPatternMatch <- function(string, pattern, dist, acronym) {
  patternrex <- str_c("\\b", pattern, "\\b")
  # agrepl() reports a match per element. The former agrep() call returned
  # match *indices*, so comparing the result with 1 gave FALSE (or a
  # length > 1 condition error) whenever `string` had several elements.
  # An earlier, commented-out variant chose the distance from the pattern
  # length; the distance is now supplied by the caller through `dist`.
  hits <- agrepl(patternrex, string, max.distance = list(all = dist),
                 ignore.case = !acronym, fixed = FALSE)
  isTRUE(any(hits))
}
# Print every tweet recorded for `issue` on a given date.
#
# Reads "<folder>/<issue>.csv" (semicolon separated, no header, all columns
# as character) and, for each row whose first column equals `date`, prints
# the text of the tweet whose id_str equals the second column, followed by
# " - " and the third column.
#
# Args:
#   date:   date string compared against the first CSV column.
#   issue:  file name without extension.
#   folder: directory that contains the CSV file.
# Side effects:
#   Reads the global data frame `tweets` (columns `id_str` and `text`) and
#   writes to the console. Returns NULL invisibly.
viewMatchingTweets <- function(date, issue, folder) {
  file <- str_c(folder, "/", issue, ".csv")
  df <- read.csv(file, sep = ";", colClasses = "character", header = FALSE)
  # which() ignores NA comparisons (read.csv turns the field "NA" into NA)
  # and yields an empty index for a table without rows, so neither case can
  # break the loop the way `1:nrow(df)` plus `if (curdate == date)` could.
  matching_rows <- which(as.character(df[[1]]) == date)
  for (r in matching_rows) {
    curid <- as.character(df[r, 2])
    curtag <- as.character(df[r, 3])
    cat(tweets$text[tweets$id_str == curid], " - ", curtag, "\n")
  }
}
# Decide whether a tag is written without any lower-case letters.
#
# All lower-case letters are stripped from `string`; if nothing was removed
# the tag is treated as an acronym.
#
# Args:
#   string: a single character string.
# Returns:
#   TRUE when `string` contains no lower-case letter, otherwise FALSE.
checkAcronym <- function(string) {
  without_lower <- str_replace_all(string = string, pattern = "[[:lower:]]", replacement = "")
  if (without_lower != string) {
    return(FALSE)
  }
  TRUE
}
## ERROR HANDLING
# Check for empty API returns.
#
# Args:
#   df: result of an API call.
# Returns:
#   0 - `df` is not empty (length(df) > 0)
#   1 - `df` is empty and the global counter `error` is below 3 (retry)
#   2 - `df` is empty and `error` has reached 3 (abort)
# Side effects:
#   Reads the global variable `error` and prints a warning for codes 1 and 2.
errorEmptyAPI <- function(df) {
  if (length(df) != 0) {
    return(0)
  }
  if (error < 3) {
    cat("[WARNING] Empty API result. Trying again.\n")
    return(1)
  }
  cat("[WARNING] 3x empty API result. Aborting now.\n")
  2
}
# Check if the API output contains an "error" field.
#
# Args:
#   df: named result of an API call (list or data frame).
# Returns:
#   2 when `df` has an element named "error" (its first entry is printed),
#   otherwise 0.
errorErrorColumn <- function(df) {
  if (!("error" %in% names(df))) {
    return(0)
  }
  cat("[WARNING] Error in API request:", df$error[1],"\n")
  2
}
# Check if error codes exist (i.e. 34 or 88).
#
# Args:
#   df: named result of an API call; when present, `df$errors` is indexed as
#       a table whose first row holds the message (column 1) and the code
#       (column 2).
# Returns:
#   The code found in `df$errors[1, 2]`, or 0 when there is no "errors"
#   element. The message is printed when an error is found.
errorCheckCode <- function(df) {
  has_errors <- "errors" %in% names(df)
  if (!has_errors) {
    return(0)
  }
  api_errors <- df$errors
  cat("[WARNING] Error in API request:", api_errors[1,1],"\n")
  api_errors[1,2]
}
# Handle code 88: rate limit exceeded (wait time).
#
# Queries the rate-limit status endpoint for the "statuses" resource and
# derives how long to wait until the /statuses/user_timeline window resets,
# plus a 10 second safety margin.
#
# Relies on names defined outside this function: fromJSON(),
# twitter_api_call() and api_params.
#
# Returns:
#   Number of seconds to wait (never negative). The value is also printed.
errorCode88 <- function() {
  rate_api_url <- "https://api.twitter.com/1.1/application/rate_limit_status.json"
  rate_query <- c(resources = "statuses")
  limits <- fromJSON(twitter_api_call(rate_api_url, rate_query, api_params))
  # NOTE(review): `reset` is presumably a Unix epoch timestamp in seconds,
  # since it is compared with as.numeric(Sys.time()) - confirm against the API.
  resettime <- limits$resources$statuses$`/statuses/user_timeline`$reset
  curtime <- as.numeric(Sys.time())
  # Clamp at zero: if the reset moment already lies in the past (e.g. local
  # clock running ahead) the difference is negative, which is not a valid
  # sleep duration. pmax() keeps a zero-length result zero-length, so a
  # missing `reset` field is not silently turned into "wait 0 seconds".
  wait <- pmax(round(resettime - curtime + 10), 0)
  cat("[INFO] Rate limit is exceeded. Now waiting",wait,"seconds.\n")
  return(wait)
}
# Handle code 34: page does not exist.
#
# Returns:
#   1 while the global counter `error` is at most 2 (try again),
#   2 once `error` exceeds 2 (abort; a warning is printed).
# Side effects:
#   Reads the global variable `error`.
errorCode34 <- function() {
  if (!(error > 2)) {
    return(1)
  }
  cat("[WARNING] 3x Not existing page. Aborting now.\n")
  2
}