Bachelor thesis: "The influence of sensational issues on the political agenda setting in social media"
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

issuecomp-functions.R 4.2KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154
  1. require(stringr)
  # Replace characters messing up JSON validation (\,\n,^)
  #
  # Cleans a raw JSON string so it can be parsed. Steps, applied in order:
  #   1. two backslashes + quote directly before a word character -> that
  #      word character only
  #   2. two backslashes + quote directly before a space -> a single space
  #   3. any run of two or more backslashes -> removed
  #   4. every non-printable character -> space
  #   5. every "&", two arbitrary characters, ";" sequence (e.g. "&lt;")
  #      -> space
  #   6. every backslash that is not followed by t, n or a double quote
  #      -> space
  #
  # Args:
  #   string: character vector holding the raw JSON text.
  # Returns: the cleaned character vector.
  correctJSON <- function(string) {
    string <- gsub('\\\\\\\\\\"(\\w)', '\\1', string)
    string <- gsub('\\\\\\\\\\" ', ' ', string)
    string <- gsub("\\\\{2,}", "", string)
    string <- str_replace_all(string, pattern = "[^[:print:]]", replacement = " ")
    string <- str_replace_all(string, pattern = "&..;", replacement = " ")
    # The negative lookahead needs a Perl-compatible engine. Base gsub() with
    # perl = TRUE provides it without relying on stringr's deprecated perl()
    # pattern wrapper.
    string <- gsub('\\\\(?![tn"])', " ", string, perl = TRUE)
    string
  }
  # Append `newrow` to the end of `existingDF` and renumber the row names.
  #
  # Args:
  #   existingDF: data frame to extend (may have zero rows or one column).
  #   newrow:     the row to add; passed to rbind() unchanged.
  #   r:          ignored. The new row is always placed after the last
  #               existing row; the argument is kept so existing calls that
  #               pass it keep working.
  # Returns: the extended data frame with row names 1..nrow.
  insertRow <- function(existingDF, newrow, r) {
    # The insertion position was always forced to "after the last row", so a
    # plain rbind() is enough. This also avoids 1:0 index vectors for empty
    # inputs and the vector collapse of one-column data frames that row
    # re-indexing would cause.
    existingDF <- rbind(existingDF, newrow)
    row.names(existingDF) <- seq_len(nrow(existingDF))
    existingDF
  }
  # Map an empty integer vector (e.g. a "no match" result) to 0.
  #
  # Args:
  #   var: any value.
  # Returns: 0 when `var` is an integer vector of length zero, otherwise
  #   `var` unchanged.
  convertLogical0 <- function(var) {
    is_empty_integer <- is.integer(var) && length(var) == 0
    if (!is_empty_integer) {
      return(var)
    }
    0
  }
  # Approximate whole-word search for `pattern` in `string`.
  #
  # The pattern is wrapped in word boundaries (\b) and matched with
  # approximate (fuzzy) regular-expression matching.
  #
  # Args:
  #   string:  character vector to search in.
  #   pattern: word or regular expression to look for.
  #   dist:    maximal allowed distance, passed as max.distance = list(all = dist).
  #   acronym: TRUE for acronyms; matching is then case-sensitive, otherwise
  #            case is ignored.
  # Returns: TRUE if the pattern is found in any element of `string`,
  #   otherwise FALSE.
  #
  # Note: an earlier, disabled variant derived the distance from the pattern
  # length (0 for up to 4 characters, 1 for longer ones) instead of taking
  # `dist` from the caller.
  smartPatternMatch <- function(string, pattern, dist, acronym) {
    patternrex <- str_c("\\b", pattern, "\\b")
    # agrepl() yields one logical per element; agrep() would yield match
    # positions, which cannot be read as a 0/1 flag once `string` has more
    # than one element.
    hits <- agrepl(patternrex, string, max.distance = list(all = dist),
                   ignore.case = !acronym, fixed = FALSE)
    isTRUE(any(hits))
  }
  # Print the text of every tweet recorded for one issue on one date.
  #
  # Reads "<folder>/<issue>.csv" (semicolon separated, no header, all columns
  # as character). Column 1 is compared against `date`, column 2 is used as
  # the tweet id and column 3 as the tag. Tweet texts are looked up in the
  # global data frame `tweets` via its `id_str` column.
  #
  # Args:
  #   date:   date string to select, compared verbatim with column 1.
  #   issue:  issue name, i.e. the CSV file name without extension.
  #   folder: directory containing the issue CSV files.
  # Returns: nothing useful; the matching tweets are written with cat().
  viewMatchingTweets <- function(date, issue, folder) {
    csv_path <- str_c(folder,"/",issue,".csv")
    matches <- read.csv(csv_path, sep = ";", colClasses="character", header = FALSE)
    for(row_idx in 1:nrow(matches)) {
      row_date <- as.character(matches[row_idx,1])
      if(row_date != date) {
        next
      }
      tweet_id <- as.character(matches[row_idx,2])
      tag <- as.character(matches[row_idx,3])
      cat(tweets$text[tweets$id_str == tweet_id]," - ",tag,"\n")
    }
  }
  # Decide whether a tag is an acronym, i.e. contains no lower-case letters.
  #
  # Args:
  #   string: the tag to test.
  # Returns: TRUE if removing all lower-case letters leaves `string`
  #   unchanged, otherwise FALSE.
  checkAcronym <- function(string) {
    without_lower <- str_replace_all(string = string, pattern = "[[:lower:]]", replacement = "")
    if(without_lower == string) TRUE else FALSE
  }
  ## ERROR HANDLING
  # Check for empty API returns (0 or 1 or 2)
  #
  # Reads the retry counter from the variable `error` defined outside this
  # function.
  #
  # Args:
  #   df: the parsed API result.
  # Returns:
  #   0 - result is not empty
  #   1 - result is empty and `error` is below 3 (caller should retry)
  #   2 - result is empty and `error` is 3 or more (caller should abort)
  errorEmptyAPI <- function(df) {
    if(length(df) != 0) {
      return(0)
    }
    if(error < 3) {
      cat("[WARNING] Empty API result. Trying again.\n")
      return(1)
    }
    cat("[WARNING] 3x empty API result. Aborting now.\n")
    2
  }
  # Check if API output contains error fields (0 or 2)
  #
  # Args:
  #   df: the parsed API result.
  # Returns: 2 (after printing the first entry of `df$error`) when `df` has
  #   an element named "error", otherwise 0.
  errorErrorColumn <- function(df) {
    has_error_field <- "error" %in% names(df)
    if(!has_error_field) {
      return(0)
    }
    cat("[WARNING] Error in API request:", df$error[1],"\n")
    2
  }
  # Check if error codes exist (i.e. 34 or 88)
  #
  # Args:
  #   df: the parsed API result.
  # Returns: 0 when `df` has no element named "errors". Otherwise the value
  #   in row 1, column 2 of `df$errors`, after printing the value in row 1,
  #   column 1.
  errorCheckCode <- function(df) {
    if(!("errors" %in% names(df))) {
      return(0)
    }
    cat("[WARNING] Error in API request:", df$errors[1,1],"\n")
    df$errors[1,2]
  }
  # Handle code 88: rate limit exceeded (wait time)
  #
  # Queries the rate-limit status endpoint for the "statuses" resources and
  # reads the reset value of `/statuses/user_timeline`. That value is compared
  # with the current time in seconds since the epoch, so it is presumably a
  # Unix timestamp (verify against the API response).
  #
  # Relies on `twitter_api_call`, `fromJSON` and `api_params`, which are
  # defined outside this function.
  #
  # Returns: the number of seconds to wait, i.e. the time until the reset
  #   plus 10 seconds, rounded.
  errorCode88 <- function() {
    status_json <- twitter_api_call(
      "https://api.twitter.com/1.1/application/rate_limit_status.json",
      c(resources="statuses"),
      api_params
    )
    limits <- fromJSON(status_json)
    reset_at <- limits$resources$statuses$`/statuses/user_timeline`$reset
    now <- as.numeric(as.POSIXct(Sys.time()))
    wait <- round(reset_at - now + 10)
    cat("[INFO] Rate limit is exceeded. Now waiting",wait,"seconds.\n")
    wait
  }
  # Handle code 34: Page does not exist (1 or 2)
  #
  # Reads the retry counter from the variable `error` defined outside this
  # function.
  #
  # Returns:
  #   1 - `error` is 2 or less
  #   2 - `error` is greater than 2 (a warning is printed first)
  errorCode34 <- function() {
    if(error <= 2) {
      return(1)
    }
    cat("[WARNING] 3x Not existing page. Aborting now.\n")
    2
  }