Bachelor thesis: "The influence of sensational issues on the political agenda setting in social media"
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

issuecomp-codingsample.R 2.9KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596
  1. require(stringr)
  2. require(XML)
  3. # FUNCTIONS ---------------------------------------------------------------
  4. readYN <- function(question) {
  5. n <- readline(prompt=question)
  6. n <- as.character(n)
  7. return(n)
  8. }
  9. checkIssue <- function(string, issuelist) {
  10. status <- any(str_detect(string, issuelist))
  11. return(status)
  12. }
  13. checkAllIssues <- function(string, issuelist) {
  14. status <- NULL
  15. for(i in 1:length(string)) {
  16. if(checkIssue(string[i], issuelist)) {
  17. status[i] <- TRUE
  18. }
  19. else {
  20. cat("Issue",string[i],"does not exist. Please try again.\n")
  21. status[i] <- FALSE
  22. }
  23. }
  24. return(status)
  25. }
  26. # SAMPLE OUT/INPUT --------------------------------------------------------
  27. # Read CSV of all tweets (with tags, if available)
  28. c_tweets <- read.csv("tweets.csv", colClasses="character")
  29. # Replace quotes because it may cause problems when saving and reading as CSV files
  30. for(r in 1:nrow(c_tweets)) {
  31. curtext <- as.character(c_tweets$text[r])
  32. if(str_detect(curtext, "\"")) {
  33. c_tweets$text[r] <- str_replace(curtext, "\"", "")
  34. }
  35. }
  36. c_tweets$X <- NULL
  37. # Read all issues from XML file
  38. c_issues <- data.frame(date = drange)
  39. c_issuelist <- xmlToList("issues-v2.xml")
  40. c_issueheads <- names(issuelist)
  41. c_issues[issueheads] <- 0
  42. # Run through as many tweets as wished to mark them as correct or incorrect
  43. source("issuecomp-codingsample-function.R")
  44. rm(c_err, c_result, c_samid, c_samno,c_samtags,c_samissue,c_samtext,c_yn)
  45. # Now go through tweets/tags marked as false
  46. # Exit codes:
  47. # 0 = Correct tagging
  48. # 1 = At least one tag was incorrect
  49. # 2 = At least one tag was missing
  50. # 3 = Both 1 and 2
  51. c_errors <- read.csv("issuecomp-codingsample-error.csv", header = F, sep=",", colClasses="character")
  52. names(c_errors) <- c("str_id", "code", "issue", "tags", "text")
  53. for(r in 1:nrow(c_errors)) {
  54. c_errcode <- as.character(c_errors$code[r])
  55. c_errissue <- as.character(c_errors$issue[r])
  56. c_errtags <- as.character(c_errors$tags[r])
  57. c_errtext <- as.character(c_errors$text[r])
  58. c_errid <- as.character(c_errors$str_id[r])
  59. cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="")
  60. source("issuecomp-codingsample-function2.R")
  61. }
  62. # Now import the error files in a human readable data frame to improve the issue database
  63. # All tweets with WRONG ISSUES
  64. c_tmp <- read.csv("issuecomp-codingsample-error1.csv", header = F, colClasses="character")
  65. names(c_tmp) <- c("str_id", "all", "wrong", "tagged", "text")
  66. c_error1 <- c_tmp[, c("wrong", "tagged", "all", "text")]
  67. # All tweets with MISSING ISSUES
  68. c_tmp <- read.csv("issuecomp-codingsample-error2.csv", header = F, colClasses="character")
  69. names(c_tmp) <- c("str_id", "all", "missing", "tagged", "text")
  70. c_error2 <- c_tmp[, c("missing", "text", "tagged", "all")]
  71. # All CORRECT tweets
  72. c_tmp <- read.csv("issuecomp-codingsample-correct.csv", header = F, colClasses="character")
  73. names(c_tmp) <- c("str_id", "status", "issue", "tags", "text")
  74. c_correct <- c_tmp