Bachelor thesis: "The influence of sensational issues on the political agenda setting in social media"
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

issuecomp-3-calc.R 6.0KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199
  1. require(stringr)
  2. require(reshape2)
  3. require(ggplot2)
  4. require(vars)
  5. # Create dataframes with only non-sensational (i) and sensational (s) issue columns
  6. drop_s <- which(str_detect(names(issues), "^s"))
  7. drop_i <- which(str_detect(names(issues), "^i"))
  8. issues_i <- issues[,-drop_s]
  9. issues <- issues[,-drop_i]
  10. # #
  11. # ENTROPY
  12. # #
  13. # Entropy non-sensational issues
  14. issues_i$total <- rowSums(issues_i[2:ncol(issues_i)])
  15. issues_i$entropy <- 0
  16. for(r in 1:nrow(issues_i)) {
  17. curtotal <- as.numeric(issues_i$total[r])
  18. curp <- 0
  19. for(c in 2:ncol(issues_i)) {
  20. curcount <- as.numeric(issues_i[r,c])
  21. curp[c] <- curcount / curtotal
  22. }
  23. curp <- curp [2:length(curp)-2]
  24. curdrop <- which(curp==0)
  25. curp <- curp[-curdrop]
  26. issues_i$entropy[r] <- sum(-1 * curp * log(curp))
  27. }
  28. # Entropy sensational issues
  29. issues$total <- rowSums(issues[2:ncol(issues)])
  30. issues$entropy <- 0
  31. for(r in 1:nrow(issues)) {
  32. curtotal <- as.numeric(issues$total[r])
  33. curp <- 0
  34. for(c in 2:ncol(issues)) {
  35. curcount <- as.numeric(issues[r,c])
  36. curp[c] <- curcount / curtotal
  37. }
  38. curp <- curp [2:length(curp)-2]
  39. curdrop <- which(curp==0)
  40. curp <- curp[-curdrop]
  41. issues$entropy[r] <- sum(-1 * curp * log(curp))
  42. }
  43. # Entropy overall
  44. issues$total <- rowSums(issues[2:ncol(issues)])
  45. issues$entropy <- 0
  46. for(r in 1:nrow(issues)) {
  47. curtotal <- as.numeric(issues$total[r])
  48. curp <- 0
  49. for(c in 2:ncol(issues)) {
  50. curcount <- as.numeric(issues[r,c])
  51. curp[c] <- curcount / curtotal
  52. }
  53. curp <- curp [2:length(curp)-2]
  54. curdrop <- which(curp==0)
  55. curp <- curp[-curdrop]
  56. issues$entropy[r] <- sum(-1 * curp * log(curp))
  57. }
  58. # Compare total tweets vs. total sensational & total unsensational
  59. stats_total <- data.frame(date=drange)
  60. stats_total$tpd <- 0
  61. stats_total$ipd <- issues_i$total
  62. stats_total$spd <- issues$total
  63. # Total number of tweets per day over time
  64. for(r in 1:length(drange)) {
  65. stats_total$tpd[r] <- length(tweets[tweets[, "created_at"] == drange[r], "id_str"])
  66. }
  67. # VISUALS: Tweets per day vs. sensational vs. general findings
  68. stats_melt <- melt(stats_total, id="date")
  69. g_perday <- ggplot(data = stats_melt, aes(x=date,y=value,colour=variable, group=variable)) +
  70. geom_line()+
  71. geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1) +
  72. xlab("Zeitraum") + ylab("Tweets pro Tag") +
  73. scale_colour_discrete(name = "Tweets", labels = c("Gesamt", "Allgemein", "Sensation")) +
  74. theme(legend.title = element_text(size=14)) +
  75. theme(legend.text = element_text(size=12)) +
  76. theme(axis.title = element_text(size = 14))
  77. g_perday
  78. # Visuals for entropy in time series
  79. stats_entropy <- data.frame(date=drange)
  80. stats_entropy$entropy <- issues$entropy
  81. stats_entropy <- melt(stats_entropy, id="date")
  82. g_entrop <- ggplot(data = stats_entropy, aes(x=date,y=value,colour=variable, group=variable)) +
  83. geom_line() +
  84. geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1) +
  85. xlab("Zeitraum") + ylab("Entropie") +
  86. scale_colour_discrete(name = "", labels = "Entropie") +
  87. theme(legend.title = element_text(size=14)) +
  88. theme(legend.text = element_text(size=12)) +
  89. theme(axis.title = element_text(size = 14))
  90. g_entrop
  91. # VAR ---------------------------------------------------------------------
  92. # test <- VAR(issues[,2:32], p=1, type=c("const", "trend", "both", "none"), season=NULL, exogen = NULL, lag.max = NULL, ic = c("AIC", "HQ", "SC", "FPE"))
  93. # test <- VAR(issues_i[,2:22], p=1, type="none", exogen = issues[,2:3])
  94. # test <- VAR(issues[,2:11], p=1, type="none")
  95. # VAR(issues[,2:23], p=1, type=c("const", "trend", "both", "none"), season=NULL, exogen = issues_i[2:22])
  96. issues_ts <- as.ts(issues[,2:44])
  97. # Tests
  98. VARselect(issues_ts, lag.max = 5, type = "both")
  99. i <- 0
  100. i <- i + 1
  101. ur.df(issues_ts[, i], type ="none", lags=1)
  102. summary(issues[2:44])
  103. # VAR and IRF
  104. vIssues <- VAR(issues_ts, p=1, type="both")
  105. vIRF <- irf(vIssues, impulse = names(issues[2:23]), response = names(issues_i[2:22]))
  106. plot(vIRF)
  107. # capture.output(print(summary(test), prmsd=TRUE, digits=1), file="out.txt")
  108. # SOME TESTS --------------------------------------------------------------
  109. stats <- data.frame(date=drange)
  110. stats$tpd <- 0
  111. # Total number of tweets per day over time
  112. for(r in 1:length(drange)) {
  113. stats$tpd[r] <- length(tweets[tweets[, "created_at"] == drange[r], "id_str"])
  114. }
  115. stats_melt <- melt(stats, id="date")
  116. g1 <- ggplot(data = stats_melt, aes(x=date,y=value,colour=variable, group=variable)) +
  117. geom_line() +
  118. geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1)
  119. g1
  120. rm(g1, r)
  121. # Show party percentage of twitter users
  122. acc_parties <- data.frame(party = c("cducsu", "spd", "gruene", "linke"))
  123. acc_parties$btw13 <- c(49.3, 30.6, 10.0, 10.1) # seats of party / 631 seats
  124. acc_parties$twitter <- 0
  125. for(p in 1:nrow(acc_parties)) {
  126. acc_parties$twitter[p] <- round(nrow(acc_df[acc_df$party == as.character(acc_parties$party[p]), ]) / 280 * 100)
  127. }
  128. pie(acc_parties$btw13, col=c("black", "red", "green", "purple"),
  129. labels = c("CDU/CSU (49,3%)", "SPD (30,6%)", "Bündnis 90/Grüne(10,0%)", "Die LINKE (10,1%)"),
  130. clockwise = T)
  131. pie(acc_parties$twitter, col=c("black", "red", "green", "purple"),
  132. labels = c("CDU/CSU (36%)", "SPD (30%)", "Bündnis 90/Grüne(19%)", "Die LINKE (15%)"),
  133. clockwise = T)
  134. rm(acc_parties, p)
  135. # Count all tags
  136. num <- 0
  137. for(i in 1:length(issuelist)) {
  138. j <- length(issuelist[[i]])
  139. num <- num + j
  140. rm(j)
  141. }
  142. num
  143. # VISUALS -----------------------------------------------------------------
  144. # Level: days
  145. issues_melt <- melt(issues,id="date")
  146. ggplot(issues_melt,aes(x=date,y=value,colour=variable,group=variable)) + geom_line(size=1)
  147. ggplot(issues_melt,aes(x=date,y=value,colour=variable,group=variable)) + geom_smooth(size=1,method="loess",formula = y ~ x, se=FALSE)
  148. # POSSIBLY USEFUL CODE ----------------------------------------------------
  149. # List all issues in one row
  150. for(i in 1:length(issueheads)) {
  151. cat(issueheads[i], "\n")
  152. }
  153. # Limits of list
  154. length(issuelist)
  155. length(issuelist[[2]])
  156. # Select all tweets from current day in drange
  157. tweets_curday <- tweets[tweets[, "created_at"] == drange[5], ]
  158. # Is column a issue counting column?
  159. str_detect(names(issues[2]), "^issue")