Browse Source

update to latest version

mxmehl 5 years ago
parent
commit
5d1877aa41
7 changed files with 574 additions and 514 deletions
  1. 474
    474
      .Rhistory
  2. 1
    0
      .gitignore
  3. 1
    1
      issuecomp-2-analysis.R
  4. 71
    34
      issuecomp-3-calc.R
  5. 0
    5
      twitter-api-credentials.txt
  6. 5
    0
      twitter-api-credentials.txt.sample
  7. 22
    0
      ur.df-testvalues.txt

+ 474
- 474
.Rhistory View File

@@ -1,281 +1,175 @@
1
-load(file = "tweets_untagged.RData")
2
-setwd("~/Dokumente/Uni/Aktuell/BA-Arbeit/uni-ba-issuecomp")
3
-results_files <- "matched-ids/all.csv"
4
-load(file = "tweets_untagged.RData")
5
-View(issues)
6
-issues <- data.frame(date = drange)
7
-issuelist <- readLines("issues.xml")
8
-issuelist <- str_replace_all(string = issuelist, pattern = ".*<!-- .+ -->", "")
9
-issuelist <- xmlToList(issuelist)
10
-issueheads <- names(issuelist)
11
-issues[issueheads] <- 0
12
-tweets$issue <- ""
13
-tweets$tags <- ""
14
-View(results)
15
-rm(r, results_temp, results_files)
16
-results <- results[!duplicated(results), ]
17
-names(results) <- c("date", "id_str", "issue", "tags")
18
-results <- results[order(results$id_str), ]
19
-row.names(results) <- NULL
20
-for(r in 1:nrow(results)) {
21
-curdate <- as.character(results$date[r])
22
-curid <- as.character(results$id_str[r])
23
-curissue <- as.character(results$issue[r])
24
-curtag <- as.character(results$tags[r])
25
-cat("Sorting match", r, "of 53383 \n")
26
-# Update issue counter (date and issue)
27
-issues[issues[, "date"] == curdate, curissue] <- issues[issues[, "date"] == curdate, curissue] + 1
28
-# Update tweet dataframe (id, issue and tags)
29
-oldissue <- tweets[tweets[, "id_str"] == curid, "issue"]
30
-tweets[tweets[, "id_str"] == curid, "issue"] <- str_c(oldissue, curissue, ",")
31
-oldtag <- tweets[tweets[, "id_str"] == curid, "tags"]
32
-tweets[tweets[, "id_str"] == curid, "tags"] <- str_c(oldtag, curtag, ",")
33
-}
34
-curdate
35
-curissue
36
-issues[issues[, "date"] == curdate, curissue]
37
-issueheads
38
-issuelist <- readLines("issues-v2.xml")
39
-issues <- data.frame(date = drange)
40
-issuelist <- readLines("issues-v2.xml")
41
-issuelist <- str_replace_all(string = issuelist, pattern = ".*<!-- .+ -->", "")
42
-issuelist <- xmlToList(issuelist)
43
-issueheads <- names(issuelist)
44
-issues[issueheads] <- 0
45
-tweets$issue <- ""
46
-tweets$tags <- ""
47
-for(r in 1:nrow(results)) {
48
-curdate <- as.character(results$date[r])
49
-curid <- as.character(results$id_str[r])
50
-curissue <- as.character(results$issue[r])
51
-curtag <- as.character(results$tags[r])
52
-cat("Sorting match", r, "of 53383 \n")
53
-# Update issue counter (date and issue)
54
-issues[issues[, "date"] == curdate, curissue] <- issues[issues[, "date"] == curdate, curissue] + 1
55
-# Update tweet dataframe (id, issue and tags)
56
-oldissue <- tweets[tweets[, "id_str"] == curid, "issue"]
57
-tweets[tweets[, "id_str"] == curid, "issue"] <- str_c(oldissue, curissue, ",")
58
-oldtag <- tweets[tweets[, "id_str"] == curid, "tags"]
59
-tweets[tweets[, "id_str"] == curid, "tags"] <- str_c(oldtag, curtag, ",")
60
-}
61
-results[33170,]
62
-results[33171,]
63
-results$date[33170]
64
-results$date[33170] <- "2014-08-21"
65
-for(r in 33170:nrow(results)) {
66
-curdate <- as.character(results$date[r])
67
-curid <- as.character(results$id_str[r])
68
-curissue <- as.character(results$issue[r])
69
-curtag <- as.character(results$tags[r])
70
-cat("Sorting match", r, "of 53383 \n")
71
-# Update issue counter (date and issue)
72
-issues[issues[, "date"] == curdate, curissue] <- issues[issues[, "date"] == curdate, curissue] + 1
73
-# Update tweet dataframe (id, issue and tags)
74
-oldissue <- tweets[tweets[, "id_str"] == curid, "issue"]
75
-tweets[tweets[, "id_str"] == curid, "issue"] <- str_c(oldissue, curissue, ",")
76
-oldtag <- tweets[tweets[, "id_str"] == curid, "tags"]
77
-tweets[tweets[, "id_str"] == curid, "tags"] <- str_c(oldtag, curtag, ",")
78
-}
79
-save(tweets, file="tweets_tagged.RData")
80
-write.csv(tweets, file="tweets.csv")
81
-save(issues, file="issues.RData")
1
+xlab("Zeitraum") + ylab("Tweets pro Tag") +
2
+scale_colour_discrete(name  = "Tweets", labels = c("Gesamt", "Allgemein", "Sensation")) +
3
+theme(legend.title = element_text(size=14)) +
4
+theme(legend.text = element_text(size=12)) +
5
+theme(axis.text = element_text(size = 18))
6
+g_perday
7
+g_perday <- ggplot(data = stats_melt, aes(x=date,y=value,colour=variable, group=variable)) +
8
+geom_line()+
9
+geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1) +
10
+xlab("Zeitraum") + ylab("Tweets pro Tag") +
11
+scale_colour_discrete(name  = "Tweets", labels = c("Gesamt", "Allgemein", "Sensation")) +
12
+theme(legend.title = element_text(size=14)) +
13
+theme(legend.text = element_text(size=12)) +
14
+theme(axis.title = element_text(size = 18))
15
+g_perday
16
+g_perday <- ggplot(data = stats_melt, aes(x=date,y=value,colour=variable, group=variable)) +
17
+geom_line()+
18
+geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1) +
19
+xlab("Zeitraum") + ylab("Tweets pro Tag") +
20
+scale_colour_discrete(name  = "Tweets", labels = c("Gesamt", "Allgemein", "Sensation")) +
21
+theme(legend.title = element_text(size=14)) +
22
+theme(legend.text = element_text(size=12)) +
23
+theme(axis.title = element_text(size = 12))
24
+g_perday
25
+g_perday <- ggplot(data = stats_melt, aes(x=date,y=value,colour=variable, group=variable)) +
26
+geom_line()+
27
+geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1) +
28
+xlab("Zeitraum") + ylab("Tweets pro Tag") +
29
+scale_colour_discrete(name  = "Tweets", labels = c("Gesamt", "Allgemein", "Sensation")) +
30
+theme(legend.title = element_text(size=14)) +
31
+theme(legend.text = element_text(size=12)) +
32
+theme(axis.title = element_text(size = 13))
33
+g_perday
34
+g_perday <- ggplot(data = stats_melt, aes(x=date,y=value,colour=variable, group=variable)) +
35
+geom_line()+
36
+geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1) +
37
+xlab("Zeitraum") + ylab("Tweets pro Tag") +
38
+scale_colour_discrete(name  = "Tweets", labels = c("Gesamt", "Allgemein", "Sensation")) +
39
+theme(legend.title = element_text(size=14, face="plain")) +
40
+theme(legend.text = element_text(size=12)) +
41
+theme(axis.title = element_text(size = 13))
42
+g_perday
43
+g_perday <- ggplot(data = stats_melt, aes(x=date,y=value,colour=variable, group=variable)) +
44
+geom_line()+
45
+geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1) +
46
+xlab("Zeitraum") + ylab("Tweets pro Tag") +
47
+scale_colour_discrete(name  = "Tweets", labels = c("Gesamt", "Allgemein", "Sensation")) +
48
+theme(legend.title = element_text(size=14)) +
49
+theme(legend.text = element_text(size=12)) +
50
+theme(axis.title = element_text(size = 13))
51
+g_perday
52
+g_entrop <- ggplot(data = stats_entropy, aes(x=date,y=value,colour=variable, group=variable)) +
53
+geom_line() +
54
+geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1) +
55
+xlab("Zeitraum") + ylab("Entropie") +
56
+scale_colour_discrete(name  = "", labels = "Entropie") +
57
+theme(legend.title = element_text(size=14)) +
58
+theme(legend.text = element_text(size=12)) +
59
+theme(axis.title = element_text(size = 13))
60
+g_entrop
61
+g_entrop <- ggplot(data = stats_entropy, aes(x=date,y=value,colour=variable, group=variable)) +
62
+geom_line() +
63
+geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1) +
64
+xlab("Zeitraum") + ylab("Entropie") +
65
+scale_colour_discrete(name  = "", labels = "Entropie")# +
66
+#   theme(legend.title = element_text(size=14)) +
67
+#   theme(legend.text = element_text(size=12)) +
68
+#   theme(axis.title = element_text(size = 13))
69
+g_entrop
70
+detach("package:ggplot2", unload=TRUE)
71
+library("ggplot2", lib.loc="/usr/lib/R/site-library")
72
+g_entrop <- ggplot(data = stats_entropy, aes(x=date,y=value,colour=variable, group=variable)) +
73
+geom_line() +
74
+geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1) +
75
+xlab("Zeitraum") + ylab("Entropie") +
76
+scale_colour_discrete(name  = "", labels = "Entropie")# +
77
+#   theme(legend.title = element_text(size=14)) +
78
+#   theme(legend.text = element_text(size=12)) +
79
+#   theme(axis.title = element_text(size = 13))
80
+g_entrop
81
+theme()
82 82
 require(stringr)
83 83
 require(reshape2)
84 84
 require(ggplot2)
85 85
 require(vars)
86
-drop_s <- which(str_detect(names(issues), "^s"))
87
-drop_i <- which(str_detect(names(issues), "^i"))
88
-issues_i <- issues[,-drop_s]
89
-issues_s <- issues[,-drop_i]
90
-issues_i$total <- rowSums(issues_i[2:ncol(issues_i)])
91
-issues_i$entropy <- 0
92
-for(r in 1:nrow(issues_i)) {
93
-curtotal <- as.numeric(issues_i$total[r])
94
-curp <- 0
95
-for(c in 2:ncol(issues_i)) {
96
-curcount <- as.numeric(issues_i[r,c])
97
-curp[c] <- curcount / curtotal
98
-}
99
-curp <- curp [2:length(curp)-2]
100
-curdrop <- which(curp==0)
101
-curp <- curp[-curdrop]
102
-issues_i$entropy[r] <- sum(-1 * curp * log(curp))
103
-}
104
-issues_s$total <- rowSums(issues_s[2:ncol(issues_s)])
105
-issues_s$entropy <- 0
106
-for(r in 1:nrow(issues_s)) {
107
-curtotal <- as.numeric(issues_s$total[r])
108
-curp <- 0
109
-for(c in 2:ncol(issues_s)) {
110
-curcount <- as.numeric(issues_s[r,c])
111
-curp[c] <- curcount / curtotal
112
-}
113
-curp <- curp [2:length(curp)-2]
114
-curdrop <- which(curp==0)
115
-curp <- curp[-curdrop]
116
-issues_s$entropy[r] <- sum(-1 * curp * log(curp))
117
-}
118
-stats_total <- data.frame(date=drange)
119
-stats_total$tpd <- 0
120
-stats_total$ipd <- issues_i$total
121
-stats_total$spd <- issues_s$total
122
-# Total number of tweets per day over time
123
-for(r in 1:length(drange)) {
124
-stats_total$tpd[r] <- length(tweets[tweets[, "created_at"] == drange[r], "id_str"])
125
-}
126
-g1 <- ggplot(data = stats_melt, aes(x=date,y=value,colour=variable, group=variable)) +
86
+theme()
87
+g_entrop <- ggplot(data = stats_entropy, aes(x=date,y=value,colour=variable, group=variable)) +
88
+geom_line() +
89
+geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1) +
90
+xlab("Zeitraum") + ylab("Entropie") +
91
+scale_colour_discrete(name  = "", labels = "Entropie")# +
92
+#   theme(legend.title = element_text(size=14)) +
93
+#   theme(legend.text = element_text(size=12)) +
94
+#   theme(axis.title = element_text(size = 13))
95
+g_entrop
96
+g_entrop <- ggplot(data = stats_entropy, aes(x=date,y=value,colour=variable, group=variable)) +
97
+geom_line() +
98
+geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1) +
99
+xlab("Zeitraum") + ylab("Entropie") +
100
+scale_colour_discrete(name  = "", labels = "Entropie")
101
+g_entrop
102
+g_perday <- ggplot(data = stats_melt, aes(x=date,y=value,colour=variable, group=variable)) +
127 103
 geom_line()+
104
+geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1) +
105
+xlab("Zeitraum") + ylab("Tweets pro Tag") +
106
+scale_colour_discrete(name  = "Tweets", labels = c("Gesamt", "Allgemein", "Sensation"))
107
+g_perday
108
+g_entrop <- ggplot(data = stats_entropy, aes(x=date,y=value,colour=variable, group=variable)) +
109
+geom_line() +
110
+geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1) +
111
+xlab("Zeitraum") + ylab("Entropie") +
112
+scale_colour_discrete(name  = "", labels = "Entropie")
113
+g_entrop
114
+g_entrop <- ggplot(data = stats_entropy, aes(x=date,y=value,colour=variable, group=variable)) +
115
+geom_line() +
128 116
 geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1)
129
-g1
117
+g_entrop
130 118
 stats_entropy <- data.frame(date=drange)
131 119
 stats_entropy$entropy <- issues_i$entropy
132 120
 stats_entropy <- melt(stats_entropy, id="date")
133
-g1 <- ggplot(data = stats_entropy, aes(x=date,y=value,colour=variable, group=variable)) +
121
+g_entrop <- ggplot(data = stats_entropy, aes(x=date,y=value,colour=variable, group=variable)) +
134 122
 geom_line() +
135
-geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1)
136
-g1
137
-test <- VAR(issues[,2:32], p=1, type="none")
138
-View(issues_i)
139
-View(issues_s)
140
-View(issues)
141
-test <- VAR(issues[,2:44], p=1, type="none")
142
-VAR(issues_s[,2:23], p=1, type=c("const", "trend", "both", "none"), season=NULL, exogen = issues_i[2:22])
143
-plot(irf(test, impulse = names(issues_s[2:23]), response = names(issues_i[2:22])))
144
-rm(c_correct, c_curissue, c_errcode, c_errid, c_errissue, c_error1, c_error2, c_errors)
145
-rm(c_issues, c_issuelist, c_issueheads)
146
-rm(c_errtags, c_errtext, c_result, c_tag, c_tmp, c_tweets)
147
-require(stringr)
148
-require(XML)
149
-readYN <- function(question) {
150
-n <- readline(prompt=question)
151
-n <- as.character(n)
152
-return(n)
153
-}
154
-checkIssue <- function(string, issuelist) {
155
-status <- any(str_detect(string, issuelist))
156
-return(status)
157
-}
158
-checkAllIssues <- function(string, issuelist) {
159
-status <- NULL
160
-for(i in 1:length(string)) {
161
-if(checkIssue(string[i], issuelist)) {
162
-status[i] <- TRUE
163
-}
164
-else {
165
-cat("Issue",string[i],"does not exist. Please try again.\n")
166
-status[i] <- FALSE
167
-}
168
-}
169
-return(status)
170
-}
171
-View(tweets)
172
-c_tweets <- read.csv("tweets.csv", colClasses="character")
173
-for(r in 1:nrow(c_tweets)) {
174
-curtext <- as.character(c_tweets$text[r])
175
-if(str_detect(curtext, "\"")) {
176
-c_tweets$text[r] <- str_replace(curtext, "\"", "")
177
-}
123
+geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1) +
124
+xlab("Zeitraum") + ylab("Entropie") +
125
+scale_colour_discrete(name  = "", labels = "Entropie")
126
+g_entrop
127
+g_entrop <- ggplot(data = stats_entropy, aes(x=date,y=value,colour=variable, group=variable)) +
128
+geom_line() +
129
+geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1) +
130
+xlab("Zeitraum") + ylab("Entropie") +
131
+scale_colour_discrete(name  = "", labels = "Entropie") +
132
+theme(legend.title = element_text(size=14)) +
133
+theme(legend.text = element_text(size=12)) +
134
+theme(axis.title = element_text(size = 13))
135
+g_entrop
136
+g_entrop <- ggplot(data = stats_entropy, aes(x=date,y=value,colour=variable, group=variable)) +
137
+geom_line() +
138
+geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1) +
139
+xlab("Zeitraum") + ylab("Entropie") +
140
+scale_colour_discrete(name  = "", labels = "Entropie") +
141
+theme(legend.title = element_text(size=14)) +
142
+theme(legend.text = element_text(size=12)) +
143
+theme(axis.title = element_text(size = 14))
144
+g_entrop
145
+g_perday <- ggplot(data = stats_melt, aes(x=date,y=value,colour=variable, group=variable)) +
146
+geom_line()+
147
+geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1) +
148
+xlab("Zeitraum") + ylab("Tweets pro Tag") +
149
+scale_colour_discrete(name  = "Tweets", labels = c("Gesamt", "Allgemein", "Sensation")) +
150
+theme(legend.title = element_text(size=14)) +
151
+theme(legend.text = element_text(size=12)) +
152
+theme(axis.title = element_text(size = 14))
153
+g_perday
154
+g_entrop <- ggplot(data = stats_entropy, aes(x=date,y=value,colour=variable, group=variable)) +
155
+geom_line() +
156
+geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1) +
157
+xlab("Zeitraum") + ylab("Entropie") +
158
+scale_colour_discrete(name  = "", labels = "Entropie") +
159
+theme(legend.title = element_text(size=14)) +
160
+theme(legend.text = element_text(size=12)) +
161
+theme(axis.title = element_text(size = 14))
162
+g_entrop
163
+acc_parties <- data.frame(party = c("cducsu", "spd", "linke", "gruene"))
164
+acc_parties$btw13 <- c(49.3, 30.6, 10.1, 10.0) # seats of party / 631 seats
165
+acc_parties$twitter <- 0
166
+for(p in 1:nrow(acc_parties)) {
167
+acc_parties$twitter[p] <- round(nrow(acc_df[acc_df$party == as.character(acc_parties$party[p]), ]) / 280 * 100)
178 168
 }
179
-c_tweets$X <- NULL
180
-c_issues <- data.frame(date = drange)
181
-c_issuelist <- xmlToList("issues-v2.xml")
182
-c_issueheads <- names(issuelist)
183
-c_issues[issueheads] <- 0
184
-source("issuecomp-codingsample-function.R")
185
-require(stringr)
186
-curtext
187
-curtext <- str_replace_all(curtext, "#", "")
188
-curtext <- str_replace_all(curtext, "-", " ")
189
-curtext
190
-curtext
191
-str_replace_all(curtext, "[^[:alnum:]]", "")
192
-str_replace_all(curtext, "[^[:alnum:]\s]", "")
193
-str_replace_all(curtext, "[^[:alnum:]\\s]", "")
194
-str_replace_all(curtext, "[^[:alnum:]^\\s]", "")
195
-str_replace_all(curtext, "[^[:alnum:]^\\S]", "")
196
-str_replace_all(curtext, "[^[:alnum:]][^\\s]", "")
197
-str_replace_all(curtext, "[^[:alnum:]][^\\S]", "")
198
-str_replace_all(curtext, "[^[:alnum:]][^[:blank]]", "")
199
-str_replace_all(curtext, "[^[:alnum:]][^[:blank:]]", "")
200
-str_replace_all(curtext, "[^[:alnum:]]", "")
201
-str_replace_all(curtext, "\\W", "")
202
-str_replace_all(curtext, "[\\W|\\S]", "")
203
-str_replace_all(curtext, "(\\W|\\S)", "")
204
-str_replace_all(curtext, "\\W|\\S", "")
205
-str_replace_all(curtext, "\\W", "")
206
-str_replace_all(curtext, "[\\W\\S]", "")
207
-str_replace_all(curtext, "[\\S\\W]", "")
208
-str_replace_all(curtext, "[\\s\\W]", "")
209
-str_replace_all(curtext, "[\\W\\s]", "")
210
-str_replace_all(curtext, "[\\W\s]", "")
211
-str_replace_all(curtext, "[\\Ws]", "")
212
-str_replace_all(curtext, "[\\W]", "")
213
-str_replace_all(curtext, "\\W", "")
214
-str_replace_all(curtext, "\\W|\\S", "")
215
-str_replace_all(curtext, "\\W|\\s", "")
216
-str_replace_all(curtext, "[^[:alnum:]]", "")
217
-str_replace_all(curtext, "[^[:alnum:] ]", "")
218
-str_replace_all(curtext, "[^[:alnum:]\\s]", "")
219
-str_replace_all(curtext, "[^[:alnum:] ]", "")
220
-curtext
221
-curtext <- "liebe @cdu, wir finden #Steuer gut, aber die KFZ-Steuer nicht!"
222
-curtext <- str_replace_all(curtext, "-", " ")
223
-curtext <- str_replace_all(curtext, "[^[:alnum:] ]", "")
224
-curtext
225
-curtext <- "liebe @cdu, wir finden #Steuer gut, aber die KFZ--Steuer nicht!"
226
-curtext <- str_replace_all(curtext, "-", " ")
227
-curtext <- str_replace_all(curtext, "[^[:alnum:] ]", "")
228
-curtext
229
-str_replace_all(curtext, "  ", " ")
230
-smartPatternMatch
231
-require(vars)
169
+require(jsonlite)
232 170
 require(stringr)
233
-adf1 <- summary(ur.df(issues))
234
-issues
235
-summary(issues)
236
-summary(issues[2:44])
237
-summary(issues[2:44], digits = 2)
238
-adf1 <- summary(ur.df(issues[, 2:44]), type ="trend", lags=1)
239
-data("Canda")
240
-data("Canada")
241
-class(Canada)
242
-class(issues)
243
-view(Canada)
244
-View(Canada)
245
-as.ts(issues)
246
-issues_ts <- as.ts(issues)
247
-class(issues_ts)
248
-View(issues_ts)
249
-View(issues)
250
-adf1 <- summary(ur.df(issues_ts[, 2:44]), type ="trend", lags=1)
251
-adf1 <- summary(ur.df(issues_ts[, 2]), type ="trend", lags=1)
252
-adf1 <- summary(ur.df(issues_ts[, 2], type ="trend", lags=1))
253
-adf1 <- summary(ur.df(issues_ts[, 2:44], type ="trend", lags=1))
254
-adf1 <- summary(ur.df(issues_ts[, 2], type ="trend", lags=1))
255
-adf1
256
-adf1 <- summary(ur.df(issues_ts[, 3], type ="trend", lags=1))
257
-adf1
258
-adf1 <- summary(ur.df(issues_ts[, 2], type ="none", lags=1))
259
-adf1
260
-adf1 <- summary(ur.df(issues_ts[, 2], type ="trend", lags=1))
261
-adf1
262
-summary(ur.df(issues_ts[, 2], type ="none", lags=1))
263
-VARselect(issues_ts[2:44], lag.max = 8, type = "both")
264
-VARselect(issues_ts[1:44], lag.max = 8, type = "both")
265
-VARselect(issues[1:44], lag.max = 8, type = "both")
266
-VARselect(issues[2:44], lag.max = 8, type = "both")
267
-VARselect(issues_ts[2:44], lag.max = 8, type = "both")
268
-VARselect(issues[2:44], lag.max = 8, type = "none")
269
-VARselect(issues[2:44], lag.max = 8, type = "trend")
270
-VARselect(issues[2:44], lag.max = 8, type = "const")
271
-VARselect(issues[2:44], lag.max = 8, type = "both")
272
-test <- VAR(issues[,2:44], p=1, type="both")
273
-# VAR(issues_s[,2:23], p=1, type=c("const", "trend", "both", "none"), season=NULL, exogen = issues_i[2:22])
274
-plot(irf(test, impulse = names(issues_s[2:23]), response = names(issues_i[2:22])))
275
-summary(ur.df(issues_ts[, 2], type ="both", lags=1))
276
-summary(ur.df(issues_ts[, 2], type ="none", lags=1))
277
-test <- VAR(issues_ts[,2:44], p=1, type="both")
278
-plot(irf(test, impulse = names(issues_s[2:23]), response = names(issues_i[2:22])))
171
+require(devtools)
172
+require(RTwitterAPI)
279 173
 acc_df <- read.csv("MdB-twitter.csv")
280 174
 delrow <- NULL
281 175
 for(r in 1:nrow(acc_df)) {
@@ -288,225 +182,331 @@ acc_df <- acc_df[-delrow, ]
288 182
 rm(delrow, r, acc)
289 183
 acc_df$row.names <- NULL
290 184
 row.names(acc_df) <- NULL
291
-View(acc_df)
292
-View(c_tweets)
185
+acc_parties <- data.frame(party = c("cducsu", "spd", "linke", "gruene"))
186
+acc_parties$btw13 <- c(49.3, 30.6, 10.1, 10.0) # seats of party / 631 seats
187
+acc_parties$twitter <- 0
188
+for(p in 1:nrow(acc_parties)) {
189
+acc_parties$twitter[p] <- round(nrow(acc_df[acc_df$party == as.character(acc_parties$party[p]), ]) / 280 * 100)
190
+}
191
+pie(acc_parties$btw13, col=c("black", "red", "purple", "green"), labels = c("CDU/CSU", "SPD", "Die LINKE", "Bündnis 90/Grüne"), clockwise = T,
192
+main = "Seats of parties in the parliament")
193
+pie(acc_parties$twitter, col=c("black", "red", "purple", "green"), labels = c("CDU/CSU", "SPD", "Die LINKE", "Bündnis 90/Grüne"), clockwise = T,
194
+main = "Percentage of parties' MdBs of all Twitter accounts")
195
+pie(acc_parties$btw13, col=c("black", "red", "purple", "green"), labels = c("CDU/CSU", "SPD", "Die LINKE", "Bündnis 90/Grüne"), clockwise = T)
196
+pie(acc_parties$twitter, col=c("black", "red", "purple", "green"), labels = c("CDU/CSU", "SPD", "Die LINKE", "Bündnis 90/Grüne"), clockwise = T)
197
+View(acc_parties)
198
+pie(acc_parties$btw13, col=c("black", "red", "purple", "green"),
199
+labels = c("CDU/CSU (49.3%)", "SPD", "Die LINKE", "Bündnis 90/Grüne"), clockwise = T)
200
+pie(acc_parties$btw13, col=c("black", "red", "purple", "green"),
201
+labels = c("CDU/CSU (49,3%)", "SPD (30,6%)", "Die LINKE (10,1%)", "Bündnis 90/Grüne(10.0%)"),
202
+clockwise = T)
203
+acc_parties <- data.frame(party = c("cducsu", "spd", "gruene", "linke"))
204
+acc_parties$btw13 <- c(49.3, 30.6, 10.0, 10.1) # seats of party / 631 seats
205
+acc_parties$twitter <- 0
206
+for(p in 1:nrow(acc_parties)) {
207
+acc_parties$twitter[p] <- round(nrow(acc_df[acc_df$party == as.character(acc_parties$party[p]), ]) / 280 * 100)
208
+}
209
+pie(acc_parties$btw13, col=c("black", "red", "green", "purple"),
210
+labels = c("CDU/CSU (49,3%)", "SPD (30,6%)", "Bündnis 90/Grüne(10.0%)", "Die LINKE (10,1%)"),
211
+clockwise = T)
212
+pie(acc_parties$btw13, col=c("black", "red", "green", "purple"),
213
+pie(acc_parties$btw13, col=c("black", "red", "green", "purple"),
214
+labels = c("CDU/CSU (49,3%)", "SPD (30,6%)", "Bündnis 90/Grüne(10,0%)", "Die LINKE (10,1%)"),
215
+clockwise = T)
216
+pie(acc_parties$btw13, col=c("black", "red", "green", "purple"),
217
+labels = c("CDU/CSU (49,3%)", "SPD (30,6%)", "Bündnis 90/Grüne(10,0%)", "Die LINKE (10,1%)"),
218
+clockwise = T)
219
+pie(acc_parties$twitter, col=c("black", "red", "green", "purple"),
220
+labels = c("CDU/CSU (36%)", "SPD (30%)", "Bündnis 90/Grüne(19%)", "Die LINKE (15%)"),
221
+clockwise = T)
222
+pie(acc_parties$twitter, col=c("black", "red", "green", "purple"),
223
+pie(acc_parties$twitter, col=c("black", "red", "green", "purple"),
224
+labels = c("CDU/CSU (36%)", "SPD (30%)", "Bündnis 90/Grüne(19%)", "Die LINKE (15%)"),
225
+clockwise = T, init.angle = 90)
226
+pie(acc_parties$twitter, col=c("black", "red", "green", "purple"),
227
+labels = c("CDU/CSU (36%)", "SPD (30%)", "Bündnis 90/Grüne(19%)", "Die LINKE (15%)"),
228
+clockwise = T)
229
+pie(acc_parties$twitter, col=c("black", "red", "green", "purple"),
230
+labels = c("CDU/CSU (36%)", "SPD (30%)", "Bündnis 90/Grüne(19%)", "Die LINKE (15%)"),
231
+clockwise = T, init.angle = 180)
232
+pie(acc_parties$twitter, col=c("black", "red", "green", "purple"),
233
+labels = c("CDU/CSU (36%)", "SPD (30%)", "Bündnis 90/Grüne(19%)", "Die LINKE (15%)"),
234
+clockwise = T, init.angle = 270)
235
+pie(acc_parties$twitter, col=c("black", "red", "green", "purple"),
236
+labels = c("CDU/CSU (36%)", "SPD (30%)", "Bündnis 90/Grüne(19%)", "Die LINKE (15%)"),
237
+clockwise = T, init.angle = 360)
238
+pie(acc_parties$twitter, col=c("black", "red", "green", "purple"),
239
+labels = c("CDU/CSU (36%)", "SPD (30%)", "Bündnis 90/Grüne(19%)", "Die LINKE (15%)"),
240
+clockwise = T, init.angle = 20)
241
+pie(acc_parties$twitter, col=c("black", "red", "green", "purple"),
242
+labels = c("CDU/CSU (36%)", "SPD (30%)", "Bündnis 90/Grüne(19%)", "Die LINKE (15%)"),
243
+clockwise = T, init.angle = 20)
244
+pie(acc_parties$twitter, col=c("black", "red", "green", "purple"),
245
+labels = c("CDU/CSU (36%)", "SPD (30%)", "Bündnis 90/Grüne(19%)", "Die LINKE (15%)"),
246
+clockwise = T, init.angle = 90)
247
+pie(acc_parties$btw13, col=c("black", "red", "green", "purple"),
248
+labels = c("CDU/CSU (49,3%)", "SPD (30,6%)", "Bündnis 90/Grüne(10,0%)", "Die LINKE (10,1%)"),
249
+clockwise = T)
250
+pie(acc_parties$twitter, col=c("black", "red", "green", "purple"),
251
+labels = c("CDU/CSU (36%)", "SPD (30%)", "Bündnis 90/Grüne(19%)", "Die LINKE (15%)"),
252
+clockwise = T)
253
+2359 / 200 * 100
254
+issues_ts <- as.ts(issues[,2:44])
255
+VARselect(issues_ts, lag.max = 5, type = "both")
256
+vIssues <- VAR(issues_ts, p=5, type="both")
257
+vIssues <- VAR(issues_ts, p=1, type="both")
258
+issues_ts <- as.ts(issues)
259
+VARselect(issues[2:44], lag.max = 8, type = "both")
260
+summary(ur.df(issues_ts[, 2], type ="none", lags=1))
261
+VARselect(issues_ts, lag.max = 5, type = "both")
262
+issues_ts <- as.ts(issues[,2:44])
263
+VARselect(issues_ts, lag.max = 5, type = "both")
264
+VARselect(issues_ts, lag.max = 5, type = "both")
265
+VARselect(issues_ts, lag.max = 5, type = "both")
266
+VARselect(issues_ts, lag.max = 5, type = "both")
267
+VARselect(issues_ts, lag.max = 5, type = "both")
268
+VARselect(issues_ts, lag.max = 5, type = "both")
269
+VARselect(issues_ts, lag.max = 5, type = "both")
270
+summary(ur.df(issues_ts[, 2], type ="none", lags=1))
271
+ur.df(issues_ts[, 2], type ="none", lags=1)
272
+head(issues_ts)
273
+issues_ts$i1.macro
274
+issues_ts[, "i1.macro"]
275
+summary(ur.df(issues_ts[, "i1.macro"], type ="none", lags=1))
276
+ncol(issues_ts)
277
+for(i in 2:ncol(issues_ts)) {
278
+summary(ur.df(issues_ts[, i], type ="none", lags=1))
279
+}
280
+cat(summary(ur.df(issues_ts[, "i1.macro"], type ="none", lags=1)))
281
+paste(summary(ur.df(issues_ts[, "i1.macro"], type ="none", lags=1)))
282
+i
283
+summary(ur.df(issues_ts[, i], type ="none", lags=1))
284
+summary(ur.df(issues_ts[, 1], type ="none", lags=1))
285
+summary(ur.df(issues_ts[, 2], type ="none", lags=1))
286
+summary(ur.df(issues_ts[, 1], type ="none", lags=1))
287
+summary(ur.df(issues_ts[, 1], type ="none", lags=1))
288
+names(issues_ts)
289
+issues_ts[1,]
290
+summary(ur.df(issues_ts[, 1], type ="none", lags=1))
291
+summary(ur.df(issues_ts[, 2], type ="none", lags=1))
292
+summary(ur.df(issues_ts[, 43], type ="none", lags=1))
293
+summary(ur.df(issues_ts[, 43], type ="trend", lags=1))
294
+summary(ur.df(issues_ts[, 43], type ="none", lags=1))
295
+summary(ur.df(issues_ts[, 1], type ="none", lags=1))
296
+ur.df(issues_ts[, 1], type ="none", lags=1)
297
+for(i in 2:ncol(issues_ts)) {
298
+ur.df(issues_ts[, i], type ="none", lags=1)
299
+}
300
+ur.df(issues_ts[, i], type ="none", lags=1)
301
+test <- ur.df(issues_ts[, i], type ="none", lags=1)
302
+test
303
+for(i in 2:ncol(issues_ts)) {
304
+test <- ur.df(issues_ts[, i], type ="none", lags=1)
305
+cat(test)
306
+}
307
+class(test)
308
+as.character(test)
309
+for(i in 2:ncol(issues_ts)) {
310
+test[i] <- ur.df(issues_ts[, i], type ="none", lags=1)
311
+}
312
+ur.df(issues_ts[, 1], type ="none", lags=1)
313
+ur.df(issues_ts[, sprintf("%s", c(1,2,3))], type ="none", lags=1)
314
+ur.df(issues_ts[, sprintf("%i", c(1,2,3))], type ="none", lags=1)
315
+ur.df(issues_ts[, 2], type ="none", lags=1)
316
+ur.df(issues_ts[, 3], type ="none", lags=1)
317
+i <- 0
318
+i <- i + 1
319
+ur.df(issues_ts[, i], type ="none", lags=1)
320
+i <- i + 1
321
+ur.df(issues_ts[, i], type ="none", lags=1)
322
+i <- i + 1
323
+ur.df(issues_ts[, i], type ="none", lags=1)
324
+i <- i + 1
325
+ur.df(issues_ts[, i], type ="none", lags=1)
326
+i <- i + 1
327
+ur.df(issues_ts[, i], type ="none", lags=1)
328
+i <- i + 1
329
+ur.df(issues_ts[, i], type ="none", lags=1)
330
+i <- i + 1
331
+ur.df(issues_ts[, i], type ="none", lags=1)
332
+i <- i + 1
333
+ur.df(issues_ts[, i], type ="none", lags=1)
334
+i <- i + 1
335
+ur.df(issues_ts[, i], type ="none", lags=1)
336
+i <- i + 1
337
+ur.df(issues_ts[, i], type ="none", lags=1)
338
+i <- i + 1
339
+ur.df(issues_ts[, i], type ="none", lags=1)
340
+i <- i + 1
341
+ur.df(issues_ts[, i], type ="none", lags=1)
342
+i <- i + 1
343
+ur.df(issues_ts[, i], type ="none", lags=1)
344
+i <- i + 1
345
+ur.df(issues_ts[, i], type ="none", lags=1)
346
+i <- i + 1
347
+ur.df(issues_ts[, i], type ="none", lags=1)
348
+i <- i + 1
349
+ur.df(issues_ts[, i], type ="none", lags=1)
350
+i <- i + 1
351
+ur.df(issues_ts[, i], type ="none", lags=1)
352
+i <- i + 1
353
+ur.df(issues_ts[, i], type ="none", lags=1)
354
+i <- i + 1
355
+ur.df(issues_ts[, i], type ="none", lags=1)
356
+i <- i + 1
357
+ur.df(issues_ts[, i], type ="none", lags=1)
358
+i <- i + 1
359
+ur.df(issues_ts[, i], type ="none", lags=1)
360
+i <- i + 1
361
+ur.df(issues_ts[, i], type ="none", lags=1)
362
+i <- i + 1
363
+ur.df(issues_ts[, i], type ="none", lags=1)
364
+i <- i + 1
365
+ur.df(issues_ts[, i], type ="none", lags=1)
366
+i <- i + 1
367
+ur.df(issues_ts[, i], type ="none", lags=1)
368
+i <- i + 1
369
+ur.df(issues_ts[, i], type ="none", lags=1)
370
+i <- i + 1
371
+ur.df(issues_ts[, i], type ="none", lags=1)
372
+i <- i + 1
373
+ur.df(issues_ts[, i], type ="none", lags=1)
374
+i <- i + 1
375
+ur.df(issues_ts[, i], type ="none", lags=1)
376
+i <- i + 1
377
+ur.df(issues_ts[, i], type ="none", lags=1)
378
+i <- i + 1
379
+ur.df(issues_ts[, i], type ="none", lags=1)
380
+i <- i + 1
381
+ur.df(issues_ts[, i], type ="none", lags=1)
382
+i <- i + 1
383
+ur.df(issues_ts[, i], type ="none", lags=1)
384
+i <- i + 1
385
+ur.df(issues_ts[, i], type ="none", lags=1)
386
+i <- i + 1
387
+ur.df(issues_ts[, i], type ="none", lags=1)
388
+i <- i + 1
389
+ur.df(issues_ts[, i], type ="none", lags=1)
390
+i <- i + 1
391
+ur.df(issues_ts[, i], type ="none", lags=1)
392
+i <- i + 1
393
+ur.df(issues_ts[, i], type ="none", lags=1)
394
+i <- i + 1
395
+ur.df(issues_ts[, i], type ="none", lags=1)
396
+i <- i + 1
397
+ur.df(issues_ts[, i], type ="none", lags=1)
398
+i <- i + 1
399
+ur.df(issues_ts[, i], type ="none", lags=1)
400
+i <- i + 1
401
+ur.df(issues_ts[, i], type ="none", lags=1)
402
+i <- i + 1
403
+ur.df(issues_ts[, i], type ="none", lags=1)
404
+i <- i + 1
405
+ur.df(issues_ts[, i], type ="none", lags=1)
406
+i
293 407
 issueheads
294
-length(issueheads)
295
-issuelist
296
-length(issuelist)
297
-length(issuelist[*])
298
-length(issuelist[[*]])
299
-length(issuelist[1:43])
300
-length(issuelist[1)
301
-length(issuelist[1])
302
-length(issuelist[2])
303
-length(issuelist[[1]])
304
-length(issuelist[[2]])
305
-length(issuelist[[70]])
306
-length(issuelist[[43]])
307
-length(issuelist[[44]])
308
-length(issuelist[[1:43]])
309
-length(issuelist[[1-43]])
310 408
 length(issuelist[[2]])
311
-test <- 0
312
-num <- 0
313
-for(i in 1:length(issuelist)) {
314
-j <- length(issuelist[[i]])
315
-num <- num + j
316
-rm(j)
409
+# List all issues in one row
410
+for(i in 1:length(issueheads)) {
411
+cat(issueheads[i], "\n")
317 412
 }
318
-num
319
-drop_s <- which(str_detect(names(issues), "^s"))
320
-drop_i <- which(str_detect(names(issues), "^i"))
321
-issues_i <- issues[,-drop_s]
322
-issues_s <- issues[,-drop_i]
413
+vIssues <- VAR(issues_ts, p=1, type="both")
414
+issues_ts[1:20,1]
415
+issues_ts[,1]
416
+issues_ts[1,]
417
+issues_ts[1:21,]
418
+issues_ts[,1:21]
419
+issues_ts[1,1:21]
420
+issues_ts[1,22:43]
421
+issues_ts[1,22:44]
422
+issues_ts[1,22:43]
423
+plot(irf(vIssues, impulse = names(issues_ts[1:21]), response = names(issues_i[22:43])))
323 424
 require(stringr)
324
-drop_s <- which(str_detect(names(issues), "^s"))
325
-drop_i <- which(str_detect(names(issues), "^i"))
326
-issues_i <- issues[,-drop_s]
327
-issues_s <- issues[,-drop_i]
328
-issues_i$total <- rowSums(issues_i[2:ncol(issues_i)])
329
-issues_i$entropy <- 0
330
-for(r in 1:nrow(issues_i)) {
331
-curtotal <- as.numeric(issues_i$total[r])
332
-curp <- 0
333
-for(c in 2:ncol(issues_i)) {
334
-curcount <- as.numeric(issues_i[r,c])
335
-curp[c] <- curcount / curtotal
336
-}
337
-curp <- curp [2:length(curp)-2]
338
-curdrop <- which(curp==0)
339
-curp <- curp[-curdrop]
340
-issues_i$entropy[r] <- sum(-1 * curp * log(curp))
341
-}
342
-issues_s$total <- rowSums(issues_s[2:ncol(issues_s)])
343
-issues_s$entropy <- 0
344
-for(r in 1:nrow(issues_s)) {
345
-curtotal <- as.numeric(issues_s$total[r])
425
+require(reshape2)
426
+require(ggplot2)
427
+require(vars)
428
+vIssues
429
+plot(irf(vIssues, impulse = names(issues_ts[1:21]), response = names(issues_i[22:43])))
430
+plot(irf(vIssues, impulse = names(issues_ts[1:21]), response = names(issues_ts[22:43])))
431
+issues_s
432
+names(issues_s)
433
+names(issues_s[2:23])
434
+names(issuesi[2:22])
435
+names(issues_i[2:22])
436
+plot(irf(vIssues, impulse = names(issues_s[2:23]), response = names(issues_i[2:22])))
437
+plot(irf(vIssues, impulse = names(issues_s[2:23]), response = names(issues_i[2:22])))
438
+plot(irf(vIssues, impulse = names(issues_s[2:23]), response = names(issues_i[2:22])))
439
+irf(vIssues, impulse = names(issues_s[2:23]), response = names(issues_i[2:22]))
440
+vIRF <- irf(vIssues, impulse = names(issues_s[2:23]), response = names(issues_i[2:22]))
441
+summary(vIRF)
442
+vIRF$irf
443
+vIRF$boot
444
+vIRF$ortho
445
+vIRF$Lower
446
+vIRF$irf[1]
447
+vIRF$irf["s.boko"]
448
+summary(issues$hk)
449
+summary(issues$s.hk)
450
+summary(issues$s.nsa)
451
+summary(issues$s.gaza)
452
+summary(issues$s.boko)
453
+summary(issues$s.ebola)
454
+summary(issues$s.edathy)
455
+summary(issues$s.ferguson)
456
+summary(issues$s.gurlitt)
457
+summary(issues$s.is)
458
+summary(issues$s.pegida)
459
+summary(issues$s.schumi)
460
+summary(issues$s.tebartz)
461
+summary(issues$s.wm)
462
+summary(issues$s.wulff)
463
+plot(vIRF)
464
+names(issues)
465
+summary(issues[2:44])
466
+plot(vIRF)
467
+plot(vIRF, x=300, y=200)
468
+plot(vIRF, res = 300)
469
+plot(vIRF[1])
470
+plot(vIRF$irf[1])
471
+summary(issues[2:44])
472
+ur.df(issues_ts[, i], type ="none", lags=1)
473
+summary(ur.df(issues_ts[, i], type ="none", lags=1))
474
+summary(ur.df(issues_ts[,30], type ="none", lags=1))
475
+summary(issues[2])
476
+stats_entropy
477
+names(issues)
478
+issues_bak <- issues
479
+issues$total <- rowSums(issues[2:ncol(issues)])
480
+issues$entropy <- 0
481
+names(issues)
482
+issues$total <- rowSums(issues[2:ncol(issues)])
483
+issues$entropy <- 0
484
+for(r in 1:nrow(issues)) {
485
+curtotal <- as.numeric(issues$total[r])
346 486
 curp <- 0
347
-for(c in 2:ncol(issues_s)) {
348
-curcount <- as.numeric(issues_s[r,c])
487
+for(c in 2:ncol(issues)) {
488
+curcount <- as.numeric(issues[r,c])
349 489
 curp[c] <- curcount / curtotal
350 490
 }
351 491
 curp <- curp [2:length(curp)-2]
352 492
 curdrop <- which(curp==0)
353 493
 curp <- curp[-curdrop]
354
-issues_s$entropy[r] <- sum(-1 * curp * log(curp))
355
-}
356
-stats_total <- data.frame(date=drange)
357
-stats_total$tpd <- 0
358
-stats_total$ipd <- issues_i$total
359
-stats_total$spd <- issues_s$total
360
-# Total number of tweets per day over time
361
-for(r in 1:length(drange)) {
362
-stats_total$tpd[r] <- length(tweets[tweets[, "created_at"] == drange[r], "id_str"])
494
+issues$entropy[r] <- sum(-1 * curp * log(curp))
363 495
 }
364
-stats_melt <- melt(stats_total, id="date")
365
-g1 <- ggplot(data = stats_melt, aes(x=date,y=value,colour=variable, group=variable)) +
366
-geom_line()+
367
-geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1)
368
-g1
369
-require(ggplot2)
370
-stats_melt <- melt(stats_total, id="date")
371
-g1 <- ggplot(data = stats_melt, aes(x=date,y=value,colour=variable, group=variable)) +
372
-geom_line()+
373
-geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1)
374
-g1
375
-g1 <- ggplot(data = stats_melt, aes(x=date,y=value,colour=variable, group=variable)) +
376
-geom_line()+
377
-geom_smooth(size=1,formula = y ~ x, method="lm", se=FALSE, color=1)
378
-g1
379
-g1 <- ggplot(data = stats_melt, aes(x=date,y=value,colour=variable, group=variable)) +
380
-geom_line()+
381
-geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1)
382
-g1
383
-# Visuals for entropy in time series
384 496
 stats_entropy <- data.frame(date=drange)
385
-stats_entropy$entropy <- issues_i$entropy
386
-stats_entropy <- melt(stats_entropy, id="date")
387
-require(reshape2)
388
-stats_melt <- melt(stats_total, id="date")
389
-g1 <- ggplot(data = stats_melt, aes(x=date,y=value,colour=variable, group=variable)) +
390
-geom_line()+
391
-geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1)
392
-g1
393
-stats_entropy <- data.frame(date=drange)
394
-stats_entropy$entropy <- issues_i$entropy
497
+stats_entropy$entropy <- issues$entropy
395 498
 stats_entropy <- melt(stats_entropy, id="date")
396
-g1 <- ggplot(data = stats_entropy, aes(x=date,y=value,colour=variable, group=variable)) +
499
+g_entrop <- ggplot(data = stats_entropy, aes(x=date,y=value,colour=variable, group=variable)) +
397 500
 geom_line() +
398
-geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1)
399
-g1
400
-g1 <- ggplot(data = stats_entropy, aes(x=date,y=value,colour=variable, group=variable)) +
401
-geom_line() +
402
-geom_smooth(size=1,formula = y ~ x, method="lm", se=FALSE, color=1)
403
-g1
404
-vIssues <- VAR(issues_ts[,2:44], p=1, type="both")
405
-require(vars)
406
-VARselect(issues_ts, lag.max = 8, type = "both")
407
-vIssues <- VAR(issues_ts[,2:44], p=1, type="both")
408
-VARselect(issues_ts, lag.max = 16, type = "both")
409
-VARselect(issues_ts, lag.max = 4, type = "both")
410
-VARselect(issues_ts, lag.max = 5, type = "both")
411
-VARselect(issues_ts, lag.max = 6, type = "both")
412
-VARselect(issues_ts, lag.max = 5, type = "both")
413
-names(issues_ts)
414
-issues_ts
415
-issues_ts[2:44]
416
-issues_ts <- as.ts(issues[,2:44])
417
-issues_ts[1:1]
418
-issues_ts[,1]
419
-issues_ts[1,1]
420
-issues_ts2,1]
421
-issues_ts[2,1]
422
-issues_ts <- as.ts(issues[,2:44])
423
-VARselect(issues_ts, lag.max = 5, type = "both")
424
-VARselect(issues_ts, lag.max = 8, type = "both")
425
-VARselect(issues_ts, lag.max = 7, type = "both")
426
-VARselect(issues_ts, lag.max = 5, type = "both")
427
-vIssues <- VAR(issues_ts[,2:44], p=5, type="both")
428
-vIssues <- VAR(issues_ts, p=5, type="both")
429
-plot(irf(vIssues, impulse = names(issues_s[2:23]), response = names(issues_i[2:22])))
430
-require(stringr)
431
-require(XML)
432
-c_errors <- read.csv("issuecomp-codingsample-error.csv", header = F, sep=",", colClasses="character")
433
-names(c_errors) <- c("str_id", "code", "issue", "tags", "text")
434
-for(r in 1:nrow(c_errors)) {
435
-c_errcode <- as.character(c_errors$code[r])
436
-c_errissue <- as.character(c_errors$issue[r])
437
-c_errtags <- as.character(c_errors$tags[r])
438
-c_errtext <- as.character(c_errors$text[r])
439
-c_errid <- as.character(c_errors$str_id[r])
440
-cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="")
441
-source("issuecomp-codingsample-function2.R")
442
-}
443
-for(r in 1:nrow(c_errors)) {
444
-c_errcode <- as.character(c_errors$code[r])
445
-c_errissue <- as.character(c_errors$issue[r])
446
-c_errtags <- as.character(c_errors$tags[r])
447
-c_errtext <- as.character(c_errors$text[r])
448
-c_errid <- as.character(c_errors$str_id[r])
449
-cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="")
450
-source("issuecomp-codingsample-function2.R")
451
-}
452
-issueheads
453
-for(i in 1:length(issueheads)) {paste(issueheads[i])}
454
-for(i in 1:length(issueheads)) {cat(issueheads[i], "\n")}
455
-for(r in 1:nrow(c_errors)) {
456
-c_errcode <- as.character(c_errors$code[r])
457
-c_errissue <- as.character(c_errors$issue[r])
458
-c_errtags <- as.character(c_errors$tags[r])
459
-c_errtext <- as.character(c_errors$text[r])
460
-c_errid <- as.character(c_errors$str_id[r])
461
-cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="")
462
-source("issuecomp-codingsample-function2.R")
463
-}
464
-for(r in 1:nrow(c_errors)) {
465
-c_errcode <- as.character(c_errors$code[r])
466
-c_errissue <- as.character(c_errors$issue[r])
467
-c_errtags <- as.character(c_errors$tags[r])
468
-c_errtext <- as.character(c_errors$text[r])
469
-c_errid <- as.character(c_errors$str_id[r])
470
-cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="")
471
-source("issuecomp-codingsample-function2.R")
472
-}
473
-c_errors <- read.csv("issuecomp-codingsample-error.csv", header = F, sep=",", colClasses="character")
474
-names(c_errors) <- c("str_id", "code", "issue", "tags", "text")
475
-for(r in 1:nrow(c_errors)) {
476
-c_errcode <- as.character(c_errors$code[r])
477
-c_errissue <- as.character(c_errors$issue[r])
478
-c_errtags <- as.character(c_errors$tags[r])
479
-c_errtext <- as.character(c_errors$text[r])
480
-c_errid <- as.character(c_errors$str_id[r])
481
-cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="")
482
-source("issuecomp-codingsample-function2.R")
483
-}
484
-c_tmp <- read.csv("issuecomp-codingsample-error1.csv", header = F, colClasses="character")
485
-names(c_tmp) <- c("str_id", "all", "wrong", "tagged", "text")
486
-c_error1 <- c_tmp[, c("wrong", "tagged", "all", "text")]
487
-c_tmp <- read.csv("issuecomp-codingsample-error2.csv", header = F, colClasses="character")
488
-names(c_tmp) <- c("str_id", "all", "missing", "tagged", "text")
489
-c_error2 <- c_tmp[, c("missing", "text", "tagged", "all")]
490
-View(c_error2)
491
-summary(ur.df(issues_ts[, 2], type ="none", lags=1))
492
-require(vars)
493
-summary(ur.df(issues_ts[, 2], type ="none", lags=1))
494
-stability(vIssues)
495
-stability(vIssues[2:])
496
-stability(vIssues[2:44])
497
-plot(stability(vIssues))
498
-class(vIssues)
499
-summary(vIssues)
500
-plot(stability(vIssues[2]))
501
-plot(stability(vIssues), nc=2)
502
-plot(stability(vIssues), h=0.15)
503
-stability(vIssues)
504
-efp(formula = formula, data = data, type = type, h = h, dynamic = dynamic,
505
-rescale = rescale)
506
-plot(stability(vIssues), h=0.15)
507
-plot(stability(vIssues, h=0.15))
508
-plot(stability(vIssues, h=0.15, rescale = TRUE))
509
-plot(stability(vIssues, h=0.15, rescale = TRUE), nc=2)
510
-par("mar")
511
-par(mar=c(1,1,1,1))
512
-plot(stability(vIssues, h=0.15, rescale = TRUE), nc=2)
501
+geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1) +
502
+xlab("Zeitraum") + ylab("Entropie") +
503
+scale_colour_discrete(name  = "", labels = "Entropie") +
504
+theme(legend.title = element_text(size=14)) +
505
+theme(legend.text = element_text(size=12)) +
506
+theme(axis.title = element_text(size = 14))
507
+g_entrop
508
+View(issues)
509
+summary(issues$entropy)
510
+summary(issues$total)
511
+stats_total
512
+summary(issues[2:44])

+ 1
- 0
.gitignore View File

@@ -6,3 +6,4 @@ issuecomp-analysis.log
6 6
 issuecomp-codingsample-correct.csv
7 7
 issuecomp-codingsample-error.csv
8 8
 issuecomp-codingsample-error2.csv
9
+twitter-api-credentials.txt

+ 1
- 1
issuecomp-2-analysis.R View File

@@ -21,7 +21,7 @@ drange <- date_start + days(0:drange)
21 21
 # Import issues and prepare everything
22 22
 # Will only be filled after the large categorisation loop
23 23
 issues <- data.frame(date = drange)
24
-issuelist <- readLines("issues-v2.xml")
24
+issuelist <- readLines("issues-v3.xml")
25 25
 issuelist <- str_replace_all(string = issuelist, pattern = ".*<!-- .+ -->", "")
26 26
 issuelist <- xmlToList(issuelist)
27 27
 issueheads <- names(issuelist)

+ 71
- 34
issuecomp-3-calc.R View File

@@ -7,7 +7,7 @@ require(vars)
7 7
 drop_s <- which(str_detect(names(issues), "^s"))
8 8
 drop_i <- which(str_detect(names(issues), "^i"))
9 9
 issues_i <- issues[,-drop_s]
10
-issues_s <- issues[,-drop_i]
10
+issues <- issues[,-drop_i]
11 11
 
12 12
 # #
13 13
 # ENTROPY
@@ -15,7 +15,6 @@ issues_s <- issues[,-drop_i]
15 15
 # Entropy non-sensational issues
16 16
 issues_i$total <- rowSums(issues_i[2:ncol(issues_i)])
17 17
 issues_i$entropy <- 0
18
-
19 18
 for(r in 1:nrow(issues_i)) {
20 19
   curtotal <- as.numeric(issues_i$total[r])
21 20
   curp <- 0
@@ -30,71 +29,102 @@ for(r in 1:nrow(issues_i)) {
30 29
 }
31 30
 
32 31
 # Entropy sensational issues
33
-issues_s$total <- rowSums(issues_s[2:ncol(issues_s)])
34
-issues_s$entropy <- 0
32
+issues$total <- rowSums(issues[2:ncol(issues)])
33
+issues$entropy <- 0
34
+for(r in 1:nrow(issues)) {
35
+  curtotal <- as.numeric(issues$total[r])
36
+  curp <- 0
37
+  for(c in 2:ncol(issues)) {
38
+    curcount <- as.numeric(issues[r,c])
39
+    curp[c] <- curcount / curtotal
40
+  }
41
+  curp <- curp [2:length(curp)-2]
42
+  curdrop <- which(curp==0)
43
+  curp <- curp[-curdrop]
44
+  issues$entropy[r] <- sum(-1 * curp * log(curp))
45
+}
35 46
 
36
-for(r in 1:nrow(issues_s)) {
37
-  curtotal <- as.numeric(issues_s$total[r])
47
+# Entropy overall
48
+issues$total <- rowSums(issues[2:ncol(issues)])
49
+issues$entropy <- 0
50
+for(r in 1:nrow(issues)) {
51
+  curtotal <- as.numeric(issues$total[r])
38 52
   curp <- 0
39
-  for(c in 2:ncol(issues_s)) {
40
-    curcount <- as.numeric(issues_s[r,c])
53
+  for(c in 2:ncol(issues)) {
54
+    curcount <- as.numeric(issues[r,c])
41 55
     curp[c] <- curcount / curtotal
42 56
   }
43 57
   curp <- curp [2:length(curp)-2]
44 58
   curdrop <- which(curp==0)
45 59
   curp <- curp[-curdrop]
46
-  issues_s$entropy[r] <- sum(-1 * curp * log(curp))
60
+  issues$entropy[r] <- sum(-1 * curp * log(curp))
47 61
 }
48 62
 
49 63
 
50
-# Compare total tweets vs. total issue findings
64
+
65
+# Compare total tweets vs. total sensational & total unsensational
51 66
 stats_total <- data.frame(date=drange)
52 67
 stats_total$tpd <- 0
53 68
 stats_total$ipd <- issues_i$total
54
-stats_total$spd <- issues_s$total
69
+stats_total$spd <- issues$total
55 70
 # Total number of tweets per day over time
56 71
 for(r in 1:length(drange)) {
57 72
   stats_total$tpd[r] <- length(tweets[tweets[, "created_at"] == drange[r], "id_str"])
58 73
 }
59 74
 
75
+# VISUALS: Tweets per day vs. sensational vs. general findings
60 76
 stats_melt <- melt(stats_total, id="date")
61
-g1 <- ggplot(data = stats_melt, aes(x=date,y=value,colour=variable, group=variable)) + 
77
+g_perday <- ggplot(data = stats_melt, aes(x=date,y=value,colour=variable, group=variable)) + 
62 78
   geom_line()+ 
63
-  geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1)
64
-g1
79
+  geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1) +
80
+  xlab("Zeitraum") + ylab("Tweets pro Tag") + 
81
+  scale_colour_discrete(name  = "Tweets", labels = c("Gesamt", "Allgemein", "Sensation")) + 
82
+  theme(legend.title = element_text(size=14)) + 
83
+  theme(legend.text = element_text(size=12)) + 
84
+  theme(axis.title = element_text(size = 14))
85
+g_perday
65 86
 
66 87
 # Visuals for entropy in time series
67 88
 stats_entropy <- data.frame(date=drange)
68
-stats_entropy$entropy <- issues_i$entropy
89
+stats_entropy$entropy <- issues$entropy
69 90
 
70 91
 stats_entropy <- melt(stats_entropy, id="date")
71 92
 
72
-g1 <- ggplot(data = stats_entropy, aes(x=date,y=value,colour=variable, group=variable)) + 
93
+g_entrop <- ggplot(data = stats_entropy, aes(x=date,y=value,colour=variable, group=variable)) + 
73 94
   geom_line() + 
74
-  geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1)
75
-g1
95
+  geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1) +
96
+  xlab("Zeitraum") + ylab("Entropie") + 
97
+  scale_colour_discrete(name  = "", labels = "Entropie") + 
98
+  theme(legend.title = element_text(size=14)) + 
99
+  theme(legend.text = element_text(size=12)) + 
100
+  theme(axis.title = element_text(size = 14))
101
+g_entrop
76 102
 
77 103
 
78 104
 
79 105
 # VAR ---------------------------------------------------------------------
80 106
 
81 107
 # test <- VAR(issues[,2:32], p=1, type=c("const", "trend", "both", "none"), season=NULL, exogen = NULL, lag.max = NULL, ic = c("AIC", "HQ", "SC", "FPE"))
82
-# test <- VAR(issues_i[,2:22], p=1, type="none", exogen = issues_s[,2:3])
83
-# test <- VAR(issues_s[,2:11], p=1, type="none")
84
-# VAR(issues_s[,2:23], p=1, type=c("const", "trend", "both", "none"), season=NULL, exogen = issues_i[2:22])
108
+# test <- VAR(issues_i[,2:22], p=1, type="none", exogen = issues[,2:3])
109
+# test <- VAR(issues[,2:11], p=1, type="none")
110
+# VAR(issues[,2:23], p=1, type=c("const", "trend", "both", "none"), season=NULL, exogen = issues_i[2:22])
85 111
 
86 112
 issues_ts <- as.ts(issues[,2:44])
113
+
114
+# Tests
87 115
 VARselect(issues_ts, lag.max = 5, type = "both")
88
-vIssues <- VAR(issues_ts, p=5, type="both")
116
+i <- 0
117
+i <- i + 1
118
+ur.df(issues_ts[, i], type ="none", lags=1)
119
+summary(issues[2:44])
89 120
 
90
-plot(irf(vIssues, impulse = names(issues_s[2:23]), response = names(issues_i[2:22])))
91 121
 
92
-capture.output(print(summary(test), prmsd=TRUE, digits=1), file="out.txt")
122
+# VAR and IRF
123
+vIssues <- VAR(issues_ts, p=1, type="both")
124
+vIRF <- irf(vIssues, impulse = names(issues[2:23]), response = names(issues_i[2:22]))
125
+plot(vIRF)
93 126
 
94
-# Tests
95
-issues_ts <- as.ts(issues)
96
-VARselect(issues[2:44], lag.max = 8, type = "both")
97
-summary(ur.df(issues_ts[, 2], type ="none", lags=1))
127
+# capture.output(print(summary(test), prmsd=TRUE, digits=1), file="out.txt")
98 128
 
99 129
 # SOME TESTS --------------------------------------------------------------
100 130
 
@@ -116,16 +146,18 @@ rm(g1, r)
116 146
 
117 147
 
118 148
 # Show party percentage of twitter users
119
-acc_parties <- data.frame(party = c("cducsu", "spd", "linke", "gruene"))
120
-acc_parties$btw13 <- c(49.3, 30.6, 10.1, 10.0) # seats of party / 631 seats
149
+acc_parties <- data.frame(party = c("cducsu", "spd", "gruene", "linke"))
150
+acc_parties$btw13 <- c(49.3, 30.6, 10.0, 10.1) # seats of party / 631 seats
121 151
 acc_parties$twitter <- 0
122 152
 for(p in 1:nrow(acc_parties)) {
123 153
   acc_parties$twitter[p] <- round(nrow(acc_df[acc_df$party == as.character(acc_parties$party[p]), ]) / 280 * 100)
124 154
 }
125
-pie(acc_parties$btw13, col=c("black", "red", "purple", "green"), labels = c("CDU/CSU", "SPD", "Die LINKE", "Bündnis 90/Grüne"), clockwise = T,
126
-    main = "Seats of parties in the parliament")
127
-pie(acc_parties$twitter, col=c("black", "red", "purple", "green"), labels = c("CDU/CSU", "SPD", "Die LINKE", "Bündnis 90/Grüne"), clockwise = T,
128
-    main = "Percentage of parties' MdBs of all Twitter accounts")
155
+pie(acc_parties$btw13, col=c("black", "red", "green", "purple"), 
156
+    labels = c("CDU/CSU (49,3%)", "SPD (30,6%)", "Bündnis 90/Grüne(10,0%)", "Die LINKE (10,1%)"), 
157
+    clockwise = T)
158
+pie(acc_parties$twitter, col=c("black", "red", "green", "purple"), 
159
+    labels = c("CDU/CSU (36%)", "SPD (30%)", "Bündnis 90/Grüne(19%)", "Die LINKE (15%)"), 
160
+    clockwise = T)
129 161
 
130 162
 rm(acc_parties, p)
131 163
 
@@ -152,6 +184,11 @@ ggplot(issues_melt,aes(x=date,y=value,colour=variable,group=variable)) + geom_sm
152 184
 
153 185
 # POSSIBLY USEFUL CODE ----------------------------------------------------
154 186
 
187
+# List all issues in one row
188
+for(i in 1:length(issueheads)) {
189
+  cat(issueheads[i], "\n")
190
+}
191
+
155 192
 # Limits of list
156 193
 length(issuelist)
157 194
 length(issuelist[[2]])

+ 0
- 5
twitter-api-credentials.txt View File

@@ -1,5 +0,0 @@
1
-# 1. line: consumer key, 2. consumer secret, 3. oauth token, 4. oauth secret
2
-c9Ob2fWNSONMC0mA2JlNaeRke
3
-cZ3Il2hmbLgK0Lc57mj5kUvymjVdsmZKYwKOGHR3NhCpvWgEOI
4
-1007025684-RFxCDFc4OPkt02bASmdci00TB4jgaPjfqxLRT58
5
-rvfv8MgexFKTqrPNSoGrdrZVNhV4fTJb2Bgz249nbvKNg

+ 5
- 0
twitter-api-credentials.txt.sample View File

@@ -0,0 +1,5 @@
1
+# 1. line: consumer key, 2. consumer secret, 3. oauth token, 4. oauth secret
2
+xxxxxxxxxxxxxxxxxxxx
3
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
4
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
5
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxx

+ 22
- 0
ur.df-testvalues.txt View File

@@ -0,0 +1,22 @@
1
+i1.macro  & -5,869 & s.nsa  & -7,3292 \\
2
+i2.civil  & -4,4172 & s.is  & -7,762 \\
3
+i3.health  & -9,5973 & s.ebola  & -6,1723 \\
4
+i4.agrar  & -8,5183 & s.edathy  & -7,0335 \\
5
+i5.labor  & -7,523 & s.ukraine  & -5,6195 \\
6
+i6.edu  & -6,4374 & s.hk  & -6,7599 \\
7
+i7.env  & -7,1426 & s.mh17  & -6,1481 \\
8
+i8.energy  & -7,3613 & s.gaza  & -5,3861 \\
9
+i10.trans  & -5,6718 & s.ferguson  & -8,8098 \\
10
+i12.law  & -5,9882 & s.boko  & -10,4431 \\
11
+i13.social  & -6,7765 & s.pegida  & -6,3831 \\
12
+i14.house  & -8,9577 & s.schumi  & -12,4947 \\
13
+i15.finance  & -5,9094 & s.mh370  & -7,8991 \\
14
+i16.defense  & -6,9535 & s.esc  & -11,6332 \\
15
+i17.science  & -5,9651 & s.wulff  & -12,4189 \\
16
+i18.trade  & -7,6121 & s.tebartz  & -12,6301 \\
17
+i19.ib  & -3,8057 & s.gurlitt  & -10,7665 \\
18
+i20.pubadmin  & -6,7123 & s.hoen  & -9,3721 \\
19
+i21.publand  & -9,9521 & s.pistorius  & -13,435 \\
20
+i24.stateadm  & -13,435 & s.philae  & -12,7024 \\
21
+i25.reuni  & -5,2011 & s.wm  & -8,7283 \\
22
+ &  & s.sotschi  & -8,2339 \\

Loading…
Cancel
Save