Browse Source

v3 tagging done

mxmehl 5 years ago
parent
commit
e009060084
8 changed files with 273 additions and 243 deletions
  1. 221
    221
      .Rhistory
  2. 6
    5
      issuecomp-2-analysis.R
  3. 4
    3
      issuecomp-3-calc.R
  4. 0
    10
      issues-expand.xml
  5. 42
    4
      issues-v3.xml
  6. BIN
      issues.RData
  7. BIN
      matched-ids-v3.zip
  8. BIN
      tweets_tagged.RData

+ 221
- 221
.Rhistory View File

@@ -1,224 +1,3 @@
1
-c_errcode <- as.character(c_errors$code[r])
2
-c_errissue <- as.character(c_errors$issue[r])
3
-c_errtags <- as.character(c_errors$tags[r])
4
-c_errtext <- as.character(c_errors$text[r])
5
-c_errid <- as.character(c_errors$str_id[r])
6
-cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="")
7
-source("issuecomp-codingsample-function2.R")
8
-}
9
-c_tmp <- read.csv("issuecomp-codingsample-error1.csv", header = F, colClasses="character")
10
-View(c_tmp)
11
-View(c_errors)
12
-View(c_tmp)
13
-names(c_tmp) <- c("str_id", "all", "wrong", "tags", "text")
14
-View(c_tmp)
15
-c_tmp[, c("wrong", "tagged", "all", "text")]
16
-View(c_tmp)
17
-names(c_tmp) <- c("str_id", "all", "wrong", "tagged", "text")
18
-c_tmp[, c("wrong", "tagged", "all", "text")]
19
-c_error1 <- c_tmp[, c("wrong", "tagged", "all", "text")]
20
-View(c_error1)
21
-c_tmp <- read.csv("issuecomp-codingsample-error2.csv", header = F, colClasses="character")
22
-View(c_tmp)
23
-c_tmp <- read.csv("issuecomp-codingsample-error2.csv", header = F, colClasses="character")
24
-names(c_tmp) <- c("str_id", "all", "missing", "tagged", "text")
25
-c_error1 <- c_tmp[, c("missing", "tagged", "all", "text")]
26
-c_error2 <- c_tmp[, c("missing", "tagged", "all", "text")]
27
-View(c_error2)
28
-c_error2 <- c_tmp[, c("missing", "text", "tagged", "all")]
29
-View(c_error2)
30
-View(c_error1)
31
-View(c_error2)
32
-c_tmp <- read.csv("issuecomp-codingsample-correct.csv", header = F, colClasses="character")
33
-View(c_tmp)
34
-names(c_tmp) <- c("str_id", "status", "issue", "tags", "text")
35
-View(c_tmp)
36
-c_currect <- c_tmp
37
-c_correct <- c_tmp
38
-rm(c_currect)
39
-View(c_correct)
40
-source("issuecomp-codingsample-function.R")
41
-rm(c_err, c_result, c_samid, c_samno,c_samtags,c_samissue,c_samtext,c_yn)
42
-c_errors <- read.csv("issuecomp-codingsample-error.csv", header = F, sep=",", colClasses="character")
43
-c_errors <- read.csv("issuecomp-codingsample-error.csv", header = F, sep=",", colClasses="character")
44
-c_errors <- read.csv("issuecomp-codingsample-error.csv", header = F, sep=",", colClasses="character")
45
-c_errors <- read.csv("issuecomp-codingsample-error.csv", header = F, sep=",", colClasses="character")
46
-c_errors <- read.csv("issuecomp-codingsample-error.csv", header = F, sep=",", colClasses="character", quote = "")
47
-View(c_errors)
48
-c_errors <- read.csv("issuecomp-codingsample-error.csv", header = F, sep=",", colClasses="character")
49
-test <- "Zitat "total dämlich!""
50
-tweets$id_str == "523512815425175552"
51
-tweets[tweets$id_str == "523512815425175552"]
52
-tweets[tweets$id_str == "523512815425175552", ]
53
-tweets[tweets$id_str == "523512815425175552", "text"]
54
-test <- tweets[tweets$id_str == "523512815425175552", "text"]
55
-test
56
-test <- c_tweets[ctweets$id_str == "523512815425175552", "text"]
57
-test <- c_tweets[c_tweets$id_str == "523512815425175552", "text"]
58
-test
59
-str_replace(test, "\\"", ")
60
-str_replace(test, "\\"", "")
61
-str_replace(test, "\"", "")
62
-str_detect(test, "\"")
63
-test <- as.character(c_tweets[c_tweets$id_str == "523512815425175552", "text"])
64
-test
65
-c_tweets <- read.csv("tweets.csv", colClasses="character")
66
-for(r in 1:nrow(c_tweets)) {
67
-curtext <- as.character(c_tweets$text[r])
68
-if(str_detect(curtext, "\"") {
69
-c_tweets$text[r] <- str_replace(curtext, "\"", "")
70
-}
71
-}
72
-for(r in 1:nrow(c_tweets)) {
73
-curtext <- as.character(c_tweets$text[r])
74
-if(str_detect(curtext, "\"") {
75
-c_tweets$text[r] <- str_replace(curtext, "\"", "")
76
-} else {}
77
-}
78
-for(r in 1:nrow(c_tweets)) {
79
-curtext <- as.character(c_tweets$text[r])
80
-if(str_detect(curtext, "\"") {
81
-c_tweets$text[r] <- str_replace(curtext, "\"", "")
82
-} else {
83
-}
84
-}
85
-for(r in 1:nrow(c_tweets)) {
86
-curtext <- as.character(c_tweets$text[r])
87
-if(str_detect(curtext, "\"")) {
88
-c_tweets$text[r] <- str_replace(curtext, "\"", "")
89
-}
90
-}
91
-test <- as.character(c_tweets[c_tweets$id_str == "523512815425175552", "text"])
92
-test
93
-View(c_tweets)
94
-c_errors <- read.csv("issuecomp-codingsample-error.csv", header = F, sep=",", colClasses="character")
95
-View(c_errors)
96
-names(c_errors) <- c("str_id", "code", "issue", "tags", "text")
97
-View(c_errors)
98
-for(r in 1:nrow(c_errors)) {
99
-c_errcode <- as.character(c_errors$code[r])
100
-c_errissue <- as.character(c_errors$issue[r])
101
-c_errtags <- as.character(c_errors$tags[r])
102
-c_errtext <- as.character(c_errors$text[r])
103
-c_errid <- as.character(c_errors$str_id[r])
104
-cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="")
105
-source("issuecomp-codingsample-function2.R")
106
-}
107
-issueheads
108
-for(r in 1:nrow(c_errors)) {
109
-c_errcode <- as.character(c_errors$code[r])
110
-c_errissue <- as.character(c_errors$issue[r])
111
-c_errtags <- as.character(c_errors$tags[r])
112
-c_errtext <- as.character(c_errors$text[r])
113
-c_errid <- as.character(c_errors$str_id[r])
114
-cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="")
115
-source("issuecomp-codingsample-function2.R")
116
-}
117
-# All tweets with WRONG ISSUES
118
-c_tmp <- read.csv("issuecomp-codingsample-error1.csv", header = F, colClasses="character")
119
-names(c_tmp) <- c("str_id", "all", "wrong", "tagged", "text")
120
-c_error1 <- c_tmp[, c("wrong", "tagged", "all", "text")]
121
-# All tweets with MISSING ISSUES
122
-c_tmp <- read.csv("issuecomp-codingsample-error2.csv", header = F, colClasses="character")
123
-names(c_tmp) <- c("str_id", "all", "missing", "tagged", "text")
124
-c_error2 <- c_tmp[, c("missing", "text", "tagged", "all")]
125
-# All CORRECT tweets
126
-c_tmp <- read.csv("issuecomp-codingsample-correct.csv", header = F, colClasses="character")
127
-names(c_tmp) <- c("str_id", "status", "issue", "tags", "text")
128
-c_correct <- c_tmp
129
-View(c_error1)
130
-View(c_error2)
131
-View(c_error1)
132
-View(c_correct)
133
-test <- VAR(issues_i[,2:22], p=1, type="none", exogen = issues_s[,2:3])
134
-plot(irf(test, impulse = names(issues_s[2:11]), response = names(issues_i[2:22])))
135
-test <- VAR(issues[,2:32], p=1, type="none")
136
-plot(irf(test, impulse = names(issues_s[2:11]), response = names(issues_i[2:22])))
137
-VARselect(issues[,2:32], lag.max=8, type="none")
138
-VARselect(issues[,2:32], lag.max=8, type="both")
139
-VARselect(issues[,2:32], lag.max=30, type="both")
140
-VARselect(issues[,2:32], lag.max=15, type="both")
141
-for(i in 1:20) { cat(i,"\n") Sys.sleep(10)}
142
-for(i in 1:20) { cat(i,"\n")Sys.sleep(10)}
143
-for(i in 1:20) { cat(i,"\n")Sys.sleep(10)}
144
-for(i in 1:20) { cat(i,"\n")
145
-Sys.sleep(10)}
146
-list.dirs()
147
-list.files()
148
-rm(results)
149
-setwd("matched-ids/")
150
-results_files <- list.files()
151
-results_files
152
-results_files <- "all.csv"
153
-for(r in 1:length(results_files)) {
154
-if(r == 1) {
155
-results <- read.csv(results_files[r], sep=";", colClasses=c("character", "character", "character", "character"), header=F)
156
-} else {
157
-results_temp <- read.csv(results_files[r], sep=";", colClasses=c("character", "character", "character", "character"), header=F)
158
-results <- insertRow(results, results_temp)
159
-}
160
-}
161
-rm(r, results_temp, results_files)
162
-results <- results[!duplicated(results), ]
163
-names(results) <- c("date", "id_str", "issue", "tags")
164
-results <- results[order(results$id_str), ]
165
-row.names(results) <- NULL
166
-results[23381,]
167
-results[53381,]
168
-results[43253,]
169
-for(r in 53371:nrow(results)) {
170
-curdate <- as.character(results$date[r])
171
-curid <- as.character(results$id_str[r])
172
-curissue <- as.character(results$issue[r])
173
-curtag <- as.character(results$tags[r])
174
-cat("Sorting match", r, "of 53383 \n")
175
-# Update issue counter (date and issue)
176
-issues[issues[, "date"] == curdate, curissue] <- issues[issues[, "date"] == curdate, curissue] + 1
177
-# Update tweet dataframe (id, issue and tags)
178
-oldissue <- tweets[tweets[, "id_str"] == curid, "issue"]
179
-tweets[tweets[, "id_str"] == curid, "issue"] <- str_c(oldissue, curissue, ",")
180
-oldtag <- tweets[tweets[, "id_str"] == curid, "tags"]
181
-tweets[tweets[, "id_str"] == curid, "tags"] <- str_c(oldtag, curtag, ",")
182
-}
183
-issues[issueheads] <- 0
184
-View(issues)
185
-for(r in 1:nrow(results)) {
186
-curdate <- as.character(results$date[r])
187
-curid <- as.character(results$id_str[r])
188
-curissue <- as.character(results$issue[r])
189
-curtag <- as.character(results$tags[r])
190
-cat("Sorting match", r, "of 53383 \n")
191
-# Update issue counter (date and issue)
192
-issues[issues[, "date"] == curdate, curissue] <- issues[issues[, "date"] == curdate, curissue] + 1
193
-# Update tweet dataframe (id, issue and tags)
194
-oldissue <- tweets[tweets[, "id_str"] == curid, "issue"]
195
-tweets[tweets[, "id_str"] == curid, "issue"] <- str_c(oldissue, curissue, ",")
196
-oldtag <- tweets[tweets[, "id_str"] == curid, "tags"]
197
-tweets[tweets[, "id_str"] == curid, "tags"] <- str_c(oldtag, curtag, ",")
198
-}
199
-require(lubridate)
200
-require(XML)
201
-require(ggplot2)
202
-require(reshape2)
203
-require(stringr)
204
-require(foreach)
205
-require(doParallel)
206
-for(r in 1:nrow(results)) {
207
-curdate <- as.character(results$date[r])
208
-curid <- as.character(results$id_str[r])
209
-curissue <- as.character(results$issue[r])
210
-curtag <- as.character(results$tags[r])
211
-cat("Sorting match", r, "of 53383 \n")
212
-# Update issue counter (date and issue)
213
-issues[issues[, "date"] == curdate, curissue] <- issues[issues[, "date"] == curdate, curissue] + 1
214
-# Update tweet dataframe (id, issue and tags)
215
-oldissue <- tweets[tweets[, "id_str"] == curid, "issue"]
216
-tweets[tweets[, "id_str"] == curid, "issue"] <- str_c(oldissue, curissue, ",")
217
-oldtag <- tweets[tweets[, "id_str"] == curid, "tags"]
218
-tweets[tweets[, "id_str"] == curid, "tags"] <- str_c(oldtag, curtag, ",")
219
-}
220
-results[119,]
221
-results[120,]
222 1
 load(file = "tweets_untagged.RData")
223 2
 setwd("~/Dokumente/Uni/Aktuell/BA-Arbeit/uni-ba-issuecomp")
224 3
 results_files <- "matched-ids/all.csv"
@@ -510,3 +289,224 @@ rm(delrow, r, acc)
510 289
 acc_df$row.names <- NULL
511 290
 row.names(acc_df) <- NULL
512 291
 View(acc_df)
292
+View(c_tweets)
293
+issueheads
294
+length(issueheads)
295
+issuelist
296
+length(issuelist)
297
+length(issuelist[*])
298
+length(issuelist[[*]])
299
+length(issuelist[1:43])
300
+length(issuelist[1)
301
+length(issuelist[1])
302
+length(issuelist[2])
303
+length(issuelist[[1]])
304
+length(issuelist[[2]])
305
+length(issuelist[[70]])
306
+length(issuelist[[43]])
307
+length(issuelist[[44]])
308
+length(issuelist[[1:43]])
309
+length(issuelist[[1-43]])
310
+length(issuelist[[2]])
311
+test <- 0
312
+num <- 0
313
+for(i in 1:length(issuelist)) {
314
+j <- length(issuelist[[i]])
315
+num <- num + j
316
+rm(j)
317
+}
318
+num
319
+drop_s <- which(str_detect(names(issues), "^s"))
320
+drop_i <- which(str_detect(names(issues), "^i"))
321
+issues_i <- issues[,-drop_s]
322
+issues_s <- issues[,-drop_i]
323
+require(stringr)
324
+drop_s <- which(str_detect(names(issues), "^s"))
325
+drop_i <- which(str_detect(names(issues), "^i"))
326
+issues_i <- issues[,-drop_s]
327
+issues_s <- issues[,-drop_i]
328
+issues_i$total <- rowSums(issues_i[2:ncol(issues_i)])
329
+issues_i$entropy <- 0
330
+for(r in 1:nrow(issues_i)) {
331
+curtotal <- as.numeric(issues_i$total[r])
332
+curp <- 0
333
+for(c in 2:ncol(issues_i)) {
334
+curcount <- as.numeric(issues_i[r,c])
335
+curp[c] <- curcount / curtotal
336
+}
337
+curp <- curp [2:length(curp)-2]
338
+curdrop <- which(curp==0)
339
+curp <- curp[-curdrop]
340
+issues_i$entropy[r] <- sum(-1 * curp * log(curp))
341
+}
342
+issues_s$total <- rowSums(issues_s[2:ncol(issues_s)])
343
+issues_s$entropy <- 0
344
+for(r in 1:nrow(issues_s)) {
345
+curtotal <- as.numeric(issues_s$total[r])
346
+curp <- 0
347
+for(c in 2:ncol(issues_s)) {
348
+curcount <- as.numeric(issues_s[r,c])
349
+curp[c] <- curcount / curtotal
350
+}
351
+curp <- curp [2:length(curp)-2]
352
+curdrop <- which(curp==0)
353
+curp <- curp[-curdrop]
354
+issues_s$entropy[r] <- sum(-1 * curp * log(curp))
355
+}
356
+stats_total <- data.frame(date=drange)
357
+stats_total$tpd <- 0
358
+stats_total$ipd <- issues_i$total
359
+stats_total$spd <- issues_s$total
360
+# Total number of tweets per day over time
361
+for(r in 1:length(drange)) {
362
+stats_total$tpd[r] <- length(tweets[tweets[, "created_at"] == drange[r], "id_str"])
363
+}
364
+stats_melt <- melt(stats_total, id="date")
365
+g1 <- ggplot(data = stats_melt, aes(x=date,y=value,colour=variable, group=variable)) +
366
+geom_line()+
367
+geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1)
368
+g1
369
+require(ggplot2)
370
+stats_melt <- melt(stats_total, id="date")
371
+g1 <- ggplot(data = stats_melt, aes(x=date,y=value,colour=variable, group=variable)) +
372
+geom_line()+
373
+geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1)
374
+g1
375
+g1 <- ggplot(data = stats_melt, aes(x=date,y=value,colour=variable, group=variable)) +
376
+geom_line()+
377
+geom_smooth(size=1,formula = y ~ x, method="lm", se=FALSE, color=1)
378
+g1
379
+g1 <- ggplot(data = stats_melt, aes(x=date,y=value,colour=variable, group=variable)) +
380
+geom_line()+
381
+geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1)
382
+g1
383
+# Visuals for entropy in time series
384
+stats_entropy <- data.frame(date=drange)
385
+stats_entropy$entropy <- issues_i$entropy
386
+stats_entropy <- melt(stats_entropy, id="date")
387
+require(reshape2)
388
+stats_melt <- melt(stats_total, id="date")
389
+g1 <- ggplot(data = stats_melt, aes(x=date,y=value,colour=variable, group=variable)) +
390
+geom_line()+
391
+geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1)
392
+g1
393
+stats_entropy <- data.frame(date=drange)
394
+stats_entropy$entropy <- issues_i$entropy
395
+stats_entropy <- melt(stats_entropy, id="date")
396
+g1 <- ggplot(data = stats_entropy, aes(x=date,y=value,colour=variable, group=variable)) +
397
+geom_line() +
398
+geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1)
399
+g1
400
+g1 <- ggplot(data = stats_entropy, aes(x=date,y=value,colour=variable, group=variable)) +
401
+geom_line() +
402
+geom_smooth(size=1,formula = y ~ x, method="lm", se=FALSE, color=1)
403
+g1
404
+vIssues <- VAR(issues_ts[,2:44], p=1, type="both")
405
+require(vars)
406
+VARselect(issues_ts, lag.max = 8, type = "both")
407
+vIssues <- VAR(issues_ts[,2:44], p=1, type="both")
408
+VARselect(issues_ts, lag.max = 16, type = "both")
409
+VARselect(issues_ts, lag.max = 4, type = "both")
410
+VARselect(issues_ts, lag.max = 5, type = "both")
411
+VARselect(issues_ts, lag.max = 6, type = "both")
412
+VARselect(issues_ts, lag.max = 5, type = "both")
413
+names(issues_ts)
414
+issues_ts
415
+issues_ts[2:44]
416
+issues_ts <- as.ts(issues[,2:44])
417
+issues_ts[1:1]
418
+issues_ts[,1]
419
+issues_ts[1,1]
420
+issues_ts2,1]
421
+issues_ts[2,1]
422
+issues_ts <- as.ts(issues[,2:44])
423
+VARselect(issues_ts, lag.max = 5, type = "both")
424
+VARselect(issues_ts, lag.max = 8, type = "both")
425
+VARselect(issues_ts, lag.max = 7, type = "both")
426
+VARselect(issues_ts, lag.max = 5, type = "both")
427
+vIssues <- VAR(issues_ts[,2:44], p=5, type="both")
428
+vIssues <- VAR(issues_ts, p=5, type="both")
429
+plot(irf(vIssues, impulse = names(issues_s[2:23]), response = names(issues_i[2:22])))
430
+require(stringr)
431
+require(XML)
432
+c_errors <- read.csv("issuecomp-codingsample-error.csv", header = F, sep=",", colClasses="character")
433
+names(c_errors) <- c("str_id", "code", "issue", "tags", "text")
434
+for(r in 1:nrow(c_errors)) {
435
+c_errcode <- as.character(c_errors$code[r])
436
+c_errissue <- as.character(c_errors$issue[r])
437
+c_errtags <- as.character(c_errors$tags[r])
438
+c_errtext <- as.character(c_errors$text[r])
439
+c_errid <- as.character(c_errors$str_id[r])
440
+cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="")
441
+source("issuecomp-codingsample-function2.R")
442
+}
443
+for(r in 1:nrow(c_errors)) {
444
+c_errcode <- as.character(c_errors$code[r])
445
+c_errissue <- as.character(c_errors$issue[r])
446
+c_errtags <- as.character(c_errors$tags[r])
447
+c_errtext <- as.character(c_errors$text[r])
448
+c_errid <- as.character(c_errors$str_id[r])
449
+cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="")
450
+source("issuecomp-codingsample-function2.R")
451
+}
452
+issueheads
453
+for(i in 1:length(issueheads)) {paste(issueheads[i])}
454
+for(i in 1:length(issueheads)) {cat(issueheads[i], "\n")}
455
+for(r in 1:nrow(c_errors)) {
456
+c_errcode <- as.character(c_errors$code[r])
457
+c_errissue <- as.character(c_errors$issue[r])
458
+c_errtags <- as.character(c_errors$tags[r])
459
+c_errtext <- as.character(c_errors$text[r])
460
+c_errid <- as.character(c_errors$str_id[r])
461
+cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="")
462
+source("issuecomp-codingsample-function2.R")
463
+}
464
+for(r in 1:nrow(c_errors)) {
465
+c_errcode <- as.character(c_errors$code[r])
466
+c_errissue <- as.character(c_errors$issue[r])
467
+c_errtags <- as.character(c_errors$tags[r])
468
+c_errtext <- as.character(c_errors$text[r])
469
+c_errid <- as.character(c_errors$str_id[r])
470
+cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="")
471
+source("issuecomp-codingsample-function2.R")
472
+}
473
+c_errors <- read.csv("issuecomp-codingsample-error.csv", header = F, sep=",", colClasses="character")
474
+names(c_errors) <- c("str_id", "code", "issue", "tags", "text")
475
+for(r in 1:nrow(c_errors)) {
476
+c_errcode <- as.character(c_errors$code[r])
477
+c_errissue <- as.character(c_errors$issue[r])
478
+c_errtags <- as.character(c_errors$tags[r])
479
+c_errtext <- as.character(c_errors$text[r])
480
+c_errid <- as.character(c_errors$str_id[r])
481
+cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="")
482
+source("issuecomp-codingsample-function2.R")
483
+}
484
+c_tmp <- read.csv("issuecomp-codingsample-error1.csv", header = F, colClasses="character")
485
+names(c_tmp) <- c("str_id", "all", "wrong", "tagged", "text")
486
+c_error1 <- c_tmp[, c("wrong", "tagged", "all", "text")]
487
+c_tmp <- read.csv("issuecomp-codingsample-error2.csv", header = F, colClasses="character")
488
+names(c_tmp) <- c("str_id", "all", "missing", "tagged", "text")
489
+c_error2 <- c_tmp[, c("missing", "text", "tagged", "all")]
490
+View(c_error2)
491
+summary(ur.df(issues_ts[, 2], type ="none", lags=1))
492
+require(vars)
493
+summary(ur.df(issues_ts[, 2], type ="none", lags=1))
494
+stability(vIssues)
495
+stability(vIssues[2:])
496
+stability(vIssues[2:44])
497
+plot(stability(vIssues))
498
+class(vIssues)
499
+summary(vIssues)
500
+plot(stability(vIssues[2]))
501
+plot(stability(vIssues), nc=2)
502
+plot(stability(vIssues), h=0.15)
503
+stability(vIssues)
504
+efp(formula = formula, data = data, type = type, h = h, dynamic = dynamic,
505
+rescale = rescale)
506
+plot(stability(vIssues), h=0.15)
507
+plot(stability(vIssues, h=0.15))
508
+plot(stability(vIssues, h=0.15, rescale = TRUE))
509
+plot(stability(vIssues, h=0.15, rescale = TRUE), nc=2)
510
+par("mar")
511
+par(mar=c(1,1,1,1))
512
+plot(stability(vIssues, h=0.15, rescale = TRUE), nc=2)

+ 6
- 5
issuecomp-2-analysis.R View File

@@ -147,9 +147,9 @@ stopCluster(cl)
147 147
 # IMPORT RESULTS ----------------------------------------------------------
148 148
 
149 149
 # Import all files which have been generated at the categorisation run above.
150
-#setwd("matched-ids/")
151
-#results_files <- list.files()
152
-results_files <- "matched-ids/all.csv"
150
+
151
+results_files <- list.files("matched-ids/", full.names = T)
152
+
153 153
 for(r in 1:length(results_files)) {
154 154
   if(r == 1) {
155 155
     results <- read.csv(results_files[r], sep=";", colClasses=c("character", "character", "character", "character"), header=F)
@@ -172,13 +172,14 @@ row.names(results) <- NULL
172 172
 # Reset issues counter
173 173
 #issues[issueheads] <- 0
174 174
 
175
-for(r in 33170:nrow(results)) {
175
+nrow_results <- nrow(results)
176
+for(r in 1:nrow_results) {
176 177
   curdate <- as.character(results$date[r])
177 178
   curid <- as.character(results$id_str[r])
178 179
   curissue <- as.character(results$issue[r])
179 180
   curtag <- as.character(results$tags[r])
180 181
   
181
-  cat("Sorting match", r, "of 53383 \n")
182
+  cat("Sorting match", r, "of", nrow_results, "\n")
182 183
   
183 184
   # Update issue counter (date and issue)
184 185
   issues[issues[, "date"] == curdate, curissue] <- issues[issues[, "date"] == curdate, curissue] + 1

+ 4
- 3
issuecomp-3-calc.R View File

@@ -83,10 +83,11 @@ g1
83 83
 # test <- VAR(issues_s[,2:11], p=1, type="none")
84 84
 # VAR(issues_s[,2:23], p=1, type=c("const", "trend", "both", "none"), season=NULL, exogen = issues_i[2:22])
85 85
 
86
-issues_ts <- as.ts(issues)
87
-vIssues <- VAR(issues_ts[,2:44], p=1, type="both")
86
+issues_ts <- as.ts(issues[,2:44])
87
+VARselect(issues_ts, lag.max = 5, type = "both")
88
+vIssues <- VAR(issues_ts, p=5, type="both")
88 89
 
89
-plot(irf(test, impulse = names(issues_s[2:23]), response = names(issues_i[2:22])))
90
+plot(irf(vIssues, impulse = names(issues_s[2:23]), response = names(issues_i[2:22])))
90 91
 
91 92
 capture.output(print(summary(test), prmsd=TRUE, digits=1), file="out.txt")
92 93
 

+ 0
- 10
issues-expand.xml View File

@@ -1,11 +1 @@
1
-<s.ukraine>
2
-  <tag>#Janukowitsch</tag>
3
-</s.ukraine>
4 1
 
5
-<i2.civil>
6
-  <tag>Foltermethode</tag>
7
-</i2.civil>
8
-
9
-<i19.ib>
10
-  --<tag>Afghanistan</tag>
11
-</i19.ib>

+ 42
- 4
issues-v3.xml View File

@@ -46,6 +46,17 @@
46 46
 </i1.macro>
47 47
 
48 48
 <i2.civil>
49
+  <tag>Foltermethode</tag>
50
+  <tag>Immigrant</tag>
51
+  <tag>Menschengerichtshof</tag>
52
+  <tag>Einwander</tag>
53
+  <tag>Faschismus</tag>
54
+  <tag>Faschist</tag>
55
+  <tag>Antisemitismus</tag>
56
+  <tag>Antisemitist</tag>
57
+  <tag>Nationalismus</tag>
58
+  <tag>Nationalist</tag>
59
+  <tag>Flüchtlingspolitik</tag>
49 60
   <tag>Migration</tag>
50 61
   <tag>Migrant</tag>
51 62
   <tag>Flüchtlingsstrom</tag>
@@ -67,6 +78,7 @@
67 78
   <tag>Rassismus</tag>
68 79
   <tag>rassistisch</tag>
69 80
   <tag>Rechtsextremismus</tag>
81
+  <tag>Nazi</tag>
70 82
   <tag>Nazis</tag>
71 83
 
72 84
   <tag>Wahlrecht</tag>
@@ -397,6 +409,11 @@
397 409
 </i6.edu>
398 410
 
399 411
 <i7.env>
412
+  <tag>Energiestrategie</tag>
413
+  <tag>#EEG</tag>
414
+  <tag>bundnaturschutz</tag>
415
+  <tag>BUND</tag>
416
+  <tag>Klimakonferenz</tag>
400 417
   <tag>Energiewende</tag>
401 418
   <tag>Klimaschutz</tag>
402 419
   <tag>Klimagipfel</tag>
@@ -607,6 +624,8 @@
607 624
 </i8.energy>
608 625
 
609 626
 <i10.trans>
627
+  <tag>Deutsche Bahn</tag>
628
+  <tag>#GDL</tag>
610 629
   <tag>LKWs</tag> 
611 630
   <tag>PKWs</tag>
612 631
   
@@ -741,6 +760,12 @@
741 760
 </i10.trans>
742 761
 
743 762
 <i12.law>
763
+  <tag>Euthanasie</tag>
764
+  <tag>Familienarbeitszeit</tag>
765
+  <tag>Waffenarsenal</tag>
766
+  <tag>NSU</tag>
767
+  <tag>Crystal Meth</tag>
768
+  <tag>Ecstacy</tag>
744 769
   <tag>Vorratsdatenspeicherung</tag>
745 770
   <tag>VDS</tag>
746 771
   <tag>Cybercrime</tag>
@@ -904,15 +929,14 @@
904 929
   <tag>Erdbeben</tag> 
905 930
   <tag>Frühwarnsystem</tag> 
906 931
   <!-- 1227 -->
907
-  <tag>Terrorismus</tag> 
908 932
   <tag>Personalausweis</tag> 
909 933
   <tag>Ausweis</tag> 
910
-  <tag>Terrorist</tag> 
911 934
   <!-- 1299 -->
912 935
   <tag>Opferentschädigung</tag> 
913 936
 </i12.law>
914 937
 
915 938
 <i13.social>
939
+  <tag>Pflegezeit</tag>
916 940
   <!-- 1300 --> 
917 941
   <tag>Sozialpolitik</tag>
918 942
   <tag>Pflegeversicherung</tag>
@@ -982,6 +1006,8 @@
982 1006
 </i13.social>
983 1007
 
984 1008
 <i14.house>
1009
+  <tag>preiswert Wohnen</tag>
1010
+  <tag>preiswertes Wohnen</tag>
985 1011
   <!-- 1400 --> 
986 1012
   <tag>Wohnungswesen</tag>
987 1013
   <tag>Raumungordnung</tag>
@@ -1157,6 +1183,7 @@
1157 1183
   <tag>Reiseversicherung</tag>
1158 1184
   <!-- 1525 --> 
1159 1185
   <tag>Verbaucherschutz</tag>
1186
+  <tag>Verbraucherinteressen</tag>
1160 1187
   <tag>Verbaucherbetrug</tag>
1161 1188
   <tag>Werbebetrug</tag>
1162 1189
   <tag>Verbraucherschutzministerium</tag>
@@ -1189,6 +1216,8 @@
1189 1216
 
1190 1217
 <i16.defense>
1191 1218
   <tag>Auschwitz</tag>
1219
+  <tag>2 Weltkrieg</tag>
1220
+  <tag>zweiter Weltkrieg</tag>
1192 1221
 
1193 1222
   <tag>Rüstungsbudget</tag>
1194 1223
   <tag>Rüstungsausgaben</tag>
@@ -1352,6 +1381,7 @@
1352 1381
 
1353 1382
 <i17.science>
1354 1383
   <tag>Sicherheitslücke</tag>
1384
+  <tag>Internetsteuer</tag>
1355 1385
 
1356 1386
   <!-- 1700 --> 
1357 1387
   <tag>Weltraumforschung</tag>
@@ -1494,9 +1524,12 @@
1494 1524
 </i18.trade>
1495 1525
 
1496 1526
 <i19.ib>
1527
+  <tag>#EU</tag>
1528
+  <tag>Ungarn</tag>
1497 1529
   <tag>Außenpolitik</tag>
1498 1530
   <tag>außenpolitisch</tag>
1499 1531
   <tag>menschenrechtsbetont</tag>
1532
+  <tag>Türkei</tag>
1500 1533
   
1501 1534
   <!-- 1900 --> 
1502 1535
   <tag>internationale Beziehungen</tag>
@@ -1619,7 +1652,6 @@
1619 1652
   <tag>Japan</tag>
1620 1653
   <tag>Südostasien</tag>
1621 1654
   <tag>Indien</tag>
1622
-  <tag>Afghanistan</tag>
1623 1655
   <tag>China</tag>
1624 1656
   <tag>chinesisch</tag>
1625 1657
   <tag>Taiwan</tag>
@@ -1644,7 +1676,6 @@
1644 1676
   <tag>religiöse Verfolgung</tag>
1645 1677
   <tag>Verbrechen gegen die Menschheit</tag>
1646 1678
   <tag>Verbrechen gegen die Menschlichkeit</tag>
1647
-  <tag>Folter</tag>
1648 1679
   <tag>Kindersoldat</tag>
1649 1680
   <tag>Menschenrechtskonvention</tag>
1650 1681
   <!-- 1926 --> 
@@ -1824,6 +1855,8 @@
1824 1855
 </s.nsa>
1825 1856
 
1826 1857
 <s.is>
1858
+  <tag>Islamischer Staat</tag>
1859
+  <tag>Islamischen Staates</tag>
1827 1860
   <tag>ISIS</tag>
1828 1861
   <tag>IS</tag>
1829 1862
   <tag>al Baghdadi</tag>
@@ -1858,6 +1891,8 @@
1858 1891
   <tag>Donetsk</tag>
1859 1892
   <tag>Donezk</tag>
1860 1893
   <tag>Euromaidan</tag>
1894
+  <tag>#Maidan</tag>
1895
+  <tag>#Janukowitsch</tag>
1861 1896
 </s.ukraine>
1862 1897
 
1863 1898
 <s.hk>
@@ -1891,6 +1926,7 @@
1891 1926
 </s.ferguson>
1892 1927
 
1893 1928
 <s.boko>
1929
+  <tag>200 entführte Mädchen</tag>
1894 1930
   <tag>Boko Haram</tag>
1895 1931
 </s.boko>
1896 1932
 
@@ -1970,6 +2006,8 @@
1970 2006
   <tag>Brasilien</tag>
1971 2007
   <tag>#WorldCup</tag>
1972 2008
   <tag>#WM2014</tag>
2009
+  <tag>#GERALG</tag>
2010
+  <tag>#ALGGER</tag>
1973 2011
 </s.wm>
1974 2012
 
1975 2013
 <s.sotschi>

BIN
issues.RData View File


BIN
matched-ids-v3.zip View File


BIN
tweets_tagged.RData View File


Loading…
Cancel
Save