
second run; improving behaviour at different places

mxmehl committed 5 years ago
commit 42bfe4c773
5 changed files with 233 additions and 231 deletions
1. .Rhistory (+217 -217)
2. issuecomp-2-analysis.R (+9 -7)
3. issuecomp-3-calc.R (+7 -7)
4. matched-ids-v2.zip (BIN)
5. tweets_tagged.RData (BIN)

.Rhistory (+217 -217)

@@ -1,220 +1,3 @@
-curchars <- curchars - 1
-} else {
-curhash <- FALSE
-}
-# Now expand the current tag by possible suffixes that may be plural forms
-# Only do if it isn't an acronym or specific hastag
-if(!curacro && !curhash) {
-for(e in 1:length(tagexpand)) {
-curtag[e] <- str_c(curtag[1], tagexpand[e])
-}
-}
-##############
-if(curchars <= 4 || curacro || curhash) {
-cat("distance 0\n")
-} else {
-cat("distance 1\n")
-}
-curtag <- "EURATOM"
-curchars <- nchar(curtag, type = "chars")
-# Check if tag is an acronym. If so, ignore.case will be deactivated in smartPatternMatch
-curacro <- checkAcronym(string = curtag)
-# Check if tag is some kind of specific hashtag. If so, do not handle as acronym, but don't expand it either
-if(str_detect(curtag, "^#")) {
-curacro <- FALSE
-curhash <- TRUE
-curtag <- str_replace(curtag, "#", "")
-curchars <- curchars - 1
-} else {
-curhash <- FALSE
-}
-# Now expand the current tag by possible suffixes that may be plural forms
-# Only do if it isn't an acronym or specific hastag
-if(!curacro && !curhash) {
-for(e in 1:length(tagexpand)) {
-curtag[e] <- str_c(curtag[1], tagexpand[e])
-}
-}
-##############
-if(curchars <= 4 || curacro || curhash) {
-cat("distance 0\n")
-} else {
-cat("distance 1\n")
-}
-curtag <- "Energiewende"
-curchars <- nchar(curtag, type = "chars")
-# Check if tag is an acronym. If so, ignore.case will be deactivated in smartPatternMatch
-curacro <- checkAcronym(string = curtag)
-# Check if tag is some kind of specific hashtag. If so, do not handle as acronym, but don't expand it either
-if(str_detect(curtag, "^#")) {
-curacro <- FALSE
-curhash <- TRUE
-curtag <- str_replace(curtag, "#", "")
-curchars <- curchars - 1
-} else {
-curhash <- FALSE
-}
-# Now expand the current tag by possible suffixes that may be plural forms
-# Only do if it isn't an acronym or specific hastag
-if(!curacro && !curhash) {
-for(e in 1:length(tagexpand)) {
-curtag[e] <- str_c(curtag[1], tagexpand[e])
-}
-}
-##############
-if(curchars <= 4 || curacro || curhash) {
-cat("distance 0\n")
-} else {
-cat("distance 1\n")
-}
-curtag <- "bnd"
-curchars <- nchar(curtag, type = "chars")
-# Check if tag is an acronym. If so, ignore.case will be deactivated in smartPatternMatch
-curacro <- checkAcronym(string = curtag)
-# Check if tag is some kind of specific hashtag. If so, do not handle as acronym, but don't expand it either
-if(str_detect(curtag, "^#")) {
-curacro <- FALSE
-curhash <- TRUE
-curtag <- str_replace(curtag, "#", "")
-curchars <- curchars - 1
-} else {
-curhash <- FALSE
-}
-# Now expand the current tag by possible suffixes that may be plural forms
-# Only do if it isn't an acronym or specific hastag
-if(!curacro && !curhash) {
-for(e in 1:length(tagexpand)) {
-curtag[e] <- str_c(curtag[1], tagexpand[e])
-}
-}
-##############
-if(curchars <= 4 || curacro || curhash) {
-cat("distance 0\n")
-} else {
-cat("distance 1\n")
-}
-curtag <- "#WM"
-curchars <- nchar(curtag, type = "chars")
-# Check if tag is an acronym. If so, ignore.case will be deactivated in smartPatternMatch
-curacro <- checkAcronym(string = curtag)
-# Check if tag is some kind of specific hashtag. If so, do not handle as acronym, but don't expand it either
-if(str_detect(curtag, "^#")) {
-curacro <- FALSE
-curhash <- TRUE
-curtag <- str_replace(curtag, "#", "")
-curchars <- curchars - 1
-} else {
-curhash <- FALSE
-}
-# Now expand the current tag by possible suffixes that may be plural forms
-# Only do if it isn't an acronym or specific hastag
-if(!curacro && !curhash) {
-for(e in 1:length(tagexpand)) {
-curtag[e] <- str_c(curtag[1], tagexpand[e])
-}
-}
-##############
-if(curchars <= 4 || curacro || curhash) {
-cat("distance 0\n")
-} else {
-cat("distance 1\n")
-}
-curtag
-curtag <- "Energiewende"
-curchars <- nchar(curtag, type = "chars")
-# Check if tag is an acronym. If so, ignore.case will be deactivated in smartPatternMatch
-curacro <- checkAcronym(string = curtag)
-# Check if tag is some kind of specific hashtag. If so, do not handle as acronym, but don't expand it either
-if(str_detect(curtag, "^#")) {
-curacro <- FALSE
-curhash <- TRUE
-curtag <- str_replace(curtag, "#", "")
-curchars <- curchars - 1
-} else {
-curhash <- FALSE
-}
-# Now expand the current tag by possible suffixes that may be plural forms
-# Only do if it isn't an acronym or specific hastag
-if(!curacro && !curhash) {
-for(e in 1:length(tagexpand)) {
-curtag[e] <- str_c(curtag[1], tagexpand[e])
-}
-}
-##############
-if(curchars <= 4 || curacro || curhash) {
-cat("distance 0\n")
-} else {
-cat("distance 1\n")
-}
-curtag <- "Energiewende"
-curchars <- nchar(curtag, type = "chars")
-# Check if tag is an acronym. If so, ignore.case will be deactivated in smartPatternMatch
-curacro <- checkAcronym(string = curtag)
-# Check if tag is some kind of specific hashtag. If so, do not handle as acronym, but don't expand it either
-if(str_detect(curtag, "^#")) {
-curacro <- FALSE
-curhash <- TRUE
-curtag <- str_replace(curtag, "#", "")
-curchars <- curchars - 1
-} else {
-curhash <- FALSE
-}
-# Now expand the current tag by possible suffixes that may be plural forms
-# Only do if it isn't an acronym or specific hastag
-if(!curacro && !curhash) {
-for(e in 1:length(tagexpand)) {
-curtag[e] <- str_c(curtag[1], tagexpand[e])
-}
-}
-# Set Levenshtein distance depending on char length, acronym and hashtag status
-if(curchars <= 4 || curacro || curhash) {
-curdistance <- 0
-} else {
-curdistance <- 1
-}
-curtag
-smartPatternMatch("Die Energiewende ist toll!", curtag, curdistance, curacro)
-smartPatternMatch("Die Energiewende ist toll!", curtag[1], curdistance, curacro)
-smartPatternMatch("Die Energiewende ist toll!", curtag[2], curdistance, curacro)
-smartPatternMatch("Die Energiewende ist toll!", sprintf("%s", curtag), curdistance, curacro)
-tags_found <- NULL
-# Match the tweet with each variation of tagexpand
-for(e in 1:length(curtag)) {
-tags_found[e] <- smartPatternMatch(curtext, curtag[e], curdistance, curacro)
-}
-curtext <- "Die Energiewende ist toll!"
-tags_found <- NULL
-# Match the tweet with each variation of tagexpand
-for(e in 1:length(curtag)) {
-tags_found[e] <- smartPatternMatch(curtext, curtag[e], curdistance, curacro)
-}
-tags_found
-curtag
-curtag <- "#WM2014"
-curtext <- "Ich freu mich auf wm2014 sehr"
-curchars <- nchar(curtag, type = "chars")
-# Check if tag is an acronym. If so, ignore.case will be deactivated in smartPatternMatch
-curacro <- checkAcronym(string = curtag)
-# Check if tag is some kind of specific hashtag. If so, do not handle as acronym, but don't expand it either
-if(str_detect(curtag, "^#")) {
-curacro <- FALSE
-curhash <- TRUE
-curtag <- str_replace(curtag, "#", "")
-curchars <- curchars - 1
-} else {
-curhash <- FALSE
-}
-# Now expand the current tag by possible suffixes that may be plural forms
-# Only do if it isn't an acronym or specific hastag
-if(!curacro && !curhash) {
-for(e in 1:length(tagexpand)) {
-curtag[e] <- str_c(curtag[1], tagexpand[e])
-}
-}
-# Set Levenshtein distance depending on char length, acronym and hashtag status
-if(curchars <= 4 || curacro || curhash) {
-curdistance <- 0
-} else {
 curdistance <- 1
 }
 # Match current tweet with tag. If >= 5 letters allow 1 changed letter, if >=8 letters allow also 1 (Levenshtein distance)
@@ -510,3 +293,220 @@ for(i in 1:20) { cat(i,"\n")
 Sys.sleep(10)}
 list.dirs()
 list.files()
+rm(results)
+setwd("matched-ids/")
+results_files <- list.files()
+results_files
+results_files <- "all.csv"
+for(r in 1:length(results_files)) {
+if(r == 1) {
+results <- read.csv(results_files[r], sep=";", colClasses=c("character", "character", "character", "character"), header=F)
+} else {
+results_temp <- read.csv(results_files[r], sep=";", colClasses=c("character", "character", "character", "character"), header=F)
+results <- insertRow(results, results_temp)
+}
+}
+rm(r, results_temp, results_files)
+results <- results[!duplicated(results), ]
+names(results) <- c("date", "id_str", "issue", "tags")
+results <- results[order(results$id_str), ]
+row.names(results) <- NULL
+results[23381,]
+results[53381,]
+results[43253,]
+for(r in 53371:nrow(results)) {
+curdate <- as.character(results$date[r])
+curid <- as.character(results$id_str[r])
+curissue <- as.character(results$issue[r])
+curtag <- as.character(results$tags[r])
+cat("Sorting match", r, "of 53383 \n")
+# Update issue counter (date and issue)
+issues[issues[, "date"] == curdate, curissue] <- issues[issues[, "date"] == curdate, curissue] + 1
+# Update tweet dataframe (id, issue and tags)
+oldissue <- tweets[tweets[, "id_str"] == curid, "issue"]
+tweets[tweets[, "id_str"] == curid, "issue"] <- str_c(oldissue, curissue, ",")
+oldtag <- tweets[tweets[, "id_str"] == curid, "tags"]
+tweets[tweets[, "id_str"] == curid, "tags"] <- str_c(oldtag, curtag, ",")
+}
+issues[issueheads] <- 0
+View(issues)
+for(r in 1:nrow(results)) {
+curdate <- as.character(results$date[r])
+curid <- as.character(results$id_str[r])
+curissue <- as.character(results$issue[r])
+curtag <- as.character(results$tags[r])
+cat("Sorting match", r, "of 53383 \n")
+# Update issue counter (date and issue)
+issues[issues[, "date"] == curdate, curissue] <- issues[issues[, "date"] == curdate, curissue] + 1
+# Update tweet dataframe (id, issue and tags)
+oldissue <- tweets[tweets[, "id_str"] == curid, "issue"]
+tweets[tweets[, "id_str"] == curid, "issue"] <- str_c(oldissue, curissue, ",")
+oldtag <- tweets[tweets[, "id_str"] == curid, "tags"]
+tweets[tweets[, "id_str"] == curid, "tags"] <- str_c(oldtag, curtag, ",")
+}
+require(lubridate)
+require(XML)
+require(ggplot2)
+require(reshape2)
+require(stringr)
+require(foreach)
+require(doParallel)
+for(r in 1:nrow(results)) {
+curdate <- as.character(results$date[r])
+curid <- as.character(results$id_str[r])
+curissue <- as.character(results$issue[r])
+curtag <- as.character(results$tags[r])
+cat("Sorting match", r, "of 53383 \n")
+# Update issue counter (date and issue)
+issues[issues[, "date"] == curdate, curissue] <- issues[issues[, "date"] == curdate, curissue] + 1
+# Update tweet dataframe (id, issue and tags)
+oldissue <- tweets[tweets[, "id_str"] == curid, "issue"]
+tweets[tweets[, "id_str"] == curid, "issue"] <- str_c(oldissue, curissue, ",")
+oldtag <- tweets[tweets[, "id_str"] == curid, "tags"]
+tweets[tweets[, "id_str"] == curid, "tags"] <- str_c(oldtag, curtag, ",")
+}
+results[119,]
+results[120,]
+load(file = "tweets_untagged.RData")
+setwd("~/Dokumente/Uni/Aktuell/BA-Arbeit/uni-ba-issuecomp")
+results_files <- "matched-ids/all.csv"
+load(file = "tweets_untagged.RData")
+View(issues)
+issues <- data.frame(date = drange)
+issuelist <- readLines("issues.xml")
+issuelist <- str_replace_all(string = issuelist, pattern = ".*<!-- .+ -->", "")
+issuelist <- xmlToList(issuelist)
+issueheads <- names(issuelist)
+issues[issueheads] <- 0
+tweets$issue <- ""
+tweets$tags <- ""
+View(results)
+rm(r, results_temp, results_files)
+results <- results[!duplicated(results), ]
+names(results) <- c("date", "id_str", "issue", "tags")
+results <- results[order(results$id_str), ]
+row.names(results) <- NULL
+for(r in 1:nrow(results)) {
+curdate <- as.character(results$date[r])
+curid <- as.character(results$id_str[r])
+curissue <- as.character(results$issue[r])
+curtag <- as.character(results$tags[r])
+cat("Sorting match", r, "of 53383 \n")
+# Update issue counter (date and issue)
+issues[issues[, "date"] == curdate, curissue] <- issues[issues[, "date"] == curdate, curissue] + 1
+# Update tweet dataframe (id, issue and tags)
+oldissue <- tweets[tweets[, "id_str"] == curid, "issue"]
+tweets[tweets[, "id_str"] == curid, "issue"] <- str_c(oldissue, curissue, ",")
+oldtag <- tweets[tweets[, "id_str"] == curid, "tags"]
+tweets[tweets[, "id_str"] == curid, "tags"] <- str_c(oldtag, curtag, ",")
+}
+curdate
+curissue
+issues[issues[, "date"] == curdate, curissue]
+issueheads
+issuelist <- readLines("issues-v2.xml")
+issues <- data.frame(date = drange)
+issuelist <- readLines("issues-v2.xml")
+issuelist <- str_replace_all(string = issuelist, pattern = ".*<!-- .+ -->", "")
+issuelist <- xmlToList(issuelist)
+issueheads <- names(issuelist)
+issues[issueheads] <- 0
+tweets$issue <- ""
+tweets$tags <- ""
+for(r in 1:nrow(results)) {
+curdate <- as.character(results$date[r])
+curid <- as.character(results$id_str[r])
+curissue <- as.character(results$issue[r])
+curtag <- as.character(results$tags[r])
+cat("Sorting match", r, "of 53383 \n")
+# Update issue counter (date and issue)
+issues[issues[, "date"] == curdate, curissue] <- issues[issues[, "date"] == curdate, curissue] + 1
+# Update tweet dataframe (id, issue and tags)
+oldissue <- tweets[tweets[, "id_str"] == curid, "issue"]
+tweets[tweets[, "id_str"] == curid, "issue"] <- str_c(oldissue, curissue, ",")
+oldtag <- tweets[tweets[, "id_str"] == curid, "tags"]
+tweets[tweets[, "id_str"] == curid, "tags"] <- str_c(oldtag, curtag, ",")
+}
+results[33170,]
+results[33171,]
+results$date[33170]
+results$date[33170] <- "2014-08-21"
+for(r in 33170:nrow(results)) {
+curdate <- as.character(results$date[r])
+curid <- as.character(results$id_str[r])
+curissue <- as.character(results$issue[r])
+curtag <- as.character(results$tags[r])
+cat("Sorting match", r, "of 53383 \n")
+# Update issue counter (date and issue)
+issues[issues[, "date"] == curdate, curissue] <- issues[issues[, "date"] == curdate, curissue] + 1
+# Update tweet dataframe (id, issue and tags)
+oldissue <- tweets[tweets[, "id_str"] == curid, "issue"]
+tweets[tweets[, "id_str"] == curid, "issue"] <- str_c(oldissue, curissue, ",")
+oldtag <- tweets[tweets[, "id_str"] == curid, "tags"]
+tweets[tweets[, "id_str"] == curid, "tags"] <- str_c(oldtag, curtag, ",")
+}
+save(tweets, file="tweets_tagged.RData")
+write.csv(tweets, file="tweets.csv")
+save(issues, file="issues.RData")
+require(stringr)
+require(reshape2)
+require(ggplot2)
+require(vars)
+drop_s <- which(str_detect(names(issues), "^s"))
+drop_i <- which(str_detect(names(issues), "^i"))
+issues_i <- issues[,-drop_s]
+issues_s <- issues[,-drop_i]
+issues_i$total <- rowSums(issues_i[2:ncol(issues_i)])
+issues_i$entropy <- 0
+for(r in 1:nrow(issues_i)) {
+curtotal <- as.numeric(issues_i$total[r])
+curp <- 0
+for(c in 2:ncol(issues_i)) {
+curcount <- as.numeric(issues_i[r,c])
+curp[c] <- curcount / curtotal
+}
+curp <- curp [2:length(curp)-2]
+curdrop <- which(curp==0)
+curp <- curp[-curdrop]
+issues_i$entropy[r] <- sum(-1 * curp * log(curp))
+}
+issues_s$total <- rowSums(issues_s[2:ncol(issues_s)])
+issues_s$entropy <- 0
+for(r in 1:nrow(issues_s)) {
+curtotal <- as.numeric(issues_s$total[r])
+curp <- 0
+for(c in 2:ncol(issues_s)) {
+curcount <- as.numeric(issues_s[r,c])
+curp[c] <- curcount / curtotal
+}
+curp <- curp [2:length(curp)-2]
+curdrop <- which(curp==0)
+curp <- curp[-curdrop]
+issues_s$entropy[r] <- sum(-1 * curp * log(curp))
+}
+stats_total <- data.frame(date=drange)
+stats_total$tpd <- 0
+stats_total$ipd <- issues_i$total
+stats_total$spd <- issues_s$total
+# Total number of tweets per day over time
+for(r in 1:length(drange)) {
+stats_total$tpd[r] <- length(tweets[tweets[, "created_at"] == drange[r], "id_str"])
+}
+g1 <- ggplot(data = stats_melt, aes(x=date,y=value,colour=variable, group=variable)) +
+geom_line()+
+geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1)
+g1
+stats_entropy <- data.frame(date=drange)
+stats_entropy$entropy <- issues_i$entropy
+stats_entropy <- melt(stats_entropy, id="date")
+g1 <- ggplot(data = stats_entropy, aes(x=date,y=value,colour=variable, group=variable)) +
+geom_line() +
+geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1)
+g1
+test <- VAR(issues[,2:32], p=1, type="none")
+View(issues_i)
+View(issues_s)
+View(issues)
+test <- VAR(issues[,2:44], p=1, type="none")
+VAR(issues_s[,2:23], p=1, type=c("const", "trend", "both", "none"), season=NULL, exogen = issues_i[2:22])
+plot(irf(test, impulse = names(issues_s[2:23]), response = names(issues_i[2:22])))
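Note on the session above: the same preparation steps recur before every match. Hashtags lose their leading "#" and are never expanded, acronyms are matched case-sensitively, all other tags get plural-suffix variants, and short tags, acronyms, and hashtags are matched with Levenshtein distance 0 instead of 1. A minimal sketch of that logic pulled into one function; checkAcronym() and smartPatternMatch() are the repository's own helpers, and tagexpand (a character vector of suffixes whose first element is presumably "", so the bare tag survives expansion) is assumed to be defined as in the scripts.

# Sketch only, not part of this commit: consolidates the per-tag preparation
# repeated throughout the session. checkAcronym() and smartPatternMatch() are
# the project's helpers; tagexpand is assumed to look like c("", "s", "n", "en").
library(stringr)

prepareTag <- function(curtag, tagexpand) {
  curchars <- nchar(curtag, type = "chars")
  # Acronyms deactivate ignore.case in smartPatternMatch
  curacro <- checkAcronym(string = curtag)
  if (str_detect(curtag, "^#")) {
    # Specific hashtag: strip "#", treat neither as acronym nor as expandable
    curacro <- FALSE
    curhash <- TRUE
    curtag <- str_replace(curtag, "#", "")
    curchars <- curchars - 1
  } else {
    curhash <- FALSE
  }
  if (!curacro && !curhash) {
    # Expand by possible plural suffixes (vectorised form of the loop above)
    curtag <- str_c(curtag[1], tagexpand)
  }
  # Short tags, acronyms and hashtags must match exactly
  curdistance <- if (curchars <= 4 || curacro || curhash) 0 else 1
  list(variants = curtag, distance = curdistance, acronym = curacro)
}

# Usage, mirroring the session:
# tag <- prepareTag("Energiewende", tagexpand)
# sapply(tag$variants, function(v)
#   smartPatternMatch("Die Energiewende ist toll!", v, tag$distance, tag$acronym))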

issuecomp-2-analysis.R (+9 -7)

@@ -21,7 +21,7 @@ drange <- date_start + days(0:drange)
 # Import issues and prepare everything
 # Will only be filled after the large categorisation loop
 issues <- data.frame(date = drange)
-issuelist <- readLines("issues.xml")
+issuelist <- readLines("issues-v2.xml")
 issuelist <- str_replace_all(string = issuelist, pattern = ".*<!-- .+ -->", "")
 issuelist <- xmlToList(issuelist)
 issueheads <- names(issuelist)
@@ -66,7 +66,8 @@ foreach(d = 1:nrow(issues), .packages = c("stringr"), .combine=rbind) %dopar% {
     for(i in 1:length(issueheads)) {
       curissue <- issueheads[i]
       curtags <- as.character(issuelist[[curissue]])
-      curfile <- str_c(id_folder,"/",curissue,".csv")
+#       curfile <- str_c(id_folder,"/",curissue,".csv")
+      curfile <- str_c(id_folder,"/",curdate,".csv")  # Possible solution to avoid buggy files when using many processes

       # Now test all tags of a single issue
       for(s in 1:length(curtags)) {
@@ -144,8 +145,9 @@ stopCluster(cl)
 # IMPORT RESULTS ----------------------------------------------------------

 # Import all files which have been generated at the categorisation run above.
-setwd("matched-ids/")
-results_files <- list.files()
+#setwd("matched-ids/")
+#results_files <- list.files()
+results_files <- "matched-ids/all.csv"
 for(r in 1:length(results_files)) {
   if(r == 1) {
     results <- read.csv(results_files[r], sep=";", colClasses=c("character", "character", "character", "character"), header=F)
@@ -166,15 +168,15 @@ row.names(results) <- NULL
 # (which wasn't possible in the categorisation process because of parallelisation)

 # Reset issues counter
-# issues[issueheads] <- 0
+#issues[issueheads] <- 0

-for(r in 1:nrow(results)) {
+for(r in 33170:nrow(results)) {
   curdate <- as.character(results$date[r])
   curid <- as.character(results$id_str[r])
   curissue <- as.character(results$issue[r])
   curtag <- as.character(results$tags[r])

-  cat("Sorting match", r, "of 62827 \n")
+  cat("Sorting match", r, "of 53383 \n")

   # Update issue counter (date and issue)
   issues[issues[, "date"] == curdate, curissue] <- issues[issues[, "date"] == curdate, curissue] + 1
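Note on the curfile change above: with one CSV per issue, several parallel workers (each handling a different date) could append to the same file at the same time, which is the likely source of the "buggy files" the comment mentions. Keyed by date, every %dopar% iteration is the sole writer of its own file. A rough sketch of the pattern, with a hypothetical matchOneDay() standing in for the script's inner issue/tag loop:

# Sketch of the one-file-per-worker idea; matchOneDay() is hypothetical and
# stands in for the nested matching loop of the real script.
library(foreach)
library(doParallel)
library(stringr)

cl <- makeCluster(detectCores() - 1)
registerDoParallel(cl)

foreach(d = 1:nrow(issues), .packages = "stringr") %dopar% {
  curdate <- issues$date[d]
  matches <- matchOneDay(curdate)  # data frame of matches for this one day
  # The worker for date d is the only process writing <id_folder>/<curdate>.csv
  write.table(matches, file = str_c(id_folder, "/", curdate, ".csv"),
              sep = ";", row.names = FALSE, col.names = FALSE,
              append = TRUE, quote = FALSE)
}
stopCluster(cl)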

issuecomp-3-calc.R (+7 -7)

@@ -72,19 +72,19 @@ stats_entropy <- melt(stats_entropy, id="date")
 g1 <- ggplot(data = stats_entropy, aes(x=date,y=value,colour=variable, group=variable)) +
   geom_line() +
   geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1)
-# g1
+g1



 # VAR ---------------------------------------------------------------------

-test <- VAR(issues[,2:32], p=1, type=c("const", "trend", "both", "none"), season=NULL, exogen = NULL, lag.max = NULL, ic = c("AIC", "HQ", "SC", "FPE"))
-test <- VAR(issues_i[,2:22], p=1, type="none", exogen = issues_s[,2:3])
-test <- VAR(issues_s[,2:11], p=1, type="none")
-test <- VAR(issues[,2:32], p=1, type="none")
-VAR(issues_s[,2:11], p=1, type=c("const", "trend", "both", "none"), season=NULL, exogen = issues_i[2:22])
+# test <- VAR(issues[,2:32], p=1, type=c("const", "trend", "both", "none"), season=NULL, exogen = NULL, lag.max = NULL, ic = c("AIC", "HQ", "SC", "FPE"))
+# test <- VAR(issues_i[,2:22], p=1, type="none", exogen = issues_s[,2:3])
+# test <- VAR(issues_s[,2:11], p=1, type="none")
+test <- VAR(issues[,2:44], p=1, type="none")
+# VAR(issues_s[,2:23], p=1, type=c("const", "trend", "both", "none"), season=NULL, exogen = issues_i[2:22])

-plot(irf(test, impulse = names(issues_s[2:11]), response = names(issues_i[2:22])))
+plot(irf(test, impulse = names(issues_s[2:23]), response = names(issues_i[2:22])))

 capture.output(print(summary(test), prmsd=TRUE, digits=1), file="out.txt")
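Note on the VAR change above: after switching to issues-v2.xml the issue data frame apparently holds more series, so the model is now fitted on columns 2:44 and the impulse set grew from issues_s[2:11] to issues_s[2:23]. For readers unfamiliar with the vars calls used here, a self-contained toy example on simulated data (not the thesis data):

# Toy illustration of the vars workflow; both series are simulated AR(1)s.
library(vars)

set.seed(1)
y <- data.frame(i_energy = arima.sim(list(ar = 0.5), n = 200),
                s_media  = arima.sim(list(ar = 0.3), n = 200))

m <- VAR(y, p = 1, type = "none")  # one lag, no deterministic terms, as above
summary(m)

# Impulse response of i_energy to a shock in s_media, with bootstrap bands
plot(irf(m, impulse = "s_media", response = "i_energy", boot = TRUE))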

matched-ids-v2.zip (BIN)


tweets_tagged.RData (BIN)

