|
|
|
@ -1,151 +1,3 @@
|
|
|
|
|
curdistance <- 1
|
|
|
|
|
}
|
|
|
|
|
# Match current tweet with tag. If >= 5 letters allow 1 changed letter, if >=8 letters allow also 1 (Levenshtein distance)
|
|
|
|
|
# Match the tweet with each variation of tagexpand (held in `curtag`) and
# collapse the per-variation results into a single flag.
# seq_along() keeps the loop from running on indices 1 and 0 when `curtag`
# is empty; the result vector is allocated once instead of grown from NULL.
tags_found <- logical(length(curtag))
for (e in seq_along(curtag)) {
  # NOTE(review): smartPatternMatch() is defined elsewhere; presumably it
  # returns a single TRUE/FALSE per call - confirm.
  tags_found[e] <- smartPatternMatch(curtext, curtag[e], curdistance, curacro)
}
tags_found <- any(tags_found)
|
|
|
|
|
tags_found
|
|
|
|
|
curtag
|
|
|
|
|
curtext
|
|
|
|
|
curdistance
|
|
|
|
|
test <- VAR(issues[,2:32], p=3, type=c("const", "trend", "both", "none"), season=NULL, exogen = NULL, lag.max = NULL, ic = c("AIC", "HQ", "SC", "FPE"))
|
|
|
|
|
test
|
|
|
|
|
test <- VAR(issues[,2:32], p=1, type="none")
|
|
|
|
|
capture.output(print(summary(test), prmsd=TRUE, digits=1), file="out.txt")
|
|
|
|
|
View(issues)
|
|
|
|
|
test <- VAR(issues_i[,2:22], p=1, type="none", exogen = issues_s[,2])
|
|
|
|
|
test <- VAR(issues_i[,2:22], p=1, type="none", exogen = issues_s[,2:3])
|
|
|
|
|
capture.output(print(summary(test), prmsd=TRUE, digits=1), file="out.txt")
|
|
|
|
|
irf(test)
|
|
|
|
|
test <- VAR(issues_s[,2:11], p=1, type="none")
|
|
|
|
|
irf(test)
|
|
|
|
|
plot(irf(test))
|
|
|
|
|
test <- VAR(issues[,2:32], p=1, type="none")
|
|
|
|
|
plot(irf(test, impulse = names(issues_s[2:11]), response = names(issues_i[2:22])))
|
|
|
|
|
plot(irf(test, impulse = names(issues_s[2:11]), response = names(issues_i[2:22]), n.ahead = 5))
|
|
|
|
|
require(stringr)
|
|
|
|
|
require(XML)
|
|
|
|
|
# Ask the user a question on the console and return the typed answer.
#
# question: prompt text passed to readline().
# Returns:  the line entered by the user as a character string.
readYN <- function(question) {
  # readline() already returns a character string, so no conversion is needed.
  readline(prompt = question)
}
|
|
|
|
|
# Test whether `string` matches at least one entry of `issuelist`.
#
# string:    text to test.
# issuelist: patterns handed to stringr::str_detect(), so each entry is
#            treated as a pattern found anywhere in `string`, not as an
#            exact name.
# Returns:   a single TRUE or FALSE.
checkIssue <- function(string, issuelist) {
  # na.rm = TRUE: a missing value counts as "no match". Without it the result
  # would be NA and the `if()` in checkAllIssues() would stop with an error.
  any(stringr::str_detect(string, issuelist), na.rm = TRUE)
}
|
|
|
|
|
# Check every entry of `string` against `issuelist` via checkIssue().
#
# string:    character vector of issue names to check.
# issuelist: passed through unchanged to checkIssue().
# Returns:   logical vector with one element per entry of `string`
#            (logical(0) for empty input). For each entry that is not found,
#            a message is printed to the console.
checkAllIssues <- function(string, issuelist) {
  # Allocate the result once; seq_along() avoids the 1:0 trap on empty input.
  status <- logical(length(string))
  for (i in seq_along(string)) {
    # isTRUE() guards against an NA result, which would make `if()` fail.
    status[i] <- isTRUE(checkIssue(string[i], issuelist))
    if (!status[i]) {
      cat("Issue",string[i],"does not exist. Please try again.\n")
    }
  }
  status
}
|
|
|
|
|
View(tweets)
|
|
|
|
|
write.csv(tweets, file="tweets.csv")
|
|
|
|
|
c_tweets <- read.csv("tweets.csv", colClasses="character")
|
|
|
|
|
View(c_tweets)
|
|
|
|
|
c_tweets$X <- NULL
|
|
|
|
|
# Build the issue table for the coding sample: one row per entry of `drange`
# and one zero-initialised column per top-level name in the XML issue list.
c_issues <- data.frame(date = drange)
c_issuelist <- xmlToList("issues.xml")
# Take the column names from the list that was just loaded. The previous code
# read them from the unrelated globals `issuelist` / `issueheads`, leaving
# `c_issuelist` and `c_issueheads` unused.
c_issueheads <- names(c_issuelist)
c_issues[c_issueheads] <- 0
|
|
|
|
|
source("issuecomp-codingsample-function.R")
|
|
|
|
|
rm(c_err, c_result, c_samid, c_samno,c_samtags,c_samissue,c_samtext,c_yn)
|
|
|
|
|
c_errors <- read.csv("issuecomp-codingsample-error.csv", header = F, sep=",", colClasses="character")
|
|
|
|
|
View(c_errors)
|
|
|
|
|
names(c_errors) <- c("str_id", "code", "issue", "tags", "text")
|
|
|
|
|
View(c_errors)
|
|
|
|
|
# Step through every logged coding error, show it, and run the follow-up
# script for that row.
# source() evaluates in the global environment by default, so the `c_err*`
# variables assigned here are visible to the sourced script.
# NOTE(review): presumably the script also reads the row index `r` - confirm.
# seq_len() makes the loop a no-op when `c_errors` has no rows.
for (r in seq_len(nrow(c_errors))) {
  c_errcode <- as.character(c_errors$code[r])
  c_errissue <- as.character(c_errors$issue[r])
  c_errtags <- as.character(c_errors$tags[r])
  c_errtext <- as.character(c_errors$text[r])
  c_errid <- as.character(c_errors$str_id[r])
  cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errtags, "\n", sep="")
  source("issuecomp-codingsample-function2.R")
}
|
|
|
|
|
# Reload the error log (no header row, every column kept as character) and
# name its columns.
c_errors <- read.csv("issuecomp-codingsample-error.csv", header = FALSE, sep = ",", colClasses = "character")
names(c_errors) <- c("str_id", "code", "issue", "tags", "text")

# Step through every logged coding error, show it, and run the follow-up
# script for that row.
# source() evaluates in the global environment by default, so the `c_err*`
# variables assigned here are visible to the sourced script.
# NOTE(review): presumably the script also reads the row index `r` - confirm.
# seq_len() makes the loop a no-op when `c_errors` has no rows.
for (r in seq_len(nrow(c_errors))) {
  c_errcode <- as.character(c_errors$code[r])
  c_errissue <- as.character(c_errors$issue[r])
  c_errtags <- as.character(c_errors$tags[r])
  c_errtext <- as.character(c_errors$text[r])
  c_errid <- as.character(c_errors$str_id[r])
  cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errtags, "\n", sep="")
  source("issuecomp-codingsample-function2.R")
}
|
|
|
|
|
# Step through every logged coding error, show the tweet with its issue and
# tags, and run the follow-up script for that row.
# source() evaluates in the global environment by default, so the `c_err*`
# variables assigned here are visible to the sourced script.
# NOTE(review): presumably the script also reads the row index `r` - confirm.
# seq_len() makes the loop a no-op when `c_errors` has no rows.
for (r in seq_len(nrow(c_errors))) {
  c_errcode <- as.character(c_errors$code[r])
  c_errissue <- as.character(c_errors$issue[r])
  c_errtags <- as.character(c_errors$tags[r])
  c_errtext <- as.character(c_errors$text[r])
  c_errid <- as.character(c_errors$str_id[r])
  cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="")
  source("issuecomp-codingsample-function2.R")
}
|
|
|
|
|
# Step through every logged coding error, show the tweet with its issue and
# tags, and run the follow-up script for that row.
# source() evaluates in the global environment by default, so the `c_err*`
# variables assigned here are visible to the sourced script.
# NOTE(review): presumably the script also reads the row index `r` - confirm.
# seq_len() makes the loop a no-op when `c_errors` has no rows.
for (r in seq_len(nrow(c_errors))) {
  c_errcode <- as.character(c_errors$code[r])
  c_errissue <- as.character(c_errors$issue[r])
  c_errtags <- as.character(c_errors$tags[r])
  c_errtext <- as.character(c_errors$text[r])
  c_errid <- as.character(c_errors$str_id[r])
  cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="")
  source("issuecomp-codingsample-function2.R")
}
|
|
|
|
|
tagexpand
|
|
|
|
|
source("issuecomp-codingsample-function.R")
|
|
|
|
|
source("issuecomp-codingsample-function.R")
|
|
|
|
|
source("issuecomp-codingsample-function.R")
|
|
|
|
|
# Reload the error log (no header row, every column kept as character) and
# name its columns.
c_errors <- read.csv("issuecomp-codingsample-error.csv", header = FALSE, sep = ",", colClasses = "character")
names(c_errors) <- c("str_id", "code", "issue", "tags", "text")

# Step through every logged coding error, show the tweet with its issue and
# tags, and run the follow-up script for that row.
# source() evaluates in the global environment by default, so the `c_err*`
# variables assigned here are visible to the sourced script.
# NOTE(review): presumably the script also reads the row index `r` - confirm.
# seq_len() makes the loop a no-op when `c_errors` has no rows.
for (r in seq_len(nrow(c_errors))) {
  c_errcode <- as.character(c_errors$code[r])
  c_errissue <- as.character(c_errors$issue[r])
  c_errtags <- as.character(c_errors$tags[r])
  c_errtext <- as.character(c_errors$text[r])
  c_errid <- as.character(c_errors$str_id[r])
  cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="")
  source("issuecomp-codingsample-function2.R")
}
|
|
|
|
|
# Step through every logged coding error, show the tweet with its issue and
# tags, and run the follow-up script for that row.
# source() evaluates in the global environment by default, so the `c_err*`
# variables assigned here are visible to the sourced script.
# NOTE(review): presumably the script also reads the row index `r` - confirm.
# seq_len() makes the loop a no-op when `c_errors` has no rows.
for (r in seq_len(nrow(c_errors))) {
  c_errcode <- as.character(c_errors$code[r])
  c_errissue <- as.character(c_errors$issue[r])
  c_errtags <- as.character(c_errors$tags[r])
  c_errtext <- as.character(c_errors$text[r])
  c_errid <- as.character(c_errors$str_id[r])
  cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="")
  source("issuecomp-codingsample-function2.R")
}
|
|
|
|
|
# Step through every logged coding error, show the tweet with its issue and
# tags, and run the follow-up script for that row.
# source() evaluates in the global environment by default, so the `c_err*`
# variables assigned here are visible to the sourced script.
# NOTE(review): presumably the script also reads the row index `r` - confirm.
# seq_len() makes the loop a no-op when `c_errors` has no rows.
for (r in seq_len(nrow(c_errors))) {
  c_errcode <- as.character(c_errors$code[r])
  c_errissue <- as.character(c_errors$issue[r])
  c_errtags <- as.character(c_errors$tags[r])
  c_errtext <- as.character(c_errors$text[r])
  c_errid <- as.character(c_errors$str_id[r])
  cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="")
  source("issuecomp-codingsample-function2.R")
}
|
|
|
|
|
# Step through every logged coding error, show the tweet with its issue and
# tags, and run the follow-up script for that row.
# source() evaluates in the global environment by default, so the `c_err*`
# variables assigned here are visible to the sourced script.
# NOTE(review): presumably the script also reads the row index `r` - confirm.
# seq_len() makes the loop a no-op when `c_errors` has no rows.
for (r in seq_len(nrow(c_errors))) {
  c_errcode <- as.character(c_errors$code[r])
  c_errissue <- as.character(c_errors$issue[r])
  c_errtags <- as.character(c_errors$tags[r])
  c_errtext <- as.character(c_errors$text[r])
  c_errid <- as.character(c_errors$str_id[r])
  cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="")
  source("issuecomp-codingsample-function2.R")
}
|
|
|
|
|
for(r in 1:nrow(c_errors)) {
|
|
|
|
|
c_errcode <- as.character(c_errors$code[r])
|
|
|
|
|
c_errissue <- as.character(c_errors$issue[r])
|
|
|
|
|
c_errtags <- as.character(c_errors$tags[r])
|
|
|
|
@ -510,3 +362,151 @@ View(issues)
|
|
|
|
|
test <- VAR(issues[,2:44], p=1, type="none")
|
|
|
|
|
VAR(issues_s[,2:23], p=1, type=c("const", "trend", "both", "none"), season=NULL, exogen = issues_i[2:22])
|
|
|
|
|
plot(irf(test, impulse = names(issues_s[2:23]), response = names(issues_i[2:22])))
|
|
|
|
|
rm(c_correct, c_curissue, c_errcode, c_errid, c_errissue, c_error1, c_error2, c_errors)
|
|
|
|
|
rm(c_issues, c_issuelist, c_issueheads)
|
|
|
|
|
rm(c_errtags, c_errtext, c_result, c_tag, c_tmp, c_tweets)
|
|
|
|
|
require(stringr)
|
|
|
|
|
require(XML)
|
|
|
|
|
# Ask the user a question on the console and return the typed answer.
#
# question: prompt text passed to readline().
# Returns:  the line entered by the user as a character string.
readYN <- function(question) {
  # readline() already returns a character string, so no conversion is needed.
  readline(prompt = question)
}
|
|
|
|
|
# Test whether `string` matches at least one entry of `issuelist`.
#
# string:    text to test.
# issuelist: patterns handed to stringr::str_detect(), so each entry is
#            treated as a pattern found anywhere in `string`, not as an
#            exact name.
# Returns:   a single TRUE or FALSE.
checkIssue <- function(string, issuelist) {
  # na.rm = TRUE: a missing value counts as "no match". Without it the result
  # would be NA and the `if()` in checkAllIssues() would stop with an error.
  any(stringr::str_detect(string, issuelist), na.rm = TRUE)
}
|
|
|
|
|
# Check every entry of `string` against `issuelist` via checkIssue().
#
# string:    character vector of issue names to check.
# issuelist: passed through unchanged to checkIssue().
# Returns:   logical vector with one element per entry of `string`
#            (logical(0) for empty input). For each entry that is not found,
#            a message is printed to the console.
checkAllIssues <- function(string, issuelist) {
  # Allocate the result once; seq_along() avoids the 1:0 trap on empty input.
  status <- logical(length(string))
  for (i in seq_along(string)) {
    # isTRUE() guards against an NA result, which would make `if()` fail.
    status[i] <- isTRUE(checkIssue(string[i], issuelist))
    if (!status[i]) {
      cat("Issue",string[i],"does not exist. Please try again.\n")
    }
  }
  status
}
|
|
|
|
|
View(tweets)
|
|
|
|
|
c_tweets <- read.csv("tweets.csv", colClasses="character")
|
|
|
|
|
# Remove every double quote from the tweet texts.
# The previous row-by-row loop used str_replace(), which drops only the FIRST
# quote in each tweet, and its `if()` stopped with an error on a missing text.
# str_replace_all() is vectorised over the whole column, leaves NA entries as
# NA, and also copes with an empty table.
# NOTE(review): unlike the loop, this no longer leaves the helper variables
# `r` and `curtext` behind in the workspace.
c_tweets$text <- stringr::str_replace_all(as.character(c_tweets$text), "\"", "")
|
|
|
|
|
c_tweets$X <- NULL
|
|
|
|
|
# Build the issue table for the coding sample: one row per entry of `drange`
# and one zero-initialised column per top-level name in the XML issue list.
c_issues <- data.frame(date = drange)
c_issuelist <- xmlToList("issues-v2.xml")
# Take the column names from the list that was just loaded. The previous code
# read them from the unrelated globals `issuelist` / `issueheads`, leaving
# `c_issuelist` and `c_issueheads` unused.
c_issueheads <- names(c_issuelist)
c_issues[c_issueheads] <- 0
|
|
|
|
|
source("issuecomp-codingsample-function.R")
|
|
|
|
|
require(stringr)
|
|
|
|
|
curtext
|
|
|
|
|
curtext <- str_replace_all(curtext, "#", "")
|
|
|
|
|
curtext <- str_replace_all(curtext, "-", " ")
|
|
|
|
|
curtext
|
|
|
|
|
curtext
|
|
|
|
|
str_replace_all(curtext, "[^[:alnum:]]", "")
|
|
|
|
|
str_replace_all(curtext, "[^[:alnum:]\s]", "")
|
|
|
|
|
str_replace_all(curtext, "[^[:alnum:]\\s]", "")
|
|
|
|
|
str_replace_all(curtext, "[^[:alnum:]^\\s]", "")
|
|
|
|
|
str_replace_all(curtext, "[^[:alnum:]^\\S]", "")
|
|
|
|
|
str_replace_all(curtext, "[^[:alnum:]][^\\s]", "")
|
|
|
|
|
str_replace_all(curtext, "[^[:alnum:]][^\\S]", "")
|
|
|
|
|
str_replace_all(curtext, "[^[:alnum:]][^[:blank]]", "")
|
|
|
|
|
str_replace_all(curtext, "[^[:alnum:]][^[:blank:]]", "")
|
|
|
|
|
str_replace_all(curtext, "[^[:alnum:]]", "")
|
|
|
|
|
str_replace_all(curtext, "\\W", "")
|
|
|
|
|
str_replace_all(curtext, "[\\W|\\S]", "")
|
|
|
|
|
str_replace_all(curtext, "(\\W|\\S)", "")
|
|
|
|
|
str_replace_all(curtext, "\\W|\\S", "")
|
|
|
|
|
str_replace_all(curtext, "\\W", "")
|
|
|
|
|
str_replace_all(curtext, "[\\W\\S]", "")
|
|
|
|
|
str_replace_all(curtext, "[\\S\\W]", "")
|
|
|
|
|
str_replace_all(curtext, "[\\s\\W]", "")
|
|
|
|
|
str_replace_all(curtext, "[\\W\\s]", "")
|
|
|
|
|
str_replace_all(curtext, "[\\W\s]", "")
|
|
|
|
|
str_replace_all(curtext, "[\\Ws]", "")
|
|
|
|
|
str_replace_all(curtext, "[\\W]", "")
|
|
|
|
|
str_replace_all(curtext, "\\W", "")
|
|
|
|
|
str_replace_all(curtext, "\\W|\\S", "")
|
|
|
|
|
str_replace_all(curtext, "\\W|\\s", "")
|
|
|
|
|
str_replace_all(curtext, "[^[:alnum:]]", "")
|
|
|
|
|
str_replace_all(curtext, "[^[:alnum:] ]", "")
|
|
|
|
|
str_replace_all(curtext, "[^[:alnum:]\\s]", "")
|
|
|
|
|
str_replace_all(curtext, "[^[:alnum:] ]", "")
|
|
|
|
|
curtext
|
|
|
|
|
curtext <- "liebe @cdu, wir finden #Steuer gut, aber die KFZ-Steuer nicht!"
|
|
|
|
|
curtext <- str_replace_all(curtext, "-", " ")
|
|
|
|
|
curtext <- str_replace_all(curtext, "[^[:alnum:] ]", "")
|
|
|
|
|
curtext
|
|
|
|
|
curtext <- "liebe @cdu, wir finden #Steuer gut, aber die KFZ--Steuer nicht!"
|
|
|
|
|
curtext <- str_replace_all(curtext, "-", " ")
|
|
|
|
|
curtext <- str_replace_all(curtext, "[^[:alnum:] ]", "")
|
|
|
|
|
curtext
|
|
|
|
|
str_replace_all(curtext, " ", " ")
|
|
|
|
|
smartPatternMatch
|
|
|
|
|
require(vars)
|
|
|
|
|
require(stringr)
|
|
|
|
|
adf1 <- summary(ur.df(issues))
|
|
|
|
|
issues
|
|
|
|
|
summary(issues)
|
|
|
|
|
summary(issues[2:44])
|
|
|
|
|
summary(issues[2:44], digits = 2)
|
|
|
|
|
adf1 <- summary(ur.df(issues[, 2:44]), type ="trend", lags=1)
|
|
|
|
|
data("Canda")
|
|
|
|
|
data("Canada")
|
|
|
|
|
class(Canada)
|
|
|
|
|
class(issues)
|
|
|
|
|
view(Canada)
|
|
|
|
|
View(Canada)
|
|
|
|
|
as.ts(issues)
|
|
|
|
|
issues_ts <- as.ts(issues)
|
|
|
|
|
class(issues_ts)
|
|
|
|
|
View(issues_ts)
|
|
|
|
|
View(issues)
|
|
|
|
|
adf1 <- summary(ur.df(issues_ts[, 2:44]), type ="trend", lags=1)
|
|
|
|
|
adf1 <- summary(ur.df(issues_ts[, 2]), type ="trend", lags=1)
|
|
|
|
|
adf1 <- summary(ur.df(issues_ts[, 2], type ="trend", lags=1))
|
|
|
|
|
adf1 <- summary(ur.df(issues_ts[, 2:44], type ="trend", lags=1))
|
|
|
|
|
adf1 <- summary(ur.df(issues_ts[, 2], type ="trend", lags=1))
|
|
|
|
|
adf1
|
|
|
|
|
adf1 <- summary(ur.df(issues_ts[, 3], type ="trend", lags=1))
|
|
|
|
|
adf1
|
|
|
|
|
adf1 <- summary(ur.df(issues_ts[, 2], type ="none", lags=1))
|
|
|
|
|
adf1
|
|
|
|
|
adf1 <- summary(ur.df(issues_ts[, 2], type ="trend", lags=1))
|
|
|
|
|
adf1
|
|
|
|
|
summary(ur.df(issues_ts[, 2], type ="none", lags=1))
|
|
|
|
|
VARselect(issues_ts[2:44], lag.max = 8, type = "both")
|
|
|
|
|
VARselect(issues_ts[1:44], lag.max = 8, type = "both")
|
|
|
|
|
VARselect(issues[1:44], lag.max = 8, type = "both")
|
|
|
|
|
VARselect(issues[2:44], lag.max = 8, type = "both")
|
|
|
|
|
VARselect(issues_ts[2:44], lag.max = 8, type = "both")
|
|
|
|
|
VARselect(issues[2:44], lag.max = 8, type = "none")
|
|
|
|
|
VARselect(issues[2:44], lag.max = 8, type = "trend")
|
|
|
|
|
VARselect(issues[2:44], lag.max = 8, type = "const")
|
|
|
|
|
VARselect(issues[2:44], lag.max = 8, type = "both")
|
|
|
|
|
test <- VAR(issues[,2:44], p=1, type="both")
|
|
|
|
|
# VAR(issues_s[,2:23], p=1, type=c("const", "trend", "both", "none"), season=NULL, exogen = issues_i[2:22])
|
|
|
|
|
plot(irf(test, impulse = names(issues_s[2:23]), response = names(issues_i[2:22])))
|
|
|
|
|
summary(ur.df(issues_ts[, 2], type ="both", lags=1))
|
|
|
|
|
summary(ur.df(issues_ts[, 2], type ="none", lags=1))
|
|
|
|
|
test <- VAR(issues_ts[,2:44], p=1, type="both")
|
|
|
|
|
plot(irf(test, impulse = names(issues_s[2:23]), response = names(issues_i[2:22])))
|
|
|
|
|
acc_df <- read.csv("MdB-twitter.csv")
|
|
|
|
|
# Keep only the rows that have a non-empty `twitter_acc` entry.
# The previous code collected the row numbers to delete and then ran
# `acc_df[-delrow, ]`. When no row had an empty account, `delrow` stayed NULL
# and that expression returned a data frame with ZERO rows. Selecting with a
# logical mask has no such edge case and also works on an empty table.
# NOTE(review): `r` is no longer removed here because this block no longer
# creates it.
has_account <- nzchar(as.character(acc_df$twitter_acc))
acc_df <- acc_df[has_account, ]
rm(has_account)
|
|
|
|
|
acc_df$row.names <- NULL
|
|
|
|
|
row.names(acc_df) <- NULL
|
|
|
|
|
View(acc_df)
|
|
|
|
|