some small changes
This commit is contained in:
@@ -1,151 +1,3 @@
|
||||
curdistance <- 1
|
||||
}
|
||||
# Match current tweet with tag. If >= 5 letters allow 1 changed letter, if >=8 letters allow also 1 (Levenshtein distance)
|
||||
tags_found <- NULL
|
||||
# Match the tweet with each variation of tagexpand
|
||||
for(e in 1:length(curtag)) {
|
||||
tags_found[e] <- smartPatternMatch(curtext, curtag[e], curdistance, curacro)
|
||||
}
|
||||
tags_found <- any(tags_found)
|
||||
tags_found
|
||||
curtag
|
||||
curtext
|
||||
curdistance
|
||||
test <- VAR(issues[,2:32], p=3, type=c("const", "trend", "both", "none"), season=NULL, exogen = NULL, lag.max = NULL, ic = c("AIC", "HQ", "SC", "FPE"))
|
||||
test
|
||||
test <- VAR(issues[,2:32], p=1, type="none")
|
||||
capture.output(print(summary(test), prmsd=TRUE, digits=1), file="out.txt")
|
||||
View(issues)
|
||||
test <- VAR(issues_i[,2:22], p=1, type="none", exogen = issues_s[,2])
|
||||
test <- VAR(issues_i[,2:22], p=1, type="none", exogen = issues_s[,2:3])
|
||||
capture.output(print(summary(test), prmsd=TRUE, digits=1), file="out.txt")
|
||||
irf(test)
|
||||
test <- VAR(issues_s[,2:11], p=1, type="none")
|
||||
irf(test)
|
||||
plot(irf(test))
|
||||
test <- VAR(issues[,2:32], p=1, type="none")
|
||||
plot(irf(test, impulse = names(issues_s[2:11]), response = names(issues_i[2:22])))
|
||||
plot(irf(test, impulse = names(issues_s[2:11]), response = names(issues_i[2:22]), n.ahead = 5))
|
||||
require(stringr)
|
||||
require(XML)
|
||||
# Ask the user a question on the console and hand back the typed reply.
#
# question: text shown as the readline() prompt.
# Returns the entered line, coerced to character.
readYN <- function(question) {
  answer <- readline(prompt = question)
  as.character(answer)
}
|
||||
# Tell whether `string` is matched by at least one entry of `issuelist`.
#
# string:    text to test (first argument of str_detect()).
# issuelist: vector handed to str_detect() as its pattern argument.
# Returns the result of any() over the individual match results.
checkIssue <- function(string, issuelist) {
  hits <- str_detect(string, issuelist)
  any(hits)
}
|
||||
# Validate every entry of `string` against `issuelist` via checkIssue().
#
# string:    vector of issue names to validate.
# issuelist: known issues, forwarded unchanged to checkIssue().
# Returns a logical vector, one element per entry of `string`; for each entry
# that is not found a hint is printed to the console. Empty input yields
# logical(0).
checkAllIssues <- function(string, issuelist) {
  # Preallocate the result and iterate with seq_along(): 1:length(x) would
  # walk over c(1, 0) when `string` is empty.
  status <- logical(length(string))
  for (i in seq_along(string)) {
    status[i] <- checkIssue(string[i], issuelist)
    if (!status[i]) {
      cat("Issue", string[i], "does not exist. Please try again.\n")
    }
  }
  status
}
|
||||
View(tweets)
|
||||
write.csv(tweets, file="tweets.csv")
|
||||
c_tweets <- read.csv("tweets.csv", colClasses="character")
|
||||
View(c_tweets)
|
||||
c_tweets$X <- NULL
|
||||
c_issues <- data.frame(date = drange)
|
||||
c_issuelist <- xmlToList("issues.xml")
|
||||
c_issueheads <- names(issuelist)
|
||||
c_issues[issueheads] <- 0
|
||||
source("issuecomp-codingsample-function.R")
|
||||
rm(c_err, c_result, c_samid, c_samno,c_samtags,c_samissue,c_samtext,c_yn)
|
||||
c_errors <- read.csv("issuecomp-codingsample-error.csv", header = F, sep=",", colClasses="character")
|
||||
View(c_errors)
|
||||
names(c_errors) <- c("str_id", "code", "issue", "tags", "text")
|
||||
View(c_errors)
|
||||
for(r in 1:nrow(c_errors)) {
|
||||
c_errcode <- as.character(c_errors$code[r])
|
||||
c_errissue <- as.character(c_errors$issue[r])
|
||||
c_errtags <- as.character(c_errors$tags[r])
|
||||
c_errtext <- as.character(c_errors$text[r])
|
||||
c_errid <- as.character(c_errors$str_id[r])
|
||||
cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errtags, "\n", sep="")
|
||||
source("issuecomp-codingsample-function2.R")
|
||||
}
|
||||
c_errors <- read.csv("issuecomp-codingsample-error.csv", header = F, sep=",", colClasses="character")
|
||||
names(c_errors) <- c("str_id", "code", "issue", "tags", "text")
|
||||
for(r in 1:nrow(c_errors)) {
|
||||
c_errcode <- as.character(c_errors$code[r])
|
||||
c_errissue <- as.character(c_errors$issue[r])
|
||||
c_errtags <- as.character(c_errors$tags[r])
|
||||
c_errtext <- as.character(c_errors$text[r])
|
||||
c_errid <- as.character(c_errors$str_id[r])
|
||||
cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errtags, "\n", sep="")
|
||||
source("issuecomp-codingsample-function2.R")
|
||||
}
|
||||
for(r in 1:nrow(c_errors)) {
|
||||
c_errcode <- as.character(c_errors$code[r])
|
||||
c_errissue <- as.character(c_errors$issue[r])
|
||||
c_errtags <- as.character(c_errors$tags[r])
|
||||
c_errtext <- as.character(c_errors$text[r])
|
||||
c_errid <- as.character(c_errors$str_id[r])
|
||||
cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="")
|
||||
source("issuecomp-codingsample-function2.R")
|
||||
}
|
||||
for(r in 1:nrow(c_errors)) {
|
||||
c_errcode <- as.character(c_errors$code[r])
|
||||
c_errissue <- as.character(c_errors$issue[r])
|
||||
c_errtags <- as.character(c_errors$tags[r])
|
||||
c_errtext <- as.character(c_errors$text[r])
|
||||
c_errid <- as.character(c_errors$str_id[r])
|
||||
cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="")
|
||||
source("issuecomp-codingsample-function2.R")
|
||||
}
|
||||
tagexpand
|
||||
source("issuecomp-codingsample-function.R")
|
||||
source("issuecomp-codingsample-function.R")
|
||||
source("issuecomp-codingsample-function.R")
|
||||
c_errors <- read.csv("issuecomp-codingsample-error.csv", header = F, sep=",", colClasses="character")
|
||||
names(c_errors) <- c("str_id", "code", "issue", "tags", "text")
|
||||
for(r in 1:nrow(c_errors)) {
|
||||
c_errcode <- as.character(c_errors$code[r])
|
||||
c_errissue <- as.character(c_errors$issue[r])
|
||||
c_errtags <- as.character(c_errors$tags[r])
|
||||
c_errtext <- as.character(c_errors$text[r])
|
||||
c_errid <- as.character(c_errors$str_id[r])
|
||||
cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="")
|
||||
source("issuecomp-codingsample-function2.R")
|
||||
}
|
||||
for(r in 1:nrow(c_errors)) {
|
||||
c_errcode <- as.character(c_errors$code[r])
|
||||
c_errissue <- as.character(c_errors$issue[r])
|
||||
c_errtags <- as.character(c_errors$tags[r])
|
||||
c_errtext <- as.character(c_errors$text[r])
|
||||
c_errid <- as.character(c_errors$str_id[r])
|
||||
cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="")
|
||||
source("issuecomp-codingsample-function2.R")
|
||||
}
|
||||
for(r in 1:nrow(c_errors)) {
|
||||
c_errcode <- as.character(c_errors$code[r])
|
||||
c_errissue <- as.character(c_errors$issue[r])
|
||||
c_errtags <- as.character(c_errors$tags[r])
|
||||
c_errtext <- as.character(c_errors$text[r])
|
||||
c_errid <- as.character(c_errors$str_id[r])
|
||||
cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="")
|
||||
source("issuecomp-codingsample-function2.R")
|
||||
}
|
||||
for(r in 1:nrow(c_errors)) {
|
||||
c_errcode <- as.character(c_errors$code[r])
|
||||
c_errissue <- as.character(c_errors$issue[r])
|
||||
c_errtags <- as.character(c_errors$tags[r])
|
||||
c_errtext <- as.character(c_errors$text[r])
|
||||
c_errid <- as.character(c_errors$str_id[r])
|
||||
cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="")
|
||||
source("issuecomp-codingsample-function2.R")
|
||||
}
|
||||
for(r in 1:nrow(c_errors)) {
|
||||
c_errcode <- as.character(c_errors$code[r])
|
||||
c_errissue <- as.character(c_errors$issue[r])
|
||||
c_errtags <- as.character(c_errors$tags[r])
|
||||
@@ -510,3 +362,151 @@ View(issues)
|
||||
test <- VAR(issues[,2:44], p=1, type="none")
|
||||
VAR(issues_s[,2:23], p=1, type=c("const", "trend", "both", "none"), season=NULL, exogen = issues_i[2:22])
|
||||
plot(irf(test, impulse = names(issues_s[2:23]), response = names(issues_i[2:22])))
|
||||
rm(c_correct, c_curissue, c_errcode, c_errid, c_errissue, c_error1, c_error2, c_errors)
|
||||
rm(c_issues, c_issuelist, c_issueheads)
|
||||
rm(c_errtags, c_errtext, c_result, c_tag, c_tmp, c_tweets)
|
||||
require(stringr)
|
||||
require(XML)
|
||||
# Ask the user a question on the console and hand back the typed reply.
#
# question: text shown as the readline() prompt.
# Returns the entered line, coerced to character.
readYN <- function(question) {
  answer <- readline(prompt = question)
  as.character(answer)
}
|
||||
# Tell whether `string` is matched by at least one entry of `issuelist`.
#
# string:    text to test (first argument of str_detect()).
# issuelist: vector handed to str_detect() as its pattern argument.
# Returns the result of any() over the individual match results.
checkIssue <- function(string, issuelist) {
  hits <- str_detect(string, issuelist)
  any(hits)
}
|
||||
# Validate every entry of `string` against `issuelist` via checkIssue().
#
# string:    vector of issue names to validate.
# issuelist: known issues, forwarded unchanged to checkIssue().
# Returns a logical vector, one element per entry of `string`; for each entry
# that is not found a hint is printed to the console. Empty input yields
# logical(0).
checkAllIssues <- function(string, issuelist) {
  # Preallocate the result and iterate with seq_along(): 1:length(x) would
  # walk over c(1, 0) when `string` is empty.
  status <- logical(length(string))
  for (i in seq_along(string)) {
    status[i] <- checkIssue(string[i], issuelist)
    if (!status[i]) {
      cat("Issue", string[i], "does not exist. Please try again.\n")
    }
  }
  status
}
|
||||
View(tweets)
|
||||
c_tweets <- read.csv("tweets.csv", colClasses="character")
|
||||
for(r in 1:nrow(c_tweets)) {
|
||||
curtext <- as.character(c_tweets$text[r])
|
||||
if(str_detect(curtext, "\"")) {
|
||||
c_tweets$text[r] <- str_replace(curtext, "\"", "")
|
||||
}
|
||||
}
|
||||
c_tweets$X <- NULL
|
||||
c_issues <- data.frame(date = drange)
|
||||
c_issuelist <- xmlToList("issues-v2.xml")
|
||||
c_issueheads <- names(issuelist)
|
||||
c_issues[issueheads] <- 0
|
||||
source("issuecomp-codingsample-function.R")
|
||||
require(stringr)
|
||||
curtext
|
||||
curtext <- str_replace_all(curtext, "#", "")
|
||||
curtext <- str_replace_all(curtext, "-", " ")
|
||||
curtext
|
||||
curtext
|
||||
str_replace_all(curtext, "[^[:alnum:]]", "")
|
||||
str_replace_all(curtext, "[^[:alnum:]\s]", "")
|
||||
str_replace_all(curtext, "[^[:alnum:]\\s]", "")
|
||||
str_replace_all(curtext, "[^[:alnum:]^\\s]", "")
|
||||
str_replace_all(curtext, "[^[:alnum:]^\\S]", "")
|
||||
str_replace_all(curtext, "[^[:alnum:]][^\\s]", "")
|
||||
str_replace_all(curtext, "[^[:alnum:]][^\\S]", "")
|
||||
str_replace_all(curtext, "[^[:alnum:]][^[:blank]]", "")
|
||||
str_replace_all(curtext, "[^[:alnum:]][^[:blank:]]", "")
|
||||
str_replace_all(curtext, "[^[:alnum:]]", "")
|
||||
str_replace_all(curtext, "\\W", "")
|
||||
str_replace_all(curtext, "[\\W|\\S]", "")
|
||||
str_replace_all(curtext, "(\\W|\\S)", "")
|
||||
str_replace_all(curtext, "\\W|\\S", "")
|
||||
str_replace_all(curtext, "\\W", "")
|
||||
str_replace_all(curtext, "[\\W\\S]", "")
|
||||
str_replace_all(curtext, "[\\S\\W]", "")
|
||||
str_replace_all(curtext, "[\\s\\W]", "")
|
||||
str_replace_all(curtext, "[\\W\\s]", "")
|
||||
str_replace_all(curtext, "[\\W\s]", "")
|
||||
str_replace_all(curtext, "[\\Ws]", "")
|
||||
str_replace_all(curtext, "[\\W]", "")
|
||||
str_replace_all(curtext, "\\W", "")
|
||||
str_replace_all(curtext, "\\W|\\S", "")
|
||||
str_replace_all(curtext, "\\W|\\s", "")
|
||||
str_replace_all(curtext, "[^[:alnum:]]", "")
|
||||
str_replace_all(curtext, "[^[:alnum:] ]", "")
|
||||
str_replace_all(curtext, "[^[:alnum:]\\s]", "")
|
||||
str_replace_all(curtext, "[^[:alnum:] ]", "")
|
||||
curtext
|
||||
curtext <- "liebe @cdu, wir finden #Steuer gut, aber die KFZ-Steuer nicht!"
|
||||
curtext <- str_replace_all(curtext, "-", " ")
|
||||
curtext <- str_replace_all(curtext, "[^[:alnum:] ]", "")
|
||||
curtext
|
||||
curtext <- "liebe @cdu, wir finden #Steuer gut, aber die KFZ--Steuer nicht!"
|
||||
curtext <- str_replace_all(curtext, "-", " ")
|
||||
curtext <- str_replace_all(curtext, "[^[:alnum:] ]", "")
|
||||
curtext
|
||||
str_replace_all(curtext, " ", " ")
|
||||
smartPatternMatch
|
||||
require(vars)
|
||||
require(stringr)
|
||||
adf1 <- summary(ur.df(issues))
|
||||
issues
|
||||
summary(issues)
|
||||
summary(issues[2:44])
|
||||
summary(issues[2:44], digits = 2)
|
||||
adf1 <- summary(ur.df(issues[, 2:44]), type ="trend", lags=1)
|
||||
data("Canda")
|
||||
data("Canada")
|
||||
class(Canada)
|
||||
class(issues)
|
||||
view(Canada)
|
||||
View(Canada)
|
||||
as.ts(issues)
|
||||
issues_ts <- as.ts(issues)
|
||||
class(issues_ts)
|
||||
View(issues_ts)
|
||||
View(issues)
|
||||
adf1 <- summary(ur.df(issues_ts[, 2:44]), type ="trend", lags=1)
|
||||
adf1 <- summary(ur.df(issues_ts[, 2]), type ="trend", lags=1)
|
||||
adf1 <- summary(ur.df(issues_ts[, 2], type ="trend", lags=1))
|
||||
adf1 <- summary(ur.df(issues_ts[, 2:44], type ="trend", lags=1))
|
||||
adf1 <- summary(ur.df(issues_ts[, 2], type ="trend", lags=1))
|
||||
adf1
|
||||
adf1 <- summary(ur.df(issues_ts[, 3], type ="trend", lags=1))
|
||||
adf1
|
||||
adf1 <- summary(ur.df(issues_ts[, 2], type ="none", lags=1))
|
||||
adf1
|
||||
adf1 <- summary(ur.df(issues_ts[, 2], type ="trend", lags=1))
|
||||
adf1
|
||||
summary(ur.df(issues_ts[, 2], type ="none", lags=1))
|
||||
VARselect(issues_ts[2:44], lag.max = 8, type = "both")
|
||||
VARselect(issues_ts[1:44], lag.max = 8, type = "both")
|
||||
VARselect(issues[1:44], lag.max = 8, type = "both")
|
||||
VARselect(issues[2:44], lag.max = 8, type = "both")
|
||||
VARselect(issues_ts[2:44], lag.max = 8, type = "both")
|
||||
VARselect(issues[2:44], lag.max = 8, type = "none")
|
||||
VARselect(issues[2:44], lag.max = 8, type = "trend")
|
||||
VARselect(issues[2:44], lag.max = 8, type = "const")
|
||||
VARselect(issues[2:44], lag.max = 8, type = "both")
|
||||
test <- VAR(issues[,2:44], p=1, type="both")
|
||||
# VAR(issues_s[,2:23], p=1, type=c("const", "trend", "both", "none"), season=NULL, exogen = issues_i[2:22])
|
||||
plot(irf(test, impulse = names(issues_s[2:23]), response = names(issues_i[2:22])))
|
||||
summary(ur.df(issues_ts[, 2], type ="both", lags=1))
|
||||
summary(ur.df(issues_ts[, 2], type ="none", lags=1))
|
||||
test <- VAR(issues_ts[,2:44], p=1, type="both")
|
||||
plot(irf(test, impulse = names(issues_s[2:23]), response = names(issues_i[2:22])))
|
||||
acc_df <- read.csv("MdB-twitter.csv")
|
||||
# Keep only the rows that carry a Twitter account name.
# A logical mask is used instead of collecting row numbers and dropping them
# with negative indices: if no blank account exists, the collected index stays
# empty and acc_df[-delrow, ] would select zero rows, wiping the whole table.
has_account <- nzchar(as.character(acc_df$twitter_acc))
acc_df <- acc_df[has_account, ]
rm(has_account)
|
||||
acc_df$row.names <- NULL
|
||||
row.names(acc_df) <- NULL
|
||||
View(acc_df)
|
||||
|
||||
@@ -56,9 +56,11 @@ foreach(d = 1:nrow(issues), .packages = c("stringr"), .combine=rbind) %dopar% {
|
||||
|
||||
for(t in 1:nrow(tweets_curday)){
|
||||
# cat(paste("Starting tweet", t, "of",as.character(curdate),"\n"), file="issuecomp-analysis.log", append=TRUE)
|
||||
# Select tweet's text, make it lowercase and remove hashtag indicators (#)
|
||||
# Select tweet's text, make it lowercase and remove hashtags, mentions and replace hyphens by spaces
|
||||
curtext <- as.character(tweets_curday$text[t])
|
||||
curtext <- str_replace_all(curtext, "#", "")
|
||||
curtext <- str_replace_all(curtext, "-", " ")
|
||||
curtext <- str_replace_all(curtext, "[^[:alnum:] ]", "")
|
||||
curtext <- str_replace_all(curtext, " ", " ") # remove double spaces
|
||||
|
||||
curid <- as.character(tweets_curday$id_str[t])
|
||||
|
||||
@@ -95,7 +97,7 @@ foreach(d = 1:nrow(issues), .packages = c("stringr"), .combine=rbind) %dopar% {
|
||||
}
|
||||
|
||||
# Set Levenshtein distance depending on char length, acronym and hashtag status
|
||||
if(curchars <= 6 || curacro || curhash) { # Distance = 1 if 7 chars or longer
|
||||
if(curchars <= 7 || curacro || curhash) { # Distance = 1 if 8 chars or longer
|
||||
curdistance <- 0
|
||||
} else {
|
||||
curdistance <- 1
|
||||
|
||||
+17
-1
@@ -81,13 +81,19 @@ g1
|
||||
# test <- VAR(issues[,2:32], p=1, type=c("const", "trend", "both", "none"), season=NULL, exogen = NULL, lag.max = NULL, ic = c("AIC", "HQ", "SC", "FPE"))
|
||||
# test <- VAR(issues_i[,2:22], p=1, type="none", exogen = issues_s[,2:3])
|
||||
# test <- VAR(issues_s[,2:11], p=1, type="none")
|
||||
test <- VAR(issues[,2:44], p=1, type="none")
|
||||
# VAR(issues_s[,2:23], p=1, type=c("const", "trend", "both", "none"), season=NULL, exogen = issues_i[2:22])
|
||||
|
||||
issues_ts <- as.ts(issues)
|
||||
vIssues <- VAR(issues_ts[,2:44], p=1, type="both")
|
||||
|
||||
plot(irf(test, impulse = names(issues_s[2:23]), response = names(issues_i[2:22])))
|
||||
|
||||
capture.output(print(summary(test), prmsd=TRUE, digits=1), file="out.txt")
|
||||
|
||||
# Tests
|
||||
issues_ts <- as.ts(issues)
|
||||
VARselect(issues[2:44], lag.max = 8, type = "both")
|
||||
summary(ur.df(issues_ts[, 2], type ="none", lags=1))
|
||||
|
||||
# SOME TESTS --------------------------------------------------------------
|
||||
|
||||
@@ -123,6 +129,16 @@ pie(acc_parties$twitter, col=c("black", "red", "purple", "green"), labels = c("C
|
||||
rm(acc_parties, p)
|
||||
|
||||
|
||||
# Count all tags
# Add up the number of entries stored under every issue. vapply() returns an
# empty integer vector for an empty issuelist, so the total is 0 instead of
# the subscript error that a 1:length(issuelist) loop raises in that case.
num <- sum(vapply(issuelist, length, integer(1)))
num
|
||||
|
||||
|
||||
# VISUALS -----------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
@@ -45,7 +45,7 @@ c_tweets$X <- NULL
|
||||
|
||||
# Read all issues from XML file
|
||||
c_issues <- data.frame(date = drange)
|
||||
c_issuelist <- xmlToList("issues.xml")
|
||||
c_issuelist <- xmlToList("issues-v2.xml")
|
||||
c_issueheads <- names(issuelist)
|
||||
c_issues[issueheads] <- 0
|
||||
|
||||
|
||||
@@ -1 +1,11 @@
|
||||
<s.ukraine>
|
||||
<tag>#Janukowitsch</tag>
|
||||
</s.ukraine>
|
||||
|
||||
<i2.civil>
|
||||
<tag>Foltermethode</tag>
|
||||
</i2.civil>
|
||||
|
||||
<i19.ib>
|
||||
--<tag>Afghanistan</tag>
|
||||
</i19.ib>
|
||||
|
||||
+1983
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user