some small changes

master
mxmehl 8 years ago
parent aedc3ac68c
commit cbbb664e04

@ -1,151 +1,3 @@
curdistance <- 1
}
# Match current tweet with tag. If >= 5 letters allow 1 changed letter, if >=8 letters allow also 1 (Levenshtein distance)
tags_found <- NULL
# Match the tweet with each variation of tagexpand
for(e in 1:length(curtag)) {
tags_found[e] <- smartPatternMatch(curtext, curtag[e], curdistance, curacro)
}
tags_found <- any(tags_found)
tags_found
curtag
curtext
curdistance
test <- VAR(issues[,2:32], p=3, type=c("const", "trend", "both", "none"), season=NULL, exogen = NULL, lag.max = NULL, ic = c("AIC", "HQ", "SC", "FPE"))
test
test <- VAR(issues[,2:32], p=1, type="none")
capture.output(print(summary(test), prmsd=TRUE, digits=1), file="out.txt")
View(issues)
test <- VAR(issues_i[,2:22], p=1, type="none", exogen = issues_s[,2])
test <- VAR(issues_i[,2:22], p=1, type="none", exogen = issues_s[,2:3])
capture.output(print(summary(test), prmsd=TRUE, digits=1), file="out.txt")
irf(test)
test <- VAR(issues_s[,2:11], p=1, type="none")
irf(test)
plot(irf(test))
test <- VAR(issues[,2:32], p=1, type="none")
plot(irf(test, impulse = names(issues_s[2:11]), response = names(issues_i[2:22])))
plot(irf(test, impulse = names(issues_s[2:11]), response = names(issues_i[2:22]), n.ahead = 5))
require(stringr)
require(XML)
# Show `question` as a console prompt and hand back the line the user typed,
# as a character string.
readYN <- function(question) {
  answer <- readline(prompt = question)
  as.character(answer)
}
# TRUE when str_detect() finds at least one entry of `issuelist` in `string`.
# Note that str_detect() interprets the `issuelist` entries as regular
# expressions, so this is a pattern match, not an exact comparison.
checkIssue <- function(string, issuelist) {
  hits <- str_detect(string, issuelist)
  any(hits)
}
# Check every element of `string` against `issuelist` via checkIssue().
#
# Returns a logical vector with one entry per element of `string`; for each
# element that is not found a hint is printed to the console.
# An empty `string` yields logical(0) instead of failing, and an NA result
# from checkIssue() is treated as "not found" rather than crashing the `if`.
checkAllIssues <- function(string, issuelist) {
  status <- logical(length(string))
  for (i in seq_along(string)) {
    # isTRUE() maps NA (e.g. an NA input string) to FALSE
    status[i] <- isTRUE(checkIssue(string[i], issuelist))
    if (!status[i]) {
      cat("Issue",string[i],"does not exist. Please try again.\n")
    }
  }
  status
}
View(tweets)
write.csv(tweets, file="tweets.csv")
c_tweets <- read.csv("tweets.csv", colClasses="character")
View(c_tweets)
c_tweets$X <- NULL
c_issues <- data.frame(date = drange)
c_issuelist <- xmlToList("issues.xml")
c_issueheads <- names(issuelist)
c_issues[issueheads] <- 0
source("issuecomp-codingsample-function.R")
rm(c_err, c_result, c_samid, c_samno,c_samtags,c_samissue,c_samtext,c_yn)
c_errors <- read.csv("issuecomp-codingsample-error.csv", header = F, sep=",", colClasses="character")
View(c_errors)
names(c_errors) <- c("str_id", "code", "issue", "tags", "text")
View(c_errors)
# Walk through the rows of c_errors one by one: copy the row's fields into
# global c_err* variables, print the tweet, then run the review script.
for(r in 1:nrow(c_errors)) {
# Fields of the current row, each coerced to a character string
c_errcode <- as.character(c_errors$code[r])
c_errissue <- as.character(c_errors$issue[r])
c_errtags <- as.character(c_errors$tags[r])
c_errtext <- as.character(c_errors$text[r])
c_errid <- as.character(c_errors$str_id[r])
# Print a separator followed by the tweet text and its tags
cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errtags, "\n", sep="")
# NOTE(review): the sourced script presumably reads the c_err* variables set
# above - confirm against issuecomp-codingsample-function2.R
source("issuecomp-codingsample-function2.R")
}
c_errors <- read.csv("issuecomp-codingsample-error.csv", header = F, sep=",", colClasses="character")
names(c_errors) <- c("str_id", "code", "issue", "tags", "text")
for(r in 1:nrow(c_errors)) {
c_errcode <- as.character(c_errors$code[r])
c_errissue <- as.character(c_errors$issue[r])
c_errtags <- as.character(c_errors$tags[r])
c_errtext <- as.character(c_errors$text[r])
c_errid <- as.character(c_errors$str_id[r])
cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errtags, "\n", sep="")
source("issuecomp-codingsample-function2.R")
}
for(r in 1:nrow(c_errors)) {
c_errcode <- as.character(c_errors$code[r])
c_errissue <- as.character(c_errors$issue[r])
c_errtags <- as.character(c_errors$tags[r])
c_errtext <- as.character(c_errors$text[r])
c_errid <- as.character(c_errors$str_id[r])
cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="")
source("issuecomp-codingsample-function2.R")
}
for(r in 1:nrow(c_errors)) {
c_errcode <- as.character(c_errors$code[r])
c_errissue <- as.character(c_errors$issue[r])
c_errtags <- as.character(c_errors$tags[r])
c_errtext <- as.character(c_errors$text[r])
c_errid <- as.character(c_errors$str_id[r])
cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="")
source("issuecomp-codingsample-function2.R")
}
tagexpand
source("issuecomp-codingsample-function.R")
source("issuecomp-codingsample-function.R")
source("issuecomp-codingsample-function.R")
c_errors <- read.csv("issuecomp-codingsample-error.csv", header = F, sep=",", colClasses="character")
names(c_errors) <- c("str_id", "code", "issue", "tags", "text")
for(r in 1:nrow(c_errors)) {
c_errcode <- as.character(c_errors$code[r])
c_errissue <- as.character(c_errors$issue[r])
c_errtags <- as.character(c_errors$tags[r])
c_errtext <- as.character(c_errors$text[r])
c_errid <- as.character(c_errors$str_id[r])
cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="")
source("issuecomp-codingsample-function2.R")
}
for(r in 1:nrow(c_errors)) {
c_errcode <- as.character(c_errors$code[r])
c_errissue <- as.character(c_errors$issue[r])
c_errtags <- as.character(c_errors$tags[r])
c_errtext <- as.character(c_errors$text[r])
c_errid <- as.character(c_errors$str_id[r])
cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="")
source("issuecomp-codingsample-function2.R")
}
for(r in 1:nrow(c_errors)) {
c_errcode <- as.character(c_errors$code[r])
c_errissue <- as.character(c_errors$issue[r])
c_errtags <- as.character(c_errors$tags[r])
c_errtext <- as.character(c_errors$text[r])
c_errid <- as.character(c_errors$str_id[r])
cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="")
source("issuecomp-codingsample-function2.R")
}
for(r in 1:nrow(c_errors)) {
c_errcode <- as.character(c_errors$code[r])
c_errissue <- as.character(c_errors$issue[r])
c_errtags <- as.character(c_errors$tags[r])
c_errtext <- as.character(c_errors$text[r])
c_errid <- as.character(c_errors$str_id[r])
cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="")
source("issuecomp-codingsample-function2.R")
}
for(r in 1:nrow(c_errors)) {
c_errcode <- as.character(c_errors$code[r])
c_errissue <- as.character(c_errors$issue[r])
c_errtags <- as.character(c_errors$tags[r])
@ -510,3 +362,151 @@ View(issues)
test <- VAR(issues[,2:44], p=1, type="none")
VAR(issues_s[,2:23], p=1, type=c("const", "trend", "both", "none"), season=NULL, exogen = issues_i[2:22])
plot(irf(test, impulse = names(issues_s[2:23]), response = names(issues_i[2:22])))
rm(c_correct, c_curissue, c_errcode, c_errid, c_errissue, c_error1, c_error2, c_errors)
rm(c_issues, c_issuelist, c_issueheads)
rm(c_errtags, c_errtext, c_result, c_tag, c_tmp, c_tweets)
require(stringr)
require(XML)
# Show `question` as a console prompt and hand back the line the user typed,
# as a character string.
readYN <- function(question) {
  answer <- readline(prompt = question)
  as.character(answer)
}
# TRUE when str_detect() finds at least one entry of `issuelist` in `string`.
# Note that str_detect() interprets the `issuelist` entries as regular
# expressions, so this is a pattern match, not an exact comparison.
checkIssue <- function(string, issuelist) {
  hits <- str_detect(string, issuelist)
  any(hits)
}
# Check every element of `string` against `issuelist` via checkIssue().
#
# Returns a logical vector with one entry per element of `string`; for each
# element that is not found a hint is printed to the console.
# An empty `string` yields logical(0) instead of failing, and an NA result
# from checkIssue() is treated as "not found" rather than crashing the `if`.
checkAllIssues <- function(string, issuelist) {
  status <- logical(length(string))
  for (i in seq_along(string)) {
    # isTRUE() maps NA (e.g. an NA input string) to FALSE
    status[i] <- isTRUE(checkIssue(string[i], issuelist))
    if (!status[i]) {
      cat("Issue",string[i],"does not exist. Please try again.\n")
    }
  }
  status
}
View(tweets)
c_tweets <- read.csv("tweets.csv", colClasses="character")
# Strip double-quote characters from every tweet text.
# str_replace_all() removes all quotes in a text (str_replace() only removed
# the first one and left an unbalanced quote behind); seq_len() keeps the
# loop from running on an empty data frame. Texts without a quote, and NA
# texts, pass through unchanged, so no str_detect() guard is needed.
for (r in seq_len(nrow(c_tweets))) {
  curtext <- as.character(c_tweets$text[r])
  c_tweets$text[r] <- str_replace_all(curtext, "\"", "")
}
c_tweets$X <- NULL
c_issues <- data.frame(date = drange)
c_issuelist <- xmlToList("issues-v2.xml")
c_issueheads <- names(issuelist)
c_issues[issueheads] <- 0
source("issuecomp-codingsample-function.R")
require(stringr)
curtext
curtext <- str_replace_all(curtext, "#", "")
curtext <- str_replace_all(curtext, "-", " ")
curtext
curtext
str_replace_all(curtext, "[^[:alnum:]]", "")
str_replace_all(curtext, "[^[:alnum:]\s]", "")
str_replace_all(curtext, "[^[:alnum:]\\s]", "")
str_replace_all(curtext, "[^[:alnum:]^\\s]", "")
str_replace_all(curtext, "[^[:alnum:]^\\S]", "")
str_replace_all(curtext, "[^[:alnum:]][^\\s]", "")
str_replace_all(curtext, "[^[:alnum:]][^\\S]", "")
str_replace_all(curtext, "[^[:alnum:]][^[:blank]]", "")
str_replace_all(curtext, "[^[:alnum:]][^[:blank:]]", "")
str_replace_all(curtext, "[^[:alnum:]]", "")
str_replace_all(curtext, "\\W", "")
str_replace_all(curtext, "[\\W|\\S]", "")
str_replace_all(curtext, "(\\W|\\S)", "")
str_replace_all(curtext, "\\W|\\S", "")
str_replace_all(curtext, "\\W", "")
str_replace_all(curtext, "[\\W\\S]", "")
str_replace_all(curtext, "[\\S\\W]", "")
str_replace_all(curtext, "[\\s\\W]", "")
str_replace_all(curtext, "[\\W\\s]", "")
str_replace_all(curtext, "[\\W\s]", "")
str_replace_all(curtext, "[\\Ws]", "")
str_replace_all(curtext, "[\\W]", "")
str_replace_all(curtext, "\\W", "")
str_replace_all(curtext, "\\W|\\S", "")
str_replace_all(curtext, "\\W|\\s", "")
str_replace_all(curtext, "[^[:alnum:]]", "")
str_replace_all(curtext, "[^[:alnum:] ]", "")
str_replace_all(curtext, "[^[:alnum:]\\s]", "")
str_replace_all(curtext, "[^[:alnum:] ]", "")
curtext
curtext <- "liebe @cdu, wir finden #Steuer gut, aber die KFZ-Steuer nicht!"
curtext <- str_replace_all(curtext, "-", " ")
curtext <- str_replace_all(curtext, "[^[:alnum:] ]", "")
curtext
curtext <- "liebe @cdu, wir finden #Steuer gut, aber die KFZ--Steuer nicht!"
curtext <- str_replace_all(curtext, "-", " ")
curtext <- str_replace_all(curtext, "[^[:alnum:] ]", "")
curtext
str_replace_all(curtext, " ", " ")
smartPatternMatch
require(vars)
require(stringr)
adf1 <- summary(ur.df(issues))
issues
summary(issues)
summary(issues[2:44])
summary(issues[2:44], digits = 2)
adf1 <- summary(ur.df(issues[, 2:44]), type ="trend", lags=1)
data("Canda")
data("Canada")
class(Canada)
class(issues)
view(Canada)
View(Canada)
as.ts(issues)
issues_ts <- as.ts(issues)
class(issues_ts)
View(issues_ts)
View(issues)
adf1 <- summary(ur.df(issues_ts[, 2:44]), type ="trend", lags=1)
adf1 <- summary(ur.df(issues_ts[, 2]), type ="trend", lags=1)
adf1 <- summary(ur.df(issues_ts[, 2], type ="trend", lags=1))
adf1 <- summary(ur.df(issues_ts[, 2:44], type ="trend", lags=1))
adf1 <- summary(ur.df(issues_ts[, 2], type ="trend", lags=1))
adf1
adf1 <- summary(ur.df(issues_ts[, 3], type ="trend", lags=1))
adf1
adf1 <- summary(ur.df(issues_ts[, 2], type ="none", lags=1))
adf1
adf1 <- summary(ur.df(issues_ts[, 2], type ="trend", lags=1))
adf1
summary(ur.df(issues_ts[, 2], type ="none", lags=1))
VARselect(issues_ts[2:44], lag.max = 8, type = "both")
VARselect(issues_ts[1:44], lag.max = 8, type = "both")
VARselect(issues[1:44], lag.max = 8, type = "both")
VARselect(issues[2:44], lag.max = 8, type = "both")
VARselect(issues_ts[2:44], lag.max = 8, type = "both")
VARselect(issues[2:44], lag.max = 8, type = "none")
VARselect(issues[2:44], lag.max = 8, type = "trend")
VARselect(issues[2:44], lag.max = 8, type = "const")
VARselect(issues[2:44], lag.max = 8, type = "both")
test <- VAR(issues[,2:44], p=1, type="both")
# VAR(issues_s[,2:23], p=1, type=c("const", "trend", "both", "none"), season=NULL, exogen = issues_i[2:22])
plot(irf(test, impulse = names(issues_s[2:23]), response = names(issues_i[2:22])))
summary(ur.df(issues_ts[, 2], type ="both", lags=1))
summary(ur.df(issues_ts[, 2], type ="none", lags=1))
test <- VAR(issues_ts[,2:44], p=1, type="both")
plot(irf(test, impulse = names(issues_s[2:23]), response = names(issues_i[2:22])))
# Load the account table and keep only the rows whose twitter_acc is non-empty.
acc_df <- read.csv("MdB-twitter.csv")
stopifnot("twitter_acc" %in% names(acc_df))
# Select with a logical mask rather than negative row indices: when no handle
# is empty, `acc_df[-NULL, ]` selects zero rows and silently empties the table.
# nzchar() is TRUE for NA, so rows with a missing handle are kept, as before.
has_acc <- nzchar(as.character(acc_df$twitter_acc))
acc_df <- acc_df[has_acc, ]
rm(has_acc)
# Drop any stored row-name column and renumber the remaining rows from 1
acc_df$row.names <- NULL
row.names(acc_df) <- NULL
View(acc_df)

@ -56,9 +56,11 @@ foreach(d = 1:nrow(issues), .packages = c("stringr"), .combine=rbind) %dopar% {
for(t in 1:nrow(tweets_curday)){
# cat(paste("Starting tweet", t, "of",as.character(curdate),"\n"), file="issuecomp-analysis.log", append=TRUE)
# Select tweet's text, make it lowercase and remove hashtag indicators (#)
# Select tweet's text, make it lowercase, replace hyphens by spaces and strip "#", "@" and all other non-alphanumeric characters (the hashtag/mention words themselves are kept)
curtext <- as.character(tweets_curday$text[t])
curtext <- str_replace_all(curtext, "#", "")
curtext <- str_replace_all(curtext, "-", " ")
curtext <- str_replace_all(curtext, "[^[:alnum:] ]", "")
curtext <- str_replace_all(curtext, " ", " ") # remove double spaces
curid <- as.character(tweets_curday$id_str[t])
@ -95,7 +97,7 @@ foreach(d = 1:nrow(issues), .packages = c("stringr"), .combine=rbind) %dopar% {
}
# Set Levenshtein distance depending on char length, acronym and hashtag status
if(curchars <= 6 || curacro || curhash) { # Distance = 1 if 7 chars or longer
if(curchars <= 7 || curacro || curhash) { # Distance = 1 if 8 chars or longer
curdistance <- 0
} else {
curdistance <- 1

@ -81,13 +81,19 @@ g1
# test <- VAR(issues[,2:32], p=1, type=c("const", "trend", "both", "none"), season=NULL, exogen = NULL, lag.max = NULL, ic = c("AIC", "HQ", "SC", "FPE"))
# test <- VAR(issues_i[,2:22], p=1, type="none", exogen = issues_s[,2:3])
# test <- VAR(issues_s[,2:11], p=1, type="none")
test <- VAR(issues[,2:44], p=1, type="none")
# VAR(issues_s[,2:23], p=1, type=c("const", "trend", "both", "none"), season=NULL, exogen = issues_i[2:22])
issues_ts <- as.ts(issues)
vIssues <- VAR(issues_ts[,2:44], p=1, type="both")
plot(irf(test, impulse = names(issues_s[2:23]), response = names(issues_i[2:22])))
capture.output(print(summary(test), prmsd=TRUE, digits=1), file="out.txt")
# Tests
issues_ts <- as.ts(issues)
VARselect(issues[2:44], lag.max = 8, type = "both")
summary(ur.df(issues_ts[, 2], type ="none", lags=1))
# SOME TESTS --------------------------------------------------------------
@ -123,6 +129,16 @@ pie(acc_parties$twitter, col=c("black", "red", "purple", "green"), labels = c("C
rm(acc_parties, p)
# Count all tags
# Total number of entries across all elements of `issuelist`.
# lengths() gives the length of each element in one call; unlike the
# 1:length() index loop it also works for an empty list (result 0).
num <- sum(lengths(issuelist))
num
# VISUALS -----------------------------------------------------------------

@ -45,7 +45,7 @@ c_tweets$X <- NULL
# Read all issues from XML file
c_issues <- data.frame(date = drange)
c_issuelist <- xmlToList("issues.xml")
c_issuelist <- xmlToList("issues-v2.xml")
# Take the issue names from c_issuelist (the list this script just loaded)
# and add one zero-filled column per issue to c_issues. The previous version
# read the unprefixed globals `issuelist` / `issueheads`, which this script
# does not define.
c_issueheads <- names(c_issuelist)
c_issues[c_issueheads] <- 0

@ -1 +1,11 @@
<s.ukraine>
<tag>#Janukowitsch</tag>
</s.ukraine>
<i2.civil>
<tag>Foltermethode</tag>
</i2.civil>
<i19.ib>
--<tag>Afghanistan</tag>
</i19.ib>

File diff suppressed because it is too large Load Diff
Loading…
Cancel
Save