Files
uni-ba-socialagenda/.Rhistory
T
2015-03-04 11:36:31 +01:00

513 lines
19 KiB
R

# Manual run of the tag preparation steps for whatever value curtag holds.
# The assignments of curtag and curchars for this run are not part of the
# visible history; both are expected to exist already.
# Check if tag is an acronym. If so, ignore.case will be deactivated in smartPatternMatch
curacro <- checkAcronym(string = curtag)
# Check if tag is some kind of specific hashtag. If so, do not handle as acronym, but don't expand it either
# The leading "#" is stripped and not counted in curchars.
if(str_detect(curtag, "^#")) {
curacro <- FALSE
curhash <- TRUE
curtag <- str_replace(curtag, "#", "")
curchars <- curchars - 1
} else {
curhash <- FALSE
}
# Now expand the current tag by possible suffixes that may be plural forms
# Only do if it isn't an acronym or specific hashtag
# NOTE(review): the first iteration overwrites curtag[1], which every later
# iteration appends to; this is only harmless if tagexpand[1] is "" - confirm.
# NOTE(review): 1:length(tagexpand) also iterates when tagexpand is empty.
if(!curacro && !curhash) {
for(e in 1:length(tagexpand)) {
curtag[e] <- str_c(curtag[1], tagexpand[e])
}
}
##############
# Report the edit distance that would be used: 0 for tags of at most four
# characters, acronyms and hashtags, otherwise 1.
if(curchars <= 4 || curacro || curhash) {
cat("distance 0\n")
} else {
cat("distance 1\n")
}
# Manual run of the tag preparation steps for the tag "EURATOM".
curtag <- "EURATOM"
curchars <- nchar(curtag, type = "chars")
# Check if tag is an acronym. If so, ignore.case will be deactivated in smartPatternMatch
curacro <- checkAcronym(string = curtag)
# A leading "#" marks a specific hashtag: it is not handled as an acronym and
# not expanded; the "#" is stripped and not counted as a character.
curhash <- str_detect(curtag, "^#")
if (curhash) {
  curacro <- FALSE
  curtag <- str_replace(curtag, "#", "")
  curchars <- curchars - 1
}
# Expand the tag by the suffixes in tagexpand (possible plural forms), unless
# it is an acronym or a hashtag. str_c() is vectorised, so every variant is
# built from the unmodified base tag; the former index loop overwrote
# curtag[1] before appending the remaining suffixes and also iterated over
# an empty tagexpand.
if (!curacro && !curhash && length(tagexpand) > 0) {
  curtag <- str_c(curtag[1], tagexpand)
}
##############
# Report the edit distance that would be used: 0 for tags of at most four
# characters, acronyms and hashtags, otherwise 1.
if (curchars <= 4 || curacro || curhash) {
  cat("distance 0\n")
} else {
  cat("distance 1\n")
}
# Manual run of the tag preparation steps for the tag "Energiewende".
curtag <- "Energiewende"
curchars <- nchar(curtag, type = "chars")
# Check if tag is an acronym. If so, ignore.case will be deactivated in smartPatternMatch
curacro <- checkAcronym(string = curtag)
# A leading "#" marks a specific hashtag: it is not handled as an acronym and
# not expanded; the "#" is stripped and not counted as a character.
curhash <- str_detect(curtag, "^#")
if (curhash) {
  curacro <- FALSE
  curtag <- str_replace(curtag, "#", "")
  curchars <- curchars - 1
}
# Expand the tag by the suffixes in tagexpand (possible plural forms), unless
# it is an acronym or a hashtag. The vectorised str_c() builds every variant
# from the unmodified base tag and is skipped when tagexpand is empty.
if (!curacro && !curhash && length(tagexpand) > 0) {
  curtag <- str_c(curtag[1], tagexpand)
}
##############
# Report the edit distance that would be used: 0 for tags of at most four
# characters, acronyms and hashtags, otherwise 1.
if (curchars <= 4 || curacro || curhash) {
  cat("distance 0\n")
} else {
  cat("distance 1\n")
}
# Manual run of the tag preparation steps for the lower-case tag "bnd".
curtag <- "bnd"
curchars <- nchar(curtag, type = "chars")
# Check if tag is an acronym. If so, ignore.case will be deactivated in smartPatternMatch
curacro <- checkAcronym(string = curtag)
# A leading "#" marks a specific hashtag: it is not handled as an acronym and
# not expanded; the "#" is stripped and not counted as a character.
curhash <- str_detect(curtag, "^#")
if (curhash) {
  curacro <- FALSE
  curtag <- str_replace(curtag, "#", "")
  curchars <- curchars - 1
}
# Expand the tag by the suffixes in tagexpand (possible plural forms), unless
# it is an acronym or a hashtag. The vectorised str_c() builds every variant
# from the unmodified base tag and is skipped when tagexpand is empty.
if (!curacro && !curhash && length(tagexpand) > 0) {
  curtag <- str_c(curtag[1], tagexpand)
}
##############
# Report the edit distance that would be used: 0 for tags of at most four
# characters, acronyms and hashtags, otherwise 1.
if (curchars <= 4 || curacro || curhash) {
  cat("distance 0\n")
} else {
  cat("distance 1\n")
}
# Manual run of the tag preparation steps for the hashtag "#WM".
curtag <- "#WM"
curchars <- nchar(curtag, type = "chars")
# Check if tag is an acronym. If so, ignore.case will be deactivated in smartPatternMatch
curacro <- checkAcronym(string = curtag)
# A leading "#" marks a specific hashtag: it is not handled as an acronym and
# not expanded; the "#" is stripped and not counted as a character.
curhash <- str_detect(curtag, "^#")
if (curhash) {
  curacro <- FALSE
  curtag <- str_replace(curtag, "#", "")
  curchars <- curchars - 1
}
# Expand the tag by the suffixes in tagexpand (possible plural forms), unless
# it is an acronym or a hashtag. The vectorised str_c() builds every variant
# from the unmodified base tag and is skipped when tagexpand is empty.
if (!curacro && !curhash && length(tagexpand) > 0) {
  curtag <- str_c(curtag[1], tagexpand)
}
##############
# Report the edit distance that would be used: 0 for tags of at most four
# characters, acronyms and hashtags, otherwise 1.
if (curchars <= 4 || curacro || curhash) {
  cat("distance 0\n")
} else {
  cat("distance 1\n")
}
# Show the tag after the "#" has been stripped.
curtag
# Repeated manual run of the tag preparation steps for "Energiewende".
curtag <- "Energiewende"
curchars <- nchar(curtag, type = "chars")
# Check if tag is an acronym. If so, ignore.case will be deactivated in smartPatternMatch
curacro <- checkAcronym(string = curtag)
# A leading "#" marks a specific hashtag: it is not handled as an acronym and
# not expanded; the "#" is stripped and not counted as a character.
curhash <- str_detect(curtag, "^#")
if (curhash) {
  curacro <- FALSE
  curtag <- str_replace(curtag, "#", "")
  curchars <- curchars - 1
}
# Expand the tag by the suffixes in tagexpand (possible plural forms), unless
# it is an acronym or a hashtag. The vectorised str_c() builds every variant
# from the unmodified base tag and is skipped when tagexpand is empty.
if (!curacro && !curhash && length(tagexpand) > 0) {
  curtag <- str_c(curtag[1], tagexpand)
}
##############
# Report the edit distance that would be used: 0 for tags of at most four
# characters, acronyms and hashtags, otherwise 1.
if (curchars <= 4 || curacro || curhash) {
  cat("distance 0\n")
} else {
  cat("distance 1\n")
}
# Prepare the tag "Energiewende" and probe smartPatternMatch with it.
curtag <- "Energiewende"
curchars <- nchar(curtag, type = "chars")
# Check if tag is an acronym. If so, ignore.case will be deactivated in smartPatternMatch
curacro <- checkAcronym(string = curtag)
# A leading "#" marks a specific hashtag: it is not handled as an acronym and
# not expanded; the "#" is stripped and not counted as a character.
curhash <- str_detect(curtag, "^#")
if (curhash) {
  curacro <- FALSE
  curtag <- str_replace(curtag, "#", "")
  curchars <- curchars - 1
}
# Expand the tag by the suffixes in tagexpand (possible plural forms), unless
# it is an acronym or a hashtag. The vectorised str_c() builds every variant
# from the unmodified base tag and is skipped when tagexpand is empty.
if (!curacro && !curhash && length(tagexpand) > 0) {
  curtag <- str_c(curtag[1], tagexpand)
}
# Set Levenshtein distance depending on char length, acronym and hashtag status
if (curchars <= 4 || curacro || curhash) {
  curdistance <- 0
} else {
  curdistance <- 1
}
curtag
# Probe the matcher with the whole tag vector, with single variants, and with
# the vector passed through sprintf().
smartPatternMatch("Die Energiewende ist toll!", curtag, curdistance, curacro)
smartPatternMatch("Die Energiewende ist toll!", curtag[1], curdistance, curacro)
smartPatternMatch("Die Energiewende ist toll!", curtag[2], curdistance, curacro)
smartPatternMatch("Die Energiewende ist toll!", sprintf("%s", curtag), curdistance, curacro)
# First attempt: collect one match result per tag variant.
# NOTE(review): within the visible history curtext is only assigned after
# this loop, so this attempt presumably failed or used a stale value.
tags_found <- NULL
# Match the tweet with each variation of tagexpand
for(e in 1:length(curtag)) {
tags_found[e] <- smartPatternMatch(curtext, curtag[e], curdistance, curacro)
}
# Second attempt, now with the tweet text defined first.
curtext <- "Die Energiewende ist toll!"
tags_found <- NULL
# Match the tweet with each variation of tagexpand
for(e in 1:length(curtag)) {
tags_found[e] <- smartPatternMatch(curtext, curtag[e], curdistance, curacro)
}
# Inspect the per-variant results and the variants themselves.
tags_found
curtag
# Full manual run: prepare the hashtag "#WM2014" and match it against a tweet.
curtag <- "#WM2014"
curtext <- "Ich freu mich auf wm2014 sehr"
curchars <- nchar(curtag, type = "chars")
# Check if tag is an acronym. If so, ignore.case will be deactivated in smartPatternMatch
curacro <- checkAcronym(string = curtag)
# A leading "#" marks a specific hashtag: it is not handled as an acronym and
# not expanded; the "#" is stripped and not counted as a character.
curhash <- str_detect(curtag, "^#")
if (curhash) {
  curacro <- FALSE
  curtag <- str_replace(curtag, "#", "")
  curchars <- curchars - 1
}
# Expand the tag by the suffixes in tagexpand (possible plural forms), unless
# it is an acronym or a hashtag. The vectorised str_c() builds every variant
# from the unmodified base tag and is skipped when tagexpand is empty.
if (!curacro && !curhash && length(tagexpand) > 0) {
  curtag <- str_c(curtag[1], tagexpand)
}
# Set Levenshtein distance depending on char length, acronym and hashtag status:
# tags of at most four characters, acronyms and hashtags must match exactly
# (distance 0); every other tag may differ by one edit (distance 1).
if (curchars <= 4 || curacro || curhash) {
  curdistance <- 0
} else {
  curdistance <- 1
}
# Match the tweet with each tag variant. The result vector is preallocated and
# indexed with seq_along() so an empty curtag yields no iterations.
tags_found <- logical(length(curtag))
for (e in seq_along(curtag)) {
  tags_found[e] <- smartPatternMatch(curtext, curtag[e], curdistance, curacro)
}
# The tag counts as found if any variant matched.
tags_found <- any(tags_found)
tags_found
curtag
curtext
curdistance
# Exploratory vector autoregressions on the issue time series.
# NOTE(review): VAR() and irf() are presumably from the 'vars' package; the
# package load is not part of the visible history.
# Lag order 3 on columns 2:32 of issues.
# NOTE(review): type and ic receive the full choice vectors, which looks like
# the function's default signature pasted in - confirm which values are used.
test <- VAR(issues[,2:32], p=3, type=c("const", "trend", "both", "none"), season=NULL, exogen = NULL, lag.max = NULL, ic = c("AIC", "HQ", "SC", "FPE"))
test
# Lag order 1 without deterministic terms; the printed summary goes to out.txt.
test <- VAR(issues[,2:32], p=1, type="none")
capture.output(print(summary(test), prmsd=TRUE, digits=1), file="out.txt")
View(issues)
# Columns 2:22 of issues_i as endogenous series with columns of issues_s as
# exogenous regressors; the first fit is immediately replaced by the second.
test <- VAR(issues_i[,2:22], p=1, type="none", exogen = issues_s[,2])
test <- VAR(issues_i[,2:22], p=1, type="none", exogen = issues_s[,2:3])
# Overwrites the out.txt written above.
capture.output(print(summary(test), prmsd=TRUE, digits=1), file="out.txt")
irf(test)
# Model on columns 2:11 of issues_s only, with its impulse responses.
test <- VAR(issues_s[,2:11], p=1, type="none")
irf(test)
plot(irf(test))
# Back to the model on issues[,2:32]; impulses are named after issues_s
# columns 2:11 and responses after issues_i columns 2:22.
test <- VAR(issues[,2:32], p=1, type="none")
plot(irf(test, impulse = names(issues_s[2:11]), response = names(issues_i[2:22])))
# Same plot restricted to 5 steps ahead.
plot(irf(test, impulse = names(issues_s[2:11]), response = names(issues_i[2:22]), n.ahead = 5))
require(stringr)
require(XML)
# Ask a question on the console and hand back what the user typed.
#
# question: text shown as the readline() prompt.
# Returns the entered line as a character string.
readYN <- function(question) {
  answer <- readline(prompt = question)
  as.character(answer)
}
# Test whether a string is matched by at least one entry of an issue list.
#
# string:    text to search in.
# issuelist: vector whose entries are passed to str_detect() as patterns.
# Returns the result of any() over the individual detections.
checkIssue <- function(string, issuelist) {
  hits <- str_detect(string, issuelist)
  any(hits)
}
# Check every entry of a character vector against the issue list.
#
# string:    vector of issue names to validate, one check per element.
# issuelist: issue list handed on to checkIssue().
# Returns a logical vector with one element per entry of string; for every
# entry that checkIssue() rejects a message is printed to the console.
# An empty input yields logical(0) without calling checkIssue().
checkAllIssues <- function(string, issuelist) {
  # Preallocate instead of growing from NULL; seq_along() avoids the
  # 1:0 iteration that 1:length() produces for empty input.
  status <- logical(length(string))
  for (i in seq_along(string)) {
    if (checkIssue(string[i], issuelist)) {
      status[i] <- TRUE
    } else {
      cat("Issue",string[i],"does not exist. Please try again.\n")
      status[i] <- FALSE
    }
  }
  status
}
# Set up the objects for the coding sample: a character-only copy of the
# tweets and an issue table with one zero-filled column per issue.
View(tweets)
# Round-trip through CSV so that every column is read back as character.
write.csv(tweets, file = "tweets.csv")
c_tweets <- read.csv("tweets.csv", colClasses = "character")
View(c_tweets)
# Drop the row-name column that write.csv() added.
c_tweets$X <- NULL
c_issues <- data.frame(date = drange)
c_issuelist <- xmlToList("issues.xml")
# Use the c_ objects created here rather than issuelist/issueheads from the
# surrounding session, so this setup does not depend on them.
c_issueheads <- names(c_issuelist)
c_issues[c_issueheads] <- 0
# Run the coding-sample script, clear its temporary variables, then walk
# through the recorded errors one by one.
source("issuecomp-codingsample-function.R")
rm(c_err, c_result, c_samid, c_samno, c_samtags, c_samissue, c_samtext, c_yn)
c_errors <- read.csv("issuecomp-codingsample-error.csv", header = FALSE, sep = ",", colClasses = "character")
View(c_errors)
names(c_errors) <- c("str_id", "code", "issue", "tags", "text")
View(c_errors)
# seq_len() keeps the loop from running on rows 1 and 0 when the file is empty.
for (r in seq_len(nrow(c_errors))) {
  c_errcode <- as.character(c_errors$code[r])
  c_errissue <- as.character(c_errors$issue[r])
  c_errtags <- as.character(c_errors$tags[r])
  c_errtext <- as.character(c_errors$text[r])
  c_errid <- as.character(c_errors$str_id[r])
  cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errtags, "\n", sep="")
  # Sourced once per row; presumably reads the c_err* variables set above -
  # confirm in that script.
  source("issuecomp-codingsample-function2.R")
}
# Reload the recorded errors and walk through them one by one.
c_errors <- read.csv("issuecomp-codingsample-error.csv", header = FALSE, sep = ",", colClasses = "character")
names(c_errors) <- c("str_id", "code", "issue", "tags", "text")
# seq_len() keeps the loop from running on rows 1 and 0 when the file is empty.
for (r in seq_len(nrow(c_errors))) {
  c_errcode <- as.character(c_errors$code[r])
  c_errissue <- as.character(c_errors$issue[r])
  c_errtags <- as.character(c_errors$tags[r])
  c_errtext <- as.character(c_errors$text[r])
  c_errid <- as.character(c_errors$str_id[r])
  cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errtags, "\n", sep="")
  # Sourced once per row; presumably reads the c_err* variables set above -
  # confirm in that script.
  source("issuecomp-codingsample-function2.R")
}
# Two further passes over the recorded errors; the banner now shows the issue
# followed by its tags in parentheses.
# seq_len() keeps each loop from running on rows 1 and 0 when c_errors is empty.
for (r in seq_len(nrow(c_errors))) {
  c_errcode <- as.character(c_errors$code[r])
  c_errissue <- as.character(c_errors$issue[r])
  c_errtags <- as.character(c_errors$tags[r])
  c_errtext <- as.character(c_errors$text[r])
  c_errid <- as.character(c_errors$str_id[r])
  cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="")
  # Sourced once per row; presumably reads the c_err* variables set above -
  # confirm in that script.
  source("issuecomp-codingsample-function2.R")
}
for (r in seq_len(nrow(c_errors))) {
  c_errcode <- as.character(c_errors$code[r])
  c_errissue <- as.character(c_errors$issue[r])
  c_errtags <- as.character(c_errors$tags[r])
  c_errtext <- as.character(c_errors$text[r])
  c_errid <- as.character(c_errors$str_id[r])
  cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="")
  source("issuecomp-codingsample-function2.R")
}
# Inspect the suffix list, rerun the coding-sample script three times, then
# reload the recorded errors and walk through them.
tagexpand
source("issuecomp-codingsample-function.R")
source("issuecomp-codingsample-function.R")
source("issuecomp-codingsample-function.R")
c_errors <- read.csv("issuecomp-codingsample-error.csv", header = FALSE, sep = ",", colClasses = "character")
names(c_errors) <- c("str_id", "code", "issue", "tags", "text")
# seq_len() keeps the loop from running on rows 1 and 0 when the file is empty.
for (r in seq_len(nrow(c_errors))) {
  c_errcode <- as.character(c_errors$code[r])
  c_errissue <- as.character(c_errors$issue[r])
  c_errtags <- as.character(c_errors$tags[r])
  c_errtext <- as.character(c_errors$text[r])
  c_errid <- as.character(c_errors$str_id[r])
  cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="")
  # Sourced once per row; presumably reads the c_err* variables set above -
  # confirm in that script.
  source("issuecomp-codingsample-function2.R")
}
# Four further passes over the recorded errors in c_errors.
# seq_len() keeps each loop from running on rows 1 and 0 when c_errors is empty.
for (r in seq_len(nrow(c_errors))) {
  c_errcode <- as.character(c_errors$code[r])
  c_errissue <- as.character(c_errors$issue[r])
  c_errtags <- as.character(c_errors$tags[r])
  c_errtext <- as.character(c_errors$text[r])
  c_errid <- as.character(c_errors$str_id[r])
  cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="")
  # Sourced once per row; presumably reads the c_err* variables set above -
  # confirm in that script.
  source("issuecomp-codingsample-function2.R")
}
for (r in seq_len(nrow(c_errors))) {
  c_errcode <- as.character(c_errors$code[r])
  c_errissue <- as.character(c_errors$issue[r])
  c_errtags <- as.character(c_errors$tags[r])
  c_errtext <- as.character(c_errors$text[r])
  c_errid <- as.character(c_errors$str_id[r])
  cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="")
  source("issuecomp-codingsample-function2.R")
}
for (r in seq_len(nrow(c_errors))) {
  c_errcode <- as.character(c_errors$code[r])
  c_errissue <- as.character(c_errors$issue[r])
  c_errtags <- as.character(c_errors$tags[r])
  c_errtext <- as.character(c_errors$text[r])
  c_errid <- as.character(c_errors$str_id[r])
  cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="")
  source("issuecomp-codingsample-function2.R")
}
for (r in seq_len(nrow(c_errors))) {
  c_errcode <- as.character(c_errors$code[r])
  c_errissue <- as.character(c_errors$issue[r])
  c_errtags <- as.character(c_errors$tags[r])
  c_errtext <- as.character(c_errors$text[r])
  c_errid <- as.character(c_errors$str_id[r])
  cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="")
  source("issuecomp-codingsample-function2.R")
}
# Load the three issuecomp-codingsample-* result files and arrange their
# columns for manual inspection.

# Tweets with wrong issues.
c_tmp <- read.csv("issuecomp-codingsample-error1.csv", header = FALSE, colClasses = "character")
names(c_tmp) <- c("str_id", "all", "wrong", "tagged", "text")
c_error1 <- c_tmp[, c("wrong", "tagged", "all", "text")]
View(c_error1)

# Tweets with missing issues.
c_tmp <- read.csv("issuecomp-codingsample-error2.csv", header = FALSE, colClasses = "character")
names(c_tmp) <- c("str_id", "all", "missing", "tagged", "text")
# Only c_error2 is assigned from this file, so c_error1 keeps holding the
# wrong-issue rows loaded above.
c_error2 <- c_tmp[, c("missing", "text", "tagged", "all")]
View(c_error2)

# Correctly tagged tweets.
c_tmp <- read.csv("issuecomp-codingsample-correct.csv", header = FALSE, colClasses = "character")
names(c_tmp) <- c("str_id", "status", "issue", "tags", "text")
c_correct <- c_tmp
View(c_correct)
# Rerun the coding-sample script, then investigate how double quotes inside
# tweet texts affect reading the error file.
source("issuecomp-codingsample-function.R")
rm(c_err, c_result, c_samid, c_samno, c_samtags, c_samissue, c_samtext, c_yn)
c_errors <- read.csv("issuecomp-codingsample-error.csv", header = FALSE, sep = ",", colClasses = "character")
# Diagnostic read with quote handling switched off, for comparison.
c_errors <- read.csv("issuecomp-codingsample-error.csv", header = FALSE, sep = ",", colClasses = "character", quote = "")
View(c_errors)
# Back to the default quote handling.
c_errors <- read.csv("issuecomp-codingsample-error.csv", header = FALSE, sep = ",", colClasses = "character")
# Double quotes inside a string literal have to be escaped.
test <- "Zitat \"total dämlich!\""
# Look up one specific tweet by id; rows are selected with a row index,
# the text column by name.
tweets$id_str == "523512815425175552"
tweets[tweets$id_str == "523512815425175552", ]
tweets[tweets$id_str == "523512815425175552", "text"]
test <- tweets[tweets$id_str == "523512815425175552", "text"]
test
test <- c_tweets[c_tweets$id_str == "523512815425175552", "text"]
test
# Remove / detect a double quote in the tweet text.
str_replace(test, "\"", "")
str_detect(test, "\"")
test <- as.character(c_tweets[c_tweets$id_str == "523512815425175552", "text"])
test
# Reload the tweets as character columns and remove double quotes from the
# tweet texts.
c_tweets <- read.csv("tweets.csv", colClasses = "character")
# str_replace_all() is vectorised over the whole column and removes every
# double quote; str_replace() in a row loop only removed the first quote of
# each tweet and left the rest in place.
c_tweets$text <- str_replace_all(c_tweets$text, "\"", "")
# Check the result on one tweet that is looked up by id.
test <- as.character(c_tweets[c_tweets$id_str == "523512815425175552", "text"])
test
View(c_tweets)
# Reload the recorded errors and walk through them twice, printing the issue
# heads in between.
c_errors <- read.csv("issuecomp-codingsample-error.csv", header = FALSE, sep = ",", colClasses = "character")
View(c_errors)
names(c_errors) <- c("str_id", "code", "issue", "tags", "text")
View(c_errors)
# seq_len() keeps each loop from running on rows 1 and 0 when the file is empty.
for (r in seq_len(nrow(c_errors))) {
  c_errcode <- as.character(c_errors$code[r])
  c_errissue <- as.character(c_errors$issue[r])
  c_errtags <- as.character(c_errors$tags[r])
  c_errtext <- as.character(c_errors$text[r])
  c_errid <- as.character(c_errors$str_id[r])
  cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="")
  # Sourced once per row; presumably reads the c_err* variables set above -
  # confirm in that script.
  source("issuecomp-codingsample-function2.R")
}
issueheads
for (r in seq_len(nrow(c_errors))) {
  c_errcode <- as.character(c_errors$code[r])
  c_errissue <- as.character(c_errors$issue[r])
  c_errtags <- as.character(c_errors$tags[r])
  c_errtext <- as.character(c_errors$text[r])
  c_errid <- as.character(c_errors$str_id[r])
  cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="")
  source("issuecomp-codingsample-function2.R")
}
# Read the three coding-sample result files into separate tables.

# Tweets that were tagged with WRONG ISSUES
c_tmp <- setNames(
  read.csv("issuecomp-codingsample-error1.csv", header = FALSE, colClasses = "character"),
  c("str_id", "all", "wrong", "tagged", "text")
)
c_error1 <- c_tmp[c("wrong", "tagged", "all", "text")]

# Tweets with MISSING ISSUES
c_tmp <- setNames(
  read.csv("issuecomp-codingsample-error2.csv", header = FALSE, colClasses = "character"),
  c("str_id", "all", "missing", "tagged", "text")
)
c_error2 <- c_tmp[c("missing", "text", "tagged", "all")]

# Tweets that were tagged CORRECTLY
c_tmp <- setNames(
  read.csv("issuecomp-codingsample-correct.csv", header = FALSE, colClasses = "character"),
  c("str_id", "status", "issue", "tags", "text")
)
c_correct <- c_tmp

# Open the tables in the data viewer.
View(c_error1)
View(c_error2)
View(c_error1)
View(c_correct)
# Further VAR exploration and lag-order selection.
# NOTE(review): VAR(), irf() and VARselect() are presumably from the 'vars'
# package; the package load is not part of the visible history.
# Model on issues_i columns 2:22 with issues_s columns 2:3 as exogenous input.
test <- VAR(issues_i[,2:22], p=1, type="none", exogen = issues_s[,2:3])
# NOTE(review): the impulses are named after issues_s columns, which are not
# among the endogenous series of the model fitted just above - confirm this
# call worked as intended.
plot(irf(test, impulse = names(issues_s[2:11]), response = names(issues_i[2:22])))
# Model on issues columns 2:32, plotted with the same impulse/response names.
test <- VAR(issues[,2:32], p=1, type="none")
plot(irf(test, impulse = names(issues_s[2:11]), response = names(issues_i[2:22])))
# Lag-order selection on issues columns 2:32 with different maximum lags and
# deterministic terms.
VARselect(issues[,2:32], lag.max=8, type="none")
VARselect(issues[,2:32], lag.max=8, type="both")
VARselect(issues[,2:32], lag.max=30, type="both")
VARselect(issues[,2:32], lag.max=15, type="both")