# Console replay of the tag-classification snippet, restored to one
# statement per line so the code is no longer swallowed by a leading comment.

# Check if tag is an acronym. If so, ignore.case will be deactivated in smartPatternMatch
curacro <- checkAcronym(string = curtag)
# Check if tag is some kind of specific hashtag. If so, do not handle as acronym, but don't expand it either
if (str_detect(curtag, "^#")) {
  curacro <- FALSE
  curhash <- TRUE
  curtag <- str_replace(curtag, "#", "")
  curchars <- curchars - 1
} else {
  curhash <- FALSE
}
# Now expand the current tag by possible suffixes that may be plural forms
# Only do if it isn't an acronym or specific hashtag
if (!curacro && !curhash) {
  # Vectorized so every suffix is appended to the original tag; the former
  # element-wise loop re-read curtag[1] after it had been overwritten and
  # iterated over 1:0 when tagexpand was empty.
  curtag[seq_along(tagexpand)] <- str_c(curtag[1], tagexpand)
}
##############
if (curchars <= 4 || curacro || curhash) {
  cat("distance 0\n")
} else {
  cat("distance 1\n")
}

curtag <- "EURATOM"
curchars <- nchar(curtag, type = "chars")
# Check if tag is an acronym. If so, ignore.case will be deactivated in smartPatternMatch
curacro <- checkAcronym(string = curtag)
# Check if tag is some kind of specific hashtag. If so, do not handle as acronym, but don't expand it either
if (str_detect(curtag, "^#")) {
  curacro <- FALSE
  curhash <- TRUE
  curtag <- str_replace(curtag, "#", "")
  curchars <- curchars - 1
} else {
  curhash <- FALSE
}
# Now expand the current tag by possible suffixes that may be plural forms
# Only do if it isn't an acronym or specific hashtag
if (!curacro && !curhash) {
  # Vectorized: all suffixes are appended to the original tag
  curtag[seq_along(tagexpand)] <- str_c(curtag[1], tagexpand)
}
##############
if (curchars <= 4 || curacro || curhash) {
  cat("distance 0\n")
} else {
  cat("distance 1\n")
}

curtag <- "Energiewende"
curchars <- nchar(curtag, type = "chars")
# Check if tag is an acronym. If so, ignore.case will be deactivated in smartPatternMatch
curacro <- checkAcronym(string = curtag)
# Check if tag is some kind of specific hashtag.
# If so, do not handle as acronym, but don't expand it either
if (str_detect(curtag, "^#")) {
  curacro <- FALSE
  curhash <- TRUE
  curtag <- str_replace(curtag, "#", "")
  curchars <- curchars - 1
} else {
  curhash <- FALSE
}
# Now expand the current tag by possible suffixes that may be plural forms
# Only do if it isn't an acronym or specific hashtag
if (!curacro && !curhash) {
  # Vectorized so every suffix is appended to the original tag; the former
  # element-wise loop re-read curtag[1] after it had been overwritten and
  # iterated over 1:0 when tagexpand was empty.
  curtag[seq_along(tagexpand)] <- str_c(curtag[1], tagexpand)
}
##############
if (curchars <= 4 || curacro || curhash) {
  cat("distance 0\n")
} else {
  cat("distance 1\n")
}

curtag <- "bnd"
curchars <- nchar(curtag, type = "chars")
# Check if tag is an acronym. If so, ignore.case will be deactivated in smartPatternMatch
curacro <- checkAcronym(string = curtag)
# Check if tag is some kind of specific hashtag. If so, do not handle as acronym, but don't expand it either
if (str_detect(curtag, "^#")) {
  curacro <- FALSE
  curhash <- TRUE
  curtag <- str_replace(curtag, "#", "")
  curchars <- curchars - 1
} else {
  curhash <- FALSE
}
# Now expand the current tag by possible suffixes that may be plural forms
# Only do if it isn't an acronym or specific hashtag
if (!curacro && !curhash) {
  # Vectorized: all suffixes are appended to the original tag
  curtag[seq_along(tagexpand)] <- str_c(curtag[1], tagexpand)
}
##############
if (curchars <= 4 || curacro || curhash) {
  cat("distance 0\n")
} else {
  cat("distance 1\n")
}

curtag <- "#WM"
curchars <- nchar(curtag, type = "chars")
# Check if tag is an acronym. If so, ignore.case will be deactivated in smartPatternMatch
curacro <- checkAcronym(string = curtag)
# Check if tag is some kind of specific hashtag.
# If so, do not handle as acronym, but don't expand it either
if (str_detect(curtag, "^#")) {
  curacro <- FALSE
  curhash <- TRUE
  curtag <- str_replace(curtag, "#", "")
  curchars <- curchars - 1
} else {
  curhash <- FALSE
}
# Now expand the current tag by possible suffixes that may be plural forms
# Only do if it isn't an acronym or specific hashtag
if (!curacro && !curhash) {
  # Vectorized so every suffix is appended to the original tag; the former
  # element-wise loop re-read curtag[1] after it had been overwritten and
  # iterated over 1:0 when tagexpand was empty.
  curtag[seq_along(tagexpand)] <- str_c(curtag[1], tagexpand)
}
##############
if (curchars <= 4 || curacro || curhash) {
  cat("distance 0\n")
} else {
  cat("distance 1\n")
}
curtag

curtag <- "Energiewende"
curchars <- nchar(curtag, type = "chars")
# Check if tag is an acronym. If so, ignore.case will be deactivated in smartPatternMatch
curacro <- checkAcronym(string = curtag)
# Check if tag is some kind of specific hashtag. If so, do not handle as acronym, but don't expand it either
if (str_detect(curtag, "^#")) {
  curacro <- FALSE
  curhash <- TRUE
  curtag <- str_replace(curtag, "#", "")
  curchars <- curchars - 1
} else {
  curhash <- FALSE
}
# Now expand the current tag by possible suffixes that may be plural forms
# Only do if it isn't an acronym or specific hashtag
if (!curacro && !curhash) {
  # Vectorized: all suffixes are appended to the original tag
  curtag[seq_along(tagexpand)] <- str_c(curtag[1], tagexpand)
}
##############
if (curchars <= 4 || curacro || curhash) {
  cat("distance 0\n")
} else {
  cat("distance 1\n")
}

curtag <- "Energiewende"
curchars <- nchar(curtag, type = "chars")
# Check if tag is an acronym. If so, ignore.case will be deactivated in smartPatternMatch
curacro <- checkAcronym(string = curtag)
# Check if tag is some kind of specific hashtag.
# If so, do not handle as acronym, but don't expand it either
if (str_detect(curtag, "^#")) {
  curacro <- FALSE
  curhash <- TRUE
  curtag <- str_replace(curtag, "#", "")
  curchars <- curchars - 1
} else {
  curhash <- FALSE
}
# Now expand the current tag by possible suffixes that may be plural forms
# Only do if it isn't an acronym or specific hashtag
if (!curacro && !curhash) {
  # Vectorized so every suffix is appended to the original tag; the former
  # element-wise loop re-read curtag[1] after it had been overwritten and
  # iterated over 1:0 when tagexpand was empty.
  curtag[seq_along(tagexpand)] <- str_c(curtag[1], tagexpand)
}
# Set Levenshtein distance depending on char length, acronym and hashtag status
if (curchars <= 4 || curacro || curhash) {
  curdistance <- 0
} else {
  curdistance <- 1
}
curtag

# Manual checks of smartPatternMatch against a fixed sentence: whole tag
# vector, single elements, and the sprintf()-formatted vector.
smartPatternMatch("Die Energiewende ist toll!", curtag, curdistance, curacro)
smartPatternMatch("Die Energiewende ist toll!", curtag[1], curdistance, curacro)
smartPatternMatch("Die Energiewende ist toll!", curtag[2], curdistance, curacro)
smartPatternMatch("Die Energiewende ist toll!", sprintf("%s", curtag), curdistance, curacro)

tags_found <- NULL
# Match the tweet with each variation of tagexpand
# (seq_along avoids the 1:0 iteration of 1:length() on an empty vector)
for (e in seq_along(curtag)) {
  tags_found[e] <- smartPatternMatch(curtext, curtag[e], curdistance, curacro)
}

curtext <- "Die Energiewende ist toll!"
tags_found <- NULL
# Match the tweet with each variation of tagexpand
for (e in seq_along(curtag)) {
  tags_found[e] <- smartPatternMatch(curtext, curtag[e], curdistance, curacro)
}
tags_found
curtag

curtag <- "#WM2014"
curtext <- "Ich freu mich auf wm2014 sehr"
curchars <- nchar(curtag, type = "chars")
# Check if tag is an acronym. If so, ignore.case will be deactivated in smartPatternMatch
curacro <- checkAcronym(string = curtag)
# Check if tag is some kind of specific hashtag.
# If so, do not handle as acronym, but don't expand it either
if (str_detect(curtag, "^#")) {
  curacro <- FALSE
  curhash <- TRUE
  curtag <- str_replace(curtag, "#", "")
  curchars <- curchars - 1
} else {
  curhash <- FALSE
}
# Now expand the current tag by possible suffixes that may be plural forms
# Only do if it isn't an acronym or specific hashtag
if (!curacro && !curhash) {
  # Vectorized so every suffix is appended to the original tag; the former
  # element-wise loop re-read curtag[1] after it had been overwritten and
  # iterated over 1:0 when tagexpand was empty.
  curtag[seq_along(tagexpand)] <- str_c(curtag[1], tagexpand)
}
# Set Levenshtein distance depending on char length, acronym and hashtag status
if (curchars <= 4 || curacro || curhash) {
  curdistance <- 0
} else {
  curdistance <- 1
}
# Match current tweet with tag. If >= 5 letters allow 1 changed letter, if >=8 letters allow also 1 (Levenshtein distance)
tags_found <- NULL
# Match the tweet with each variation of tagexpand
# (seq_along avoids the 1:0 iteration of 1:length() on an empty vector)
for (e in seq_along(curtag)) {
  tags_found[e] <- smartPatternMatch(curtext, curtag[e], curdistance, curacro)
}
tags_found <- any(tags_found)
tags_found
curtag
curtext
curdistance

# ---- VAR / impulse-response experiments ----
test <- VAR(issues[, 2:32], p = 3, type = c("const", "trend", "both", "none"), season = NULL, exogen = NULL, lag.max = NULL, ic = c("AIC", "HQ", "SC", "FPE"))
test
test <- VAR(issues[, 2:32], p = 1, type = "none")
capture.output(print(summary(test), prmsd = TRUE, digits = 1), file = "out.txt")
View(issues)
test <- VAR(issues_i[, 2:22], p = 1, type = "none", exogen = issues_s[, 2])
test <- VAR(issues_i[, 2:22], p = 1, type = "none", exogen = issues_s[, 2:3])
capture.output(print(summary(test), prmsd = TRUE, digits = 1), file = "out.txt")
irf(test)
test <- VAR(issues_s[, 2:11], p = 1, type = "none")
irf(test)
plot(irf(test))
test <- VAR(issues[, 2:32], p = 1, type = "none")
plot(irf(test, impulse = names(issues_s[2:11]), response = names(issues_i[2:22])))
plot(irf(test, impulse = names(issues_s[2:11]), response = names(issues_i[2:22]), n.ahead = 5))

# ---- Coding-sample helpers ----
# library() fails loudly if a package is missing; require() only warns.
library(stringr)
library(XML)

#' Prompt on the console and return the line the user typed.
#'
#' @param question Prompt text handed to readline().
#' @return The entered line as a character string.
readYN <- function(question) {
  # readline() already returns a character string, so no coercion is needed
  readline(prompt = question)
}

#' Test whether any entry of `issuelist` is detected in `string`.
#'
#' The entries of `issuelist` are passed to str_detect() as patterns, so this
#' is a pattern match against `string`, not an exact-equality lookup.
#'
#' @param string Character value to search in.
#' @param issuelist Character vector of patterns.
#' @return A single logical (the any() of the individual detections).
checkIssue <- function(string, issuelist) {
  any(str_detect(string, issuelist))
}

#' Apply checkIssue() to every element of `string`.
#'
#' Prints a message for each element that is not found.
#'
#' @param string Character vector of issue names to check.
#' @param issuelist Character vector of patterns, forwarded to checkIssue().
#' @return Logical vector with one entry per element of `string`.
checkAllIssues <- function(string, issuelist) {
  # Preallocate instead of growing from NULL; seq_along() makes empty input
  # return logical(0) instead of iterating over 1:0.
  status <- logical(length(string))
  for (i in seq_along(string)) {
    status[i] <- checkIssue(string[i], issuelist)
    if (!status[i]) {
      cat("Issue",string[i],"does not exist. Please try again.\n")
    }
  }
  status
}

# ---- Prepare coding sample ----
View(tweets)
write.csv(tweets, file = "tweets.csv")
c_tweets <- read.csv("tweets.csv", colClasses = "character")
View(c_tweets)
c_tweets$X <- NULL
c_issues <- data.frame(date = drange)
c_issuelist <- xmlToList("issues.xml")
# NOTE(review): the next two lines read `issuelist` / `issueheads`, not the
# `c_issuelist` / `c_issueheads` created here -- confirm which was intended.
c_issueheads <- names(issuelist)
c_issues[issueheads] <- 0
source("issuecomp-codingsample-function.R")
rm(c_err, c_result, c_samid, c_samno, c_samtags, c_samissue, c_samtext, c_yn)

# ---- Review of erroneous codings (tags only in the header) ----
c_errors <- read.csv("issuecomp-codingsample-error.csv", header = FALSE, sep = ",", colClasses = "character")
View(c_errors)
names(c_errors) <- c("str_id", "code", "issue", "tags", "text")
View(c_errors)
# The c_err* values are assigned at top level before each source() call.
for (r in seq_len(nrow(c_errors))) {
  c_errcode <- as.character(c_errors$code[r])
  c_errissue <- as.character(c_errors$issue[r])
  c_errtags <- as.character(c_errors$tags[r])
  c_errtext <- as.character(c_errors$text[r])
  c_errid <- as.character(c_errors$str_id[r])
  cat("===============\n\n[TWEET]: ", c_errtext, "\n[ISSUES]: ", c_errtags, "\n", sep = "")
  source("issuecomp-codingsample-function2.R")
}
c_errors <- read.csv("issuecomp-codingsample-error.csv", header = FALSE, sep = ",", colClasses = "character")
names(c_errors) <- c("str_id", "code", "issue", "tags", "text")
for (r in seq_len(nrow(c_errors))) {
  c_errcode <- as.character(c_errors$code[r])
  c_errissue <- as.character(c_errors$issue[r])
  c_errtags <- as.character(c_errors$tags[r])
  c_errtext <- as.character(c_errors$text[r])
  c_errid <- as.character(c_errors$str_id[r])
  cat("===============\n\n[TWEET]: ", c_errtext, "\n[ISSUES]: ", c_errtags, "\n", sep = "")
  source("issuecomp-codingsample-function2.R")
}

# ---- Same review, header now shows issue plus tags ----
for (r in seq_len(nrow(c_errors))) {
  c_errcode <- as.character(c_errors$code[r])
  c_errissue <- as.character(c_errors$issue[r])
  c_errtags <- as.character(c_errors$tags[r])
  c_errtext <- as.character(c_errors$text[r])
  c_errid <- as.character(c_errors$str_id[r])
  cat("===============\n\n[TWEET]: ", c_errtext, "\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep = "")
  source("issuecomp-codingsample-function2.R")
}
for (r in seq_len(nrow(c_errors))) {
  c_errcode <- as.character(c_errors$code[r])
  c_errissue <- as.character(c_errors$issue[r])
  c_errtags <- as.character(c_errors$tags[r])
  c_errtext <- as.character(c_errors$text[r])
  c_errid <- as.character(c_errors$str_id[r])
  cat("===============\n\n[TWEET]: ", c_errtext, "\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep = "")
  source("issuecomp-codingsample-function2.R")
}
tagexpand
source("issuecomp-codingsample-function.R")
source("issuecomp-codingsample-function.R")
source("issuecomp-codingsample-function.R")
c_errors <- read.csv("issuecomp-codingsample-error.csv", header = FALSE, sep = ",", colClasses = "character")
names(c_errors) <- c("str_id", "code", "issue", "tags", "text")
for (r in seq_len(nrow(c_errors))) {
  c_errcode <- as.character(c_errors$code[r])
  c_errissue <- as.character(c_errors$issue[r])
  c_errtags <- as.character(c_errors$tags[r])
  c_errtext <- as.character(c_errors$text[r])
  c_errid <- as.character(c_errors$str_id[r])
  cat("===============\n\n[TWEET]: ", c_errtext, "\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep = "")
  source("issuecomp-codingsample-function2.R")
}
for (r in seq_len(nrow(c_errors))) {
  c_errcode <- as.character(c_errors$code[r])
  c_errissue <- as.character(c_errors$issue[r])
  c_errtags <- as.character(c_errors$tags[r])
  c_errtext <- as.character(c_errors$text[r])
  c_errid <- as.character(c_errors$str_id[r])
  cat("===============\n\n[TWEET]: ", c_errtext, "\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep = "")
  source("issuecomp-codingsample-function2.R")
}
for (r in seq_len(nrow(c_errors))) {
  c_errcode <- as.character(c_errors$code[r])
  c_errissue <- as.character(c_errors$issue[r])
  c_errtags <- as.character(c_errors$tags[r])
  c_errtext <- as.character(c_errors$text[r])
  c_errid <- as.character(c_errors$str_id[r])
  cat("===============\n\n[TWEET]: ", c_errtext, "\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep = "")
  source("issuecomp-codingsample-function2.R")
}
for (r in seq_len(nrow(c_errors))) {
  c_errcode <- as.character(c_errors$code[r])
  c_errissue <- as.character(c_errors$issue[r])
  c_errtags <- as.character(c_errors$tags[r])
  c_errtext <- as.character(c_errors$text[r])
  c_errid <- as.character(c_errors$str_id[r])
  cat("===============\n\n[TWEET]: ", c_errtext, "\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep = "")
  source("issuecomp-codingsample-function2.R")
}
for (r in seq_len(nrow(c_errors))) {
  c_errcode <- as.character(c_errors$code[r])
  c_errissue <- as.character(c_errors$issue[r])
  c_errtags <- as.character(c_errors$tags[r])
  c_errtext <- as.character(c_errors$text[r])
  c_errid <- as.character(c_errors$str_id[r])
  cat("===============\n\n[TWEET]: ", c_errtext, "\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep = "")
  source("issuecomp-codingsample-function2.R")
}

# ---- Load result tables: wrong issues ----
c_tmp <- read.csv("issuecomp-codingsample-error1.csv", header = FALSE, colClasses = "character")
View(c_tmp)
View(c_errors)
View(c_tmp)
names(c_tmp) <- c("str_id", "all", "wrong", "tags", "text")
View(c_tmp)
# NOTE(review): at this point the column is still named "tags", so selecting
# "tagged" cannot succeed; the rename two statements below corrects it.
c_tmp[, c("wrong", "tagged", "all", "text")]
View(c_tmp)
names(c_tmp) <- c("str_id", "all", "wrong", "tagged", "text")
c_tmp[, c("wrong", "tagged", "all", "text")]
c_error1 <- c_tmp[, c("wrong", "tagged", "all", "text")]
View(c_error1)

# ---- Load result tables: missing issues ----
c_tmp <- read.csv("issuecomp-codingsample-error2.csv", header = FALSE, colClasses = "character")
View(c_tmp)
c_tmp <- read.csv("issuecomp-codingsample-error2.csv", header = FALSE, colClasses = "character")
names(c_tmp) <- c("str_id", "all", "missing", "tagged", "text")
# NOTE(review): this overwrites c_error1 with the error2 data; the very next
# statement assigns the same selection to c_error2 -- confirm it was a slip.
c_error1 <- c_tmp[, c("missing", "tagged", "all", "text")]
c_error2 <- c_tmp[, c("missing", "tagged", "all", "text")]
View(c_error2)
c_error2 <- c_tmp[, c("missing", "text", "tagged", "all")]
View(c_error2)
View(c_error1)
View(c_error2)

# ---- Load result tables: correct codings ----
c_tmp <- read.csv("issuecomp-codingsample-correct.csv", header = FALSE, colClasses = "character")
View(c_tmp)
names(c_tmp) <- c("str_id", "status", "issue", "tags", "text")
View(c_tmp)
c_currect <- c_tmp
c_correct <- c_tmp
rm(c_currect)
View(c_correct)
source("issuecomp-codingsample-function.R")
rm(c_err, c_result, c_samid, c_samno, c_samtags, c_samissue, c_samtext, c_yn)

# ---- Investigating double quotes in tweet text ----
c_errors <- read.csv("issuecomp-codingsample-error.csv", header = FALSE, sep = ",", colClasses = "character")
c_errors <- read.csv("issuecomp-codingsample-error.csv", header = FALSE, sep = ",", colClasses = "character")
c_errors <- read.csv("issuecomp-codingsample-error.csv", header = FALSE, sep = ",", colClasses = "character")
c_errors <- read.csv("issuecomp-codingsample-error.csv", header = FALSE, sep = ",", colClasses = "character")
c_errors <- read.csv("issuecomp-codingsample-error.csv", header = FALSE, sep = ",", colClasses = "character", quote = "")
View(c_errors)
c_errors <- read.csv("issuecomp-codingsample-error.csv", header = FALSE, sep = ",", colClasses = "character")
# Does not parse (unescaped inner quotes); kept as a comment for the record:
# test <- "Zitat "total dämlich!""
tweets$id_str == "523512815425175552"
tweets[tweets$id_str == "523512815425175552"]
tweets[tweets$id_str == "523512815425175552", ]
tweets[tweets$id_str == "523512815425175552", "text"]
test <- tweets[tweets$id_str == "523512815425175552", "text"]
test
# NOTE(review): `ctweets` looks like a typo for `c_tweets`; the following
# statement repeats the call with the corrected name.
test <- c_tweets[ctweets$id_str == "523512815425175552", "text"]
test <- c_tweets[c_tweets$id_str == "523512815425175552", "text"]
test
# Do not parse (unbalanced string quoting); kept as comments for the record:
# str_replace(test, "\\"", ")
# str_replace(test, "\\"", "")
str_replace(test, "\"", "")
str_detect(test, "\"")
test <- as.character(c_tweets[c_tweets$id_str == "523512815425175552", "text"])
test
c_tweets <- read.csv("tweets.csv", colClasses = "character")
# The next three loop attempts are missing the ")" that closes the if
# condition and do not parse; kept as comments for the record:
# for(r in 1:nrow(c_tweets)) { curtext <- as.character(c_tweets$text[r]) if(str_detect(curtext, "\"") { c_tweets$text[r] <- str_replace(curtext, "\"", "") } }
# for(r in 1:nrow(c_tweets)) { curtext <- as.character(c_tweets$text[r]) if(str_detect(curtext, "\"") { c_tweets$text[r] <- str_replace(curtext, "\"", "") } else {} }
# for(r in 1:nrow(c_tweets)) { curtext <- as.character(c_tweets$text[r]) if(str_detect(curtext, "\"") { c_tweets$text[r] <- str_replace(curtext, "\"", "") } else { } }
# Working version: strip double quotes from every tweet text.
for (r in seq_len(nrow(c_tweets))) {
  curtext <- as.character(c_tweets$text[r])
  if (str_detect(curtext, "\"")) {
    # str_replace() removes only the first match and would leave an
    # unbalanced quote behind; str_replace_all() removes every one.
    c_tweets$text[r] <- str_replace_all(curtext, "\"", "")
  }
}
test <- as.character(c_tweets[c_tweets$id_str == "523512815425175552", "text"])
test
View(c_tweets)

# ---- Review of erroneous codings, after quote clean-up ----
c_errors <- read.csv("issuecomp-codingsample-error.csv", header = FALSE, sep = ",", colClasses = "character")
View(c_errors)
names(c_errors) <- c("str_id", "code", "issue", "tags", "text")
View(c_errors)
for (r in seq_len(nrow(c_errors))) {
  c_errcode <- as.character(c_errors$code[r])
  c_errissue <- as.character(c_errors$issue[r])
  c_errtags <- as.character(c_errors$tags[r])
  c_errtext <- as.character(c_errors$text[r])
  c_errid <- as.character(c_errors$str_id[r])
  cat("===============\n\n[TWEET]: ", c_errtext, "\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep = "")
  source("issuecomp-codingsample-function2.R")
}
issueheads
for (r in seq_len(nrow(c_errors))) {
  c_errcode <- as.character(c_errors$code[r])
  c_errissue <- as.character(c_errors$issue[r])
  c_errtags <- as.character(c_errors$tags[r])
  c_errtext <- as.character(c_errors$text[r])
  c_errid <- as.character(c_errors$str_id[r])
  cat("===============\n\n[TWEET]: ", c_errtext, "\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep = "")
  source("issuecomp-codingsample-function2.R")
}

# All tweets with WRONG ISSUES
c_tmp <- read.csv("issuecomp-codingsample-error1.csv", header = FALSE, colClasses = "character")
names(c_tmp) <- c("str_id", "all", "wrong", "tagged", "text")
c_error1 <- c_tmp[, c("wrong", "tagged", "all", "text")]
# All tweets with MISSING ISSUES
c_tmp <- read.csv("issuecomp-codingsample-error2.csv", header = FALSE, colClasses = "character")
names(c_tmp) <- c("str_id", "all", "missing", "tagged", "text")
c_error2 <- c_tmp[, c("missing", "text", "tagged", "all")]
# All CORRECT tweets
c_tmp <- read.csv("issuecomp-codingsample-correct.csv", header = FALSE, colClasses = "character")
# Name the columns of the table currently held in c_tmp and keep it as
# c_correct (statements restored to one per line so the line parses).
names(c_tmp) <- c("str_id", "status", "issue", "tags", "text")
c_correct <- c_tmp
View(c_error1)
View(c_error2)
View(c_error1)
View(c_correct)

# VAR on issues_i with two issues_s columns as exogenous regressors, then
# impulse responses.
test <- VAR(issues_i[, 2:22], p = 1, type = "none", exogen = issues_s[, 2:3])
plot(irf(test, impulse = names(issues_s[2:11]), response = names(issues_i[2:22])))
# VAR on columns 2:32 of issues, then the same impulse/response selection.
test <- VAR(issues[, 2:32], p = 1, type = "none")
plot(irf(test, impulse = names(issues_s[2:11]), response = names(issues_i[2:22])))

# Lag-order selection for several maximum lags and deterministic terms.
VARselect(issues[, 2:32], lag.max = 8, type = "none")
VARselect(issues[, 2:32], lag.max = 8, type = "both")
VARselect(issues[, 2:32], lag.max = 30, type = "both")
VARselect(issues[, 2:32], lag.max = 15, type = "both")