Current status
This commit is contained in:
@@ -1,242 +1,512 @@
|
||||
require(lubridate)
|
||||
require(XML)
|
||||
require(ggplot2)
|
||||
require(reshape2)
|
||||
# Check if tag is an acronym. If so, ignore.case will be deactivated in smartPatternMatch
|
||||
curacro <- checkAcronym(string = curtag)
|
||||
# Check if tag is some kind of specific hashtag. If so, do not handle as acronym, but don't expand it either
|
||||
if(str_detect(curtag, "^#")) {
|
||||
curacro <- FALSE
|
||||
curhash <- TRUE
|
||||
curtag <- str_replace(curtag, "#", "")
|
||||
curchars <- curchars - 1
|
||||
} else {
|
||||
curhash <- FALSE
|
||||
}
|
||||
# Now expand the current tag by possible suffixes that may be plural forms
|
||||
# Only do if it isn't an acronym or specific hashtag
|
||||
if(!curacro && !curhash) {
|
||||
for(e in 1:length(tagexpand)) {
|
||||
curtag[e] <- str_c(curtag[1], tagexpand[e])
|
||||
}
|
||||
}
|
||||
##############
|
||||
if(curchars <= 4 || curacro || curhash) {
|
||||
cat("distance 0\n")
|
||||
} else {
|
||||
cat("distance 1\n")
|
||||
}
|
||||
curtag <- "EURATOM"
|
||||
curchars <- nchar(curtag, type = "chars")
|
||||
# Check if tag is an acronym. If so, ignore.case will be deactivated in smartPatternMatch
|
||||
curacro <- checkAcronym(string = curtag)
|
||||
# Check if tag is some kind of specific hashtag. If so, do not handle as acronym, but don't expand it either
|
||||
if(str_detect(curtag, "^#")) {
|
||||
curacro <- FALSE
|
||||
curhash <- TRUE
|
||||
curtag <- str_replace(curtag, "#", "")
|
||||
curchars <- curchars - 1
|
||||
} else {
|
||||
curhash <- FALSE
|
||||
}
|
||||
# Now expand the current tag by possible suffixes that may be plural forms
|
||||
# Only do if it isn't an acronym or specific hashtag
|
||||
if(!curacro && !curhash) {
|
||||
for(e in 1:length(tagexpand)) {
|
||||
curtag[e] <- str_c(curtag[1], tagexpand[e])
|
||||
}
|
||||
}
|
||||
##############
|
||||
if(curchars <= 4 || curacro || curhash) {
|
||||
cat("distance 0\n")
|
||||
} else {
|
||||
cat("distance 1\n")
|
||||
}
|
||||
curtag <- "Energiewende"
|
||||
curchars <- nchar(curtag, type = "chars")
|
||||
# Check if tag is an acronym. If so, ignore.case will be deactivated in smartPatternMatch
|
||||
curacro <- checkAcronym(string = curtag)
|
||||
# Check if tag is some kind of specific hashtag. If so, do not handle as acronym, but don't expand it either
|
||||
if(str_detect(curtag, "^#")) {
|
||||
curacro <- FALSE
|
||||
curhash <- TRUE
|
||||
curtag <- str_replace(curtag, "#", "")
|
||||
curchars <- curchars - 1
|
||||
} else {
|
||||
curhash <- FALSE
|
||||
}
|
||||
# Now expand the current tag by possible suffixes that may be plural forms
|
||||
# Only do if it isn't an acronym or specific hashtag
|
||||
if(!curacro && !curhash) {
|
||||
for(e in 1:length(tagexpand)) {
|
||||
curtag[e] <- str_c(curtag[1], tagexpand[e])
|
||||
}
|
||||
}
|
||||
##############
|
||||
if(curchars <= 4 || curacro || curhash) {
|
||||
cat("distance 0\n")
|
||||
} else {
|
||||
cat("distance 1\n")
|
||||
}
|
||||
curtag <- "bnd"
|
||||
curchars <- nchar(curtag, type = "chars")
|
||||
# Check if tag is an acronym. If so, ignore.case will be deactivated in smartPatternMatch
|
||||
curacro <- checkAcronym(string = curtag)
|
||||
# Check if tag is some kind of specific hashtag. If so, do not handle as acronym, but don't expand it either
|
||||
if(str_detect(curtag, "^#")) {
|
||||
curacro <- FALSE
|
||||
curhash <- TRUE
|
||||
curtag <- str_replace(curtag, "#", "")
|
||||
curchars <- curchars - 1
|
||||
} else {
|
||||
curhash <- FALSE
|
||||
}
|
||||
# Now expand the current tag by possible suffixes that may be plural forms
|
||||
# Only do if it isn't an acronym or specific hashtag
|
||||
if(!curacro && !curhash) {
|
||||
for(e in 1:length(tagexpand)) {
|
||||
curtag[e] <- str_c(curtag[1], tagexpand[e])
|
||||
}
|
||||
}
|
||||
##############
|
||||
if(curchars <= 4 || curacro || curhash) {
|
||||
cat("distance 0\n")
|
||||
} else {
|
||||
cat("distance 1\n")
|
||||
}
|
||||
curtag <- "#WM"
|
||||
curchars <- nchar(curtag, type = "chars")
|
||||
# Check if tag is an acronym. If so, ignore.case will be deactivated in smartPatternMatch
|
||||
curacro <- checkAcronym(string = curtag)
|
||||
# Check if tag is some kind of specific hashtag. If so, do not handle as acronym, but don't expand it either
|
||||
if(str_detect(curtag, "^#")) {
|
||||
curacro <- FALSE
|
||||
curhash <- TRUE
|
||||
curtag <- str_replace(curtag, "#", "")
|
||||
curchars <- curchars - 1
|
||||
} else {
|
||||
curhash <- FALSE
|
||||
}
|
||||
# Now expand the current tag by possible suffixes that may be plural forms
|
||||
# Only do if it isn't an acronym or specific hashtag
|
||||
if(!curacro && !curhash) {
|
||||
for(e in 1:length(tagexpand)) {
|
||||
curtag[e] <- str_c(curtag[1], tagexpand[e])
|
||||
}
|
||||
}
|
||||
##############
|
||||
if(curchars <= 4 || curacro || curhash) {
|
||||
cat("distance 0\n")
|
||||
} else {
|
||||
cat("distance 1\n")
|
||||
}
|
||||
curtag
|
||||
curtag <- "Energiewende"
|
||||
curchars <- nchar(curtag, type = "chars")
|
||||
# Check if tag is an acronym. If so, ignore.case will be deactivated in smartPatternMatch
|
||||
curacro <- checkAcronym(string = curtag)
|
||||
# Check if tag is some kind of specific hashtag. If so, do not handle as acronym, but don't expand it either
|
||||
if(str_detect(curtag, "^#")) {
|
||||
curacro <- FALSE
|
||||
curhash <- TRUE
|
||||
curtag <- str_replace(curtag, "#", "")
|
||||
curchars <- curchars - 1
|
||||
} else {
|
||||
curhash <- FALSE
|
||||
}
|
||||
# Now expand the current tag by possible suffixes that may be plural forms
|
||||
# Only do if it isn't an acronym or specific hashtag
|
||||
if(!curacro && !curhash) {
|
||||
for(e in 1:length(tagexpand)) {
|
||||
curtag[e] <- str_c(curtag[1], tagexpand[e])
|
||||
}
|
||||
}
|
||||
##############
|
||||
if(curchars <= 4 || curacro || curhash) {
|
||||
cat("distance 0\n")
|
||||
} else {
|
||||
cat("distance 1\n")
|
||||
}
|
||||
curtag <- "Energiewende"
|
||||
curchars <- nchar(curtag, type = "chars")
|
||||
# Check if tag is an acronym. If so, ignore.case will be deactivated in smartPatternMatch
|
||||
curacro <- checkAcronym(string = curtag)
|
||||
# Check if tag is some kind of specific hashtag. If so, do not handle as acronym, but don't expand it either
|
||||
if(str_detect(curtag, "^#")) {
|
||||
curacro <- FALSE
|
||||
curhash <- TRUE
|
||||
curtag <- str_replace(curtag, "#", "")
|
||||
curchars <- curchars - 1
|
||||
} else {
|
||||
curhash <- FALSE
|
||||
}
|
||||
# Now expand the current tag by possible suffixes that may be plural forms
|
||||
# Only do if it isn't an acronym or specific hashtag
|
||||
if(!curacro && !curhash) {
|
||||
for(e in 1:length(tagexpand)) {
|
||||
curtag[e] <- str_c(curtag[1], tagexpand[e])
|
||||
}
|
||||
}
|
||||
# Set Levenshtein distance depending on char length, acronym and hashtag status
|
||||
if(curchars <= 4 || curacro || curhash) {
|
||||
curdistance <- 0
|
||||
} else {
|
||||
curdistance <- 1
|
||||
}
|
||||
curtag
|
||||
smartPatternMatch("Die Energiewende ist toll!", curtag, curdistance, curacro)
|
||||
smartPatternMatch("Die Energiewende ist toll!", curtag[1], curdistance, curacro)
|
||||
smartPatternMatch("Die Energiewende ist toll!", curtag[2], curdistance, curacro)
|
||||
smartPatternMatch("Die Energiewende ist toll!", sprintf("%s", curtag), curdistance, curacro)
|
||||
tags_found <- NULL
|
||||
# Match the tweet with each variation of tagexpand
|
||||
for(e in 1:length(curtag)) {
|
||||
tags_found[e] <- smartPatternMatch(curtext, curtag[e], curdistance, curacro)
|
||||
}
|
||||
curtext <- "Die Energiewende ist toll!"
|
||||
tags_found <- NULL
|
||||
# Match the tweet with each variation of tagexpand
|
||||
for(e in 1:length(curtag)) {
|
||||
tags_found[e] <- smartPatternMatch(curtext, curtag[e], curdistance, curacro)
|
||||
}
|
||||
tags_found
|
||||
curtag
|
||||
curtag <- "#WM2014"
|
||||
curtext <- "Ich freu mich auf wm2014 sehr"
|
||||
curchars <- nchar(curtag, type = "chars")
|
||||
# Check if tag is an acronym. If so, ignore.case will be deactivated in smartPatternMatch
|
||||
curacro <- checkAcronym(string = curtag)
|
||||
# Check if tag is some kind of specific hashtag. If so, do not handle as acronym, but don't expand it either
|
||||
if(str_detect(curtag, "^#")) {
|
||||
curacro <- FALSE
|
||||
curhash <- TRUE
|
||||
curtag <- str_replace(curtag, "#", "")
|
||||
curchars <- curchars - 1
|
||||
} else {
|
||||
curhash <- FALSE
|
||||
}
|
||||
# Expand the current tag by possible suffixes that may be plural forms.
# Only done when the tag is neither an acronym nor a specific hashtag.
if (!curacro && !curhash && length(tagexpand) > 0) {
  # Build every variant from the unmodified base tag in a single vectorised
  # call. The former element-wise loop overwrote curtag[1] on its first pass,
  # so all later variants were only correct if the first suffix was empty.
  curtag <- str_c(curtag[1], tagexpand)
}
|
||||
# Set Levenshtein distance depending on char length, acronym and hashtag status
|
||||
if(curchars <= 4 || curacro || curhash) {
|
||||
curdistance <- 0
|
||||
} else {
|
||||
curdistance <- 1
|
||||
}
|
||||
# Match current tweet with tag, allowing the Levenshtein distance chosen above (0 for tags of up to 4 characters, acronyms and hashtags; 1 otherwise)
|
||||
tags_found <- NULL
|
||||
# Match the tweet with each variation of tagexpand
|
||||
for(e in 1:length(curtag)) {
|
||||
tags_found[e] <- smartPatternMatch(curtext, curtag[e], curdistance, curacro)
|
||||
}
|
||||
tags_found <- any(tags_found)
|
||||
tags_found
|
||||
curtag
|
||||
curtext
|
||||
curdistance
|
||||
test <- VAR(issues[,2:32], p=3, type=c("const", "trend", "both", "none"), season=NULL, exogen = NULL, lag.max = NULL, ic = c("AIC", "HQ", "SC", "FPE"))
|
||||
test
|
||||
test <- VAR(issues[,2:32], p=1, type="none")
|
||||
capture.output(print(summary(test), prmsd=TRUE, digits=1), file="out.txt")
|
||||
View(issues)
|
||||
test <- VAR(issues_i[,2:22], p=1, type="none", exogen = issues_s[,2])
|
||||
test <- VAR(issues_i[,2:22], p=1, type="none", exogen = issues_s[,2:3])
|
||||
capture.output(print(summary(test), prmsd=TRUE, digits=1), file="out.txt")
|
||||
irf(test)
|
||||
test <- VAR(issues_s[,2:11], p=1, type="none")
|
||||
irf(test)
|
||||
plot(irf(test))
|
||||
test <- VAR(issues[,2:32], p=1, type="none")
|
||||
plot(irf(test, impulse = names(issues_s[2:11]), response = names(issues_i[2:22])))
|
||||
plot(irf(test, impulse = names(issues_s[2:11]), response = names(issues_i[2:22]), n.ahead = 5))
|
||||
require(stringr)
|
||||
library(foreach)
|
||||
library(doParallel)
|
||||
source("issuecomp-functions.R")
|
||||
getwd()
|
||||
setwd("~/Dokumente/Uni/Aktuell/BA-Arbeit/uni-ba-issuecomp")
|
||||
getwd()
|
||||
list.files()
|
||||
list.files("matched-ids/")
|
||||
load(file = "tweets_untagged.RData")
|
||||
issues <- data.frame(date = drange)
|
||||
# Create date range
|
||||
date_start <- as.Date("2014-01-01")
|
||||
date_end <- as.Date("2014-12-31")
|
||||
drange <- as.integer(date_end - date_start)
|
||||
drange <- date_start + days(0:drange)
|
||||
issues <- data.frame(date = drange)
|
||||
issuelist <- readLines("issues.xml")
|
||||
issuelist <- str_replace_all(string = issuelist, pattern = ".*<!-- .+ -->", "")
|
||||
issuelist <- xmlToList(issuelist)
|
||||
issueheads <- names(issuelist)
|
||||
issues[issueheads] <- 0
|
||||
tweets$issue <- ""
|
||||
tweets$tags <- ""
|
||||
View(issues)
|
||||
list.files("matched-ids/")
|
||||
results <- list.files("matched-ids/")
|
||||
results
|
||||
read.csv("matched-ids/i10.trans.csv")
|
||||
read.csv("matched-ids/i10.trans.csv", sep=";")
|
||||
read.csv("matched-ids/i10.trans.csv", sep=";", stringsAsFactors=F)
|
||||
read.csv("matched-ids/i10.trans.csv", sep=";", stringsAsFactors=T)
|
||||
reesult_files <- read.csv("matched-ids/i10.trans.csv", sep=";", stringsAsFactors=F)
|
||||
View(reesult_files)
|
||||
result_files <- read.csv("matched-ids/i10.trans.csv", sep=";", colClasses=c("date", "character", "character", "character"))
|
||||
result_files <- read.csv("matched-ids/i10.trans.csv", sep=";", colClasses=c("character", "character", "character", "character"))
|
||||
rm(reesult_files)
|
||||
View(result_files)
|
||||
nrow(result_files)
|
||||
result_files <- result_files(!duplicated(result_files))
|
||||
result_files <- result_files[!duplicated(result_files)]
|
||||
result_files <- result_files[!duplicated(result_files), ]
|
||||
nrow(result_files)
|
||||
result_files <- read.csv("matched-ids/i10.trans.csv", sep=";", colClasses=c("character", "character", "character", "character"), header=F)
|
||||
View(result_files)
|
||||
read.results
|
||||
results
|
||||
setwd("matched-ids/")
|
||||
list.files("")
|
||||
getwd()
|
||||
list.files()
|
||||
results <- list.files()
|
||||
results
|
||||
results_cat <- read.csv(results, sep=";", colClasses=c("character", "character", "character", "character"), header=F)
|
||||
results_cat <- read.csv(results[1], sep=";", colClasses=c("character", "character", "character", "character"), header=F)
|
||||
results_cat
|
||||
View(results_cat)
|
||||
source("issuecomp-functions.R")
|
||||
setwd("~/Dokumente/Uni/Aktuell/BA-Arbeit/uni-ba-issuecomp")
|
||||
source("issuecomp-functions.R")
|
||||
insertRow
|
||||
results_temp <- read.csv(results[2], sep=";", colClasses=c("character", "character", "character", "character"), header=F)
|
||||
setwd("matched-ids/")
|
||||
results_temp <- read.csv(results[2], sep=";", colClasses=c("character", "character", "character", "character"), header=F)
|
||||
rm(result_files)
|
||||
insertRow(existingDF = results_cat, results_temp)
|
||||
rm(results_cat)
|
||||
for(r in 1:length(results)) {
|
||||
if(r == 1) {
|
||||
results_cat <- read.csv(results[r], sep=";", colClasses=c("character", "character", "character", "character"), header=F)
|
||||
} else {
|
||||
results_temp <- read.csv(results[r], sep=";", colClasses=c("character", "character", "character", "character"), header=F)
|
||||
insertRow(results_cat, results_temp)
|
||||
}
|
||||
}
|
||||
for(r in 1:length(results)) {
|
||||
if(r == 1) {
|
||||
results_cat <- read.csv(results[r], sep=";", colClasses=c("character", "character", "character", "character"), header=F)
|
||||
} else {
|
||||
results_temp <- read.csv(results[r], sep=";", colClasses=c("character", "character", "character", "character"), header=F)
|
||||
results_cat insertRow(results_cat, results_temp)
|
||||
}
|
||||
}
|
||||
for(r in 1:length(results)) {
|
||||
if(r == 1) {
|
||||
results_cat <- read.csv(results[r], sep=";", colClasses=c("character", "character", "character", "character"), header=F)
|
||||
} else {
|
||||
results_temp <- read.csv(results[r], sep=";", colClasses=c("character", "character", "character", "character"), header=F)
|
||||
results_cat <- insertRow(results_cat, results_temp)
|
||||
}
|
||||
}
|
||||
View(results_cat)
|
||||
results_cat[20000]
|
||||
results_cat[20000, ]
|
||||
rm(r, results_temp)
|
||||
results_cat <- results_cat[!duplicated(results_cat), ]
|
||||
View(results_cat)
|
||||
rm(results, results_cat)
|
||||
results_files <- list.files()
|
||||
for(r in 1:length(results)) {
|
||||
if(r == 1) {
|
||||
results <- read.csv(results_files[r], sep=";", colClasses=c("character", "character", "character", "character"), header=F)
|
||||
} else {
|
||||
results_temp <- read.csv(results_files[r], sep=";", colClasses=c("character", "character", "character", "character"), header=F)
|
||||
results <- insertRow(results_cat, results_temp)
|
||||
}
|
||||
}
|
||||
rm(r, results_temp)
|
||||
results <- results[!duplicated(results), ]
|
||||
results_files <- list.files()
|
||||
for(r in 1:length(results_files)) {
|
||||
if(r == 1) {
|
||||
results <- read.csv(results_files[r], sep=";", colClasses=c("character", "character", "character", "character"), header=F)
|
||||
} else {
|
||||
results_temp <- read.csv(results_files[r], sep=";", colClasses=c("character", "character", "character", "character"), header=F)
|
||||
results <- insertRow(results, results_temp)
|
||||
}
|
||||
}
|
||||
rm(r, results_temp)
|
||||
results <- results[!duplicated(results), ]
|
||||
View(results)
|
||||
View(issues)
|
||||
row.names(results) <- NULL
|
||||
View(results)
|
||||
rownames(results)
|
||||
row.names(results)
|
||||
names(results)
|
||||
View(tweets)
|
||||
View(tweets)
|
||||
names(results) <- c("date", "id_str", "issue", "tags")
|
||||
View(results)
|
||||
results_test <- results[order(results$id_str)]
|
||||
results_test <- results[order(results$id_str), ]
|
||||
View(results_test)
|
||||
results_files <- list.files()
|
||||
for(r in 1:length(results_files)) {
|
||||
if(r == 1) {
|
||||
results <- read.csv(results_files[r], sep=";", colClasses=c("character", "character", "character", "character"), header=F)
|
||||
} else {
|
||||
results_temp <- read.csv(results_files[r], sep=";", colClasses=c("character", "character", "character", "character"), header=F)
|
||||
results <- insertRow(results, results_temp)
|
||||
}
|
||||
}
|
||||
rm(r, results_temp)
|
||||
rm(r, results_temp, results_files)
|
||||
results <- results[!duplicated(results), ]
|
||||
names(results)
|
||||
names(results) <- c("date", "id_str", "issue", "tags")
|
||||
View(results)
|
||||
results_test <- results[order(results$id_str), ]
|
||||
row.names(results) <- NULL
|
||||
results <- results[order(results$id_str), ]
|
||||
row.names(results) <- NULL
|
||||
View(results)
|
||||
rm(results_test)
|
||||
View(issues)
|
||||
as.character(results$date[2])
|
||||
class(results$date)
|
||||
class(issues$date)
|
||||
View(issues)
|
||||
as.character(issues$date[2])
|
||||
issues$date[2]
|
||||
issuelist <- readLines("issues.xml")
|
||||
issuelist <- str_replace_all(string = issuelist, pattern = ".*<!-- .+ -->", "")
|
||||
issuelist <- xmlToList(issuelist)
|
||||
issueheads <- names(issuelist)
|
||||
require(lubridate)
|
||||
require(XML)
|
||||
require(ggplot2)
|
||||
require(reshape2)
|
||||
require(stringr)
|
||||
library(foreach)
|
||||
library(doParallel)
|
||||
issuelist <- readLines("issues.xml")
|
||||
issuelist <- str_replace_all(string = issuelist, pattern = ".*<!-- .+ -->", "")
|
||||
issuelist <- xmlToList(issuelist)
|
||||
issueheads <- names(issuelist)
|
||||
setwd("~/Dokumente/Uni/Aktuell/BA-Arbeit/uni-ba-issuecomp")
|
||||
issuelist <- readLines("issues.xml")
|
||||
issuelist <- str_replace_all(string = issuelist, pattern = ".*<!-- .+ -->", "")
|
||||
issuelist <- xmlToList(issuelist)
|
||||
issueheads <- names(issuelist)
|
||||
issues[issueheads] <- 0
|
||||
curdate <- as.character(results$date[3])
|
||||
curissue <- as.character(results$issue[3])
|
||||
curdate
|
||||
curissue
|
||||
issues[curdate, curissue] <- issues[curdate, curissue] + 1
|
||||
View(issues)
|
||||
issues <- data.frame(date = drange)
|
||||
issues[issueheads] <- 0
|
||||
View(issues)
|
||||
issues[issues[, "date"] == curdate, curissue] <- issues[issues[, "date"] == curdate, curissue] + 1
|
||||
View(issues)
|
||||
for(r in 1:nrow(results)) {
|
||||
curdate <- as.character(results$date[r])
|
||||
curissue <- as.character(results$issue[r])
|
||||
issues[issues[, "date"] == curdate, curissue] <- issues[issues[, "date"] == curdate, curissue] + 1
|
||||
# Show `question` as a console prompt and hand back whatever the user typed,
# as a character string.
readYN <- function(question) {
  answer <- readline(prompt = question)
  as.character(answer)
}
|
||||
View(issues)
|
||||
issues[issueheads] <- 0
|
||||
View(issues)
|
||||
for(r in 1:nrow(results)) {
|
||||
curdate <- as.character(results$date[r])
|
||||
curid <- as.character(results$id_str[r])
|
||||
curissue <- as.character(results$issue[r])
|
||||
curtag <- as.character(results$tags[r])
|
||||
# Update issue counter (date and issue)
|
||||
issues[issues[, "date"] == curdate, curissue] <- issues[issues[, "date"] == curdate, curissue] + 1
|
||||
# Update tweet dataframe (id, issue and tags)
|
||||
oldissue <- tweets[tweets[, "id_str"] == curid, "issue"]
|
||||
tweets[tweets[, "id_str"] == curid, "issue"] <- str_c(oldissue, curissue, ",")
|
||||
oldtag <- tweets[tweets[, "id_str"] == curid, "tags"]
|
||||
tweets[tweets[, "id_str"] == curid, "tags"] <- str_c(oldtag, curtag, ",")
|
||||
# Report whether `string` matches at least one entry of `issuelist`.
# Matching is done by str_detect(), so the entries of `issuelist` are used
# as patterns. Returns a single logical value.
checkIssue <- function(string, issuelist) {
  hits <- str_detect(string, issuelist)
  any(hits)
}
|
||||
# Check every element of `string` against `issuelist` via checkIssue().
#
# Args:
#   string:    character vector of issue names to validate.
#   issuelist: vector of known issues, passed through to checkIssue().
#
# Returns a logical vector with one entry per element of `string`
# (TRUE = issue exists). For every unknown issue a message is printed to
# the console. Empty input yields an empty logical vector.
checkAllIssues <- function(string, issuelist) {
  # Preallocate instead of growing the result from NULL.
  status <- logical(length(string))
  # seq_along() skips the loop for empty input; 1:length(string) would
  # iterate over c(1, 0) and fail.
  for (i in seq_along(string)) {
    status[i] <- checkIssue(string[i], issuelist)
    if (!status[i]) {
      cat("Issue", string[i], "does not exist. Please try again.\n")
    }
  }
  status
}
|
||||
View(tweets)
|
||||
tweets$issue <- ""
|
||||
tweets$tags <- ""
|
||||
View(tweets)
|
||||
issues[issueheads] <- 0
|
||||
for(r in 1:nrow(results)) {
|
||||
curdate <- as.character(results$date[r])
|
||||
curid <- as.character(results$id_str[r])
|
||||
curissue <- as.character(results$issue[r])
|
||||
curtag <- as.character(results$tags[r])
|
||||
cat("Sorting match", r, "from", nrow(results), "\n")
|
||||
# Update issue counter (date and issue)
|
||||
issues[issues[, "date"] == curdate, curissue] <- issues[issues[, "date"] == curdate, curissue] + 1
|
||||
# Update tweet dataframe (id, issue and tags)
|
||||
oldissue <- tweets[tweets[, "id_str"] == curid, "issue"]
|
||||
tweets[tweets[, "id_str"] == curid, "issue"] <- str_c(oldissue, curissue, ",")
|
||||
oldtag <- tweets[tweets[, "id_str"] == curid, "tags"]
|
||||
tweets[tweets[, "id_str"] == curid, "tags"] <- str_c(oldtag, curtag, ",")
|
||||
write.csv(tweets, file="tweets.csv")
|
||||
c_tweets <- read.csv("tweets.csv", colClasses="character")
|
||||
View(c_tweets)
|
||||
c_tweets$X <- NULL
|
||||
c_issues <- data.frame(date = drange)
|
||||
c_issuelist <- xmlToList("issues.xml")
|
||||
c_issueheads <- names(issuelist)
|
||||
c_issues[issueheads] <- 0
|
||||
source("issuecomp-codingsample-function.R")
|
||||
rm(c_err, c_result, c_samid, c_samno,c_samtags,c_samissue,c_samtext,c_yn)
|
||||
c_errors <- read.csv("issuecomp-codingsample-error.csv", header = F, sep=",", colClasses="character")
|
||||
View(c_errors)
|
||||
names(c_errors) <- c("str_id", "code", "issue", "tags", "text")
|
||||
View(c_errors)
|
||||
for(r in 1:nrow(c_errors)) {
|
||||
c_errcode <- as.character(c_errors$code[r])
|
||||
c_errissue <- as.character(c_errors$issue[r])
|
||||
c_errtags <- as.character(c_errors$tags[r])
|
||||
c_errtext <- as.character(c_errors$text[r])
|
||||
c_errid <- as.character(c_errors$str_id[r])
|
||||
cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errtags, "\n", sep="")
|
||||
source("issuecomp-codingsample-function2.R")
|
||||
}
|
||||
View(issues)
|
||||
View(tweets)
|
||||
View(tweets)
|
||||
save(tweets, file="tweets_tagged.RData")
|
||||
c_errors <- read.csv("issuecomp-codingsample-error.csv", header = F, sep=",", colClasses="character")
|
||||
names(c_errors) <- c("str_id", "code", "issue", "tags", "text")
|
||||
for(r in 1:nrow(c_errors)) {
|
||||
c_errcode <- as.character(c_errors$code[r])
|
||||
c_errissue <- as.character(c_errors$issue[r])
|
||||
c_errtags <- as.character(c_errors$tags[r])
|
||||
c_errtext <- as.character(c_errors$text[r])
|
||||
c_errid <- as.character(c_errors$str_id[r])
|
||||
cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errtags, "\n", sep="")
|
||||
source("issuecomp-codingsample-function2.R")
|
||||
}
|
||||
for(r in 1:nrow(c_errors)) {
|
||||
c_errcode <- as.character(c_errors$code[r])
|
||||
c_errissue <- as.character(c_errors$issue[r])
|
||||
c_errtags <- as.character(c_errors$tags[r])
|
||||
c_errtext <- as.character(c_errors$text[r])
|
||||
c_errid <- as.character(c_errors$str_id[r])
|
||||
cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="")
|
||||
source("issuecomp-codingsample-function2.R")
|
||||
}
|
||||
for(r in 1:nrow(c_errors)) {
|
||||
c_errcode <- as.character(c_errors$code[r])
|
||||
c_errissue <- as.character(c_errors$issue[r])
|
||||
c_errtags <- as.character(c_errors$tags[r])
|
||||
c_errtext <- as.character(c_errors$text[r])
|
||||
c_errid <- as.character(c_errors$str_id[r])
|
||||
cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="")
|
||||
source("issuecomp-codingsample-function2.R")
|
||||
}
|
||||
tagexpand
|
||||
source("issuecomp-codingsample-function.R")
|
||||
source("issuecomp-codingsample-function.R")
|
||||
source("issuecomp-codingsample-function.R")
|
||||
c_errors <- read.csv("issuecomp-codingsample-error.csv", header = F, sep=",", colClasses="character")
|
||||
names(c_errors) <- c("str_id", "code", "issue", "tags", "text")
|
||||
for(r in 1:nrow(c_errors)) {
|
||||
c_errcode <- as.character(c_errors$code[r])
|
||||
c_errissue <- as.character(c_errors$issue[r])
|
||||
c_errtags <- as.character(c_errors$tags[r])
|
||||
c_errtext <- as.character(c_errors$text[r])
|
||||
c_errid <- as.character(c_errors$str_id[r])
|
||||
cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="")
|
||||
source("issuecomp-codingsample-function2.R")
|
||||
}
|
||||
for(r in 1:nrow(c_errors)) {
|
||||
c_errcode <- as.character(c_errors$code[r])
|
||||
c_errissue <- as.character(c_errors$issue[r])
|
||||
c_errtags <- as.character(c_errors$tags[r])
|
||||
c_errtext <- as.character(c_errors$text[r])
|
||||
c_errid <- as.character(c_errors$str_id[r])
|
||||
cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="")
|
||||
source("issuecomp-codingsample-function2.R")
|
||||
}
|
||||
for(r in 1:nrow(c_errors)) {
|
||||
c_errcode <- as.character(c_errors$code[r])
|
||||
c_errissue <- as.character(c_errors$issue[r])
|
||||
c_errtags <- as.character(c_errors$tags[r])
|
||||
c_errtext <- as.character(c_errors$text[r])
|
||||
c_errid <- as.character(c_errors$str_id[r])
|
||||
cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="")
|
||||
source("issuecomp-codingsample-function2.R")
|
||||
}
|
||||
for(r in 1:nrow(c_errors)) {
|
||||
c_errcode <- as.character(c_errors$code[r])
|
||||
c_errissue <- as.character(c_errors$issue[r])
|
||||
c_errtags <- as.character(c_errors$tags[r])
|
||||
c_errtext <- as.character(c_errors$text[r])
|
||||
c_errid <- as.character(c_errors$str_id[r])
|
||||
cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="")
|
||||
source("issuecomp-codingsample-function2.R")
|
||||
}
|
||||
for(r in 1:nrow(c_errors)) {
|
||||
c_errcode <- as.character(c_errors$code[r])
|
||||
c_errissue <- as.character(c_errors$issue[r])
|
||||
c_errtags <- as.character(c_errors$tags[r])
|
||||
c_errtext <- as.character(c_errors$text[r])
|
||||
c_errid <- as.character(c_errors$str_id[r])
|
||||
cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="")
|
||||
source("issuecomp-codingsample-function2.R")
|
||||
}
|
||||
c_tmp <- read.csv("issuecomp-codingsample-error1.csv", header = F, colClasses="character")
|
||||
View(c_tmp)
|
||||
View(c_errors)
|
||||
View(c_tmp)
|
||||
names(c_tmp) <- c("str_id", "all", "wrong", "tags", "text")
|
||||
View(c_tmp)
|
||||
c_tmp[, c("wrong", "tagged", "all", "text")]
|
||||
View(c_tmp)
|
||||
names(c_tmp) <- c("str_id", "all", "wrong", "tagged", "text")
|
||||
c_tmp[, c("wrong", "tagged", "all", "text")]
|
||||
c_error1 <- c_tmp[, c("wrong", "tagged", "all", "text")]
|
||||
View(c_error1)
|
||||
c_tmp <- read.csv("issuecomp-codingsample-error2.csv", header = F, colClasses="character")
|
||||
View(c_tmp)
|
||||
c_tmp <- read.csv("issuecomp-codingsample-error2.csv", header = F, colClasses="character")
|
||||
names(c_tmp) <- c("str_id", "all", "missing", "tagged", "text")
|
||||
c_error1 <- c_tmp[, c("missing", "tagged", "all", "text")]
|
||||
c_error2 <- c_tmp[, c("missing", "tagged", "all", "text")]
|
||||
View(c_error2)
|
||||
c_error2 <- c_tmp[, c("missing", "text", "tagged", "all")]
|
||||
View(c_error2)
|
||||
View(c_error1)
|
||||
View(c_error2)
|
||||
c_tmp <- read.csv("issuecomp-codingsample-correct.csv", header = F, colClasses="character")
|
||||
View(c_tmp)
|
||||
names(c_tmp) <- c("str_id", "status", "issue", "tags", "text")
|
||||
View(c_tmp)
|
||||
c_currect <- c_tmp
|
||||
c_correct <- c_tmp
|
||||
rm(c_currect)
|
||||
View(c_correct)
|
||||
source("issuecomp-codingsample-function.R")
|
||||
rm(c_err, c_result, c_samid, c_samno,c_samtags,c_samissue,c_samtext,c_yn)
|
||||
c_errors <- read.csv("issuecomp-codingsample-error.csv", header = F, sep=",", colClasses="character")
|
||||
c_errors <- read.csv("issuecomp-codingsample-error.csv", header = F, sep=",", colClasses="character")
|
||||
c_errors <- read.csv("issuecomp-codingsample-error.csv", header = F, sep=",", colClasses="character")
|
||||
c_errors <- read.csv("issuecomp-codingsample-error.csv", header = F, sep=",", colClasses="character")
|
||||
c_errors <- read.csv("issuecomp-codingsample-error.csv", header = F, sep=",", colClasses="character", quote = "")
|
||||
View(c_errors)
|
||||
c_errors <- read.csv("issuecomp-codingsample-error.csv", header = F, sep=",", colClasses="character")
|
||||
test <- "Zitat "total dämlich!""
|
||||
tweets$id_str == "523512815425175552"
|
||||
tweets[tweets$id_str == "523512815425175552"]
|
||||
tweets[tweets$id_str == "523512815425175552", ]
|
||||
tweets[tweets$id_str == "523512815425175552", "text"]
|
||||
test <- tweets[tweets$id_str == "523512815425175552", "text"]
|
||||
test
|
||||
test <- c_tweets[ctweets$id_str == "523512815425175552", "text"]
|
||||
test <- c_tweets[c_tweets$id_str == "523512815425175552", "text"]
|
||||
test
|
||||
str_replace(test, "\\"", ")
|
||||
str_replace(test, "\\"", "")
|
||||
str_replace(test, "\"", "")
|
||||
str_detect(test, "\"")
|
||||
test <- as.character(c_tweets[c_tweets$id_str == "523512815425175552", "text"])
|
||||
test
|
||||
c_tweets <- read.csv("tweets.csv", colClasses="character")
|
||||
for(r in 1:nrow(c_tweets)) {
|
||||
curtext <- as.character(c_tweets$text[r])
|
||||
if(str_detect(curtext, "\"") {
|
||||
c_tweets$text[r] <- str_replace(curtext, "\"", "")
|
||||
}
|
||||
}
|
||||
for(r in 1:nrow(c_tweets)) {
|
||||
curtext <- as.character(c_tweets$text[r])
|
||||
if(str_detect(curtext, "\"") {
|
||||
c_tweets$text[r] <- str_replace(curtext, "\"", "")
|
||||
} else {}
|
||||
}
|
||||
for(r in 1:nrow(c_tweets)) {
|
||||
curtext <- as.character(c_tweets$text[r])
|
||||
if(str_detect(curtext, "\"") {
|
||||
c_tweets$text[r] <- str_replace(curtext, "\"", "")
|
||||
} else {
|
||||
}
|
||||
}
|
||||
# Strip all double-quote characters from the tweet texts.
# str_replace() removes only the first quote of each tweet, so a text such as
# 'Zitat "total dämlich!"' kept its closing quote; str_replace_all() removes
# every occurrence. The call is vectorised over the whole column, which also
# makes the row loop (and its unsafe 1:nrow() index) unnecessary and leaves
# NA texts untouched instead of failing in if().
c_tweets$text <- str_replace_all(as.character(c_tweets$text), "\"", "")
|
||||
test <- as.character(c_tweets[c_tweets$id_str == "523512815425175552", "text"])
|
||||
test
|
||||
View(c_tweets)
|
||||
c_errors <- read.csv("issuecomp-codingsample-error.csv", header = F, sep=",", colClasses="character")
|
||||
View(c_errors)
|
||||
names(c_errors) <- c("str_id", "code", "issue", "tags", "text")
|
||||
View(c_errors)
|
||||
for(r in 1:nrow(c_errors)) {
|
||||
c_errcode <- as.character(c_errors$code[r])
|
||||
c_errissue <- as.character(c_errors$issue[r])
|
||||
c_errtags <- as.character(c_errors$tags[r])
|
||||
c_errtext <- as.character(c_errors$text[r])
|
||||
c_errid <- as.character(c_errors$str_id[r])
|
||||
cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="")
|
||||
source("issuecomp-codingsample-function2.R")
|
||||
}
|
||||
issueheads
|
||||
for(r in 1:nrow(c_errors)) {
|
||||
c_errcode <- as.character(c_errors$code[r])
|
||||
c_errissue <- as.character(c_errors$issue[r])
|
||||
c_errtags <- as.character(c_errors$tags[r])
|
||||
c_errtext <- as.character(c_errors$text[r])
|
||||
c_errid <- as.character(c_errors$str_id[r])
|
||||
cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="")
|
||||
source("issuecomp-codingsample-function2.R")
|
||||
}
|
||||
# All tweets with WRONG ISSUES
|
||||
c_tmp <- read.csv("issuecomp-codingsample-error1.csv", header = F, colClasses="character")
|
||||
names(c_tmp) <- c("str_id", "all", "wrong", "tagged", "text")
|
||||
c_error1 <- c_tmp[, c("wrong", "tagged", "all", "text")]
|
||||
# All tweets with MISSING ISSUES
|
||||
c_tmp <- read.csv("issuecomp-codingsample-error2.csv", header = F, colClasses="character")
|
||||
names(c_tmp) <- c("str_id", "all", "missing", "tagged", "text")
|
||||
c_error2 <- c_tmp[, c("missing", "text", "tagged", "all")]
|
||||
# All CORRECT tweets
|
||||
c_tmp <- read.csv("issuecomp-codingsample-correct.csv", header = F, colClasses="character")
|
||||
names(c_tmp) <- c("str_id", "status", "issue", "tags", "text")
|
||||
c_correct <- c_tmp
|
||||
View(c_error1)
|
||||
View(c_error2)
|
||||
View(c_error1)
|
||||
View(c_correct)
|
||||
test <- VAR(issues_i[,2:22], p=1, type="none", exogen = issues_s[,2:3])
|
||||
plot(irf(test, impulse = names(issues_s[2:11]), response = names(issues_i[2:22])))
|
||||
test <- VAR(issues[,2:32], p=1, type="none")
|
||||
plot(irf(test, impulse = names(issues_s[2:11]), response = names(issues_i[2:22])))
|
||||
VARselect(issues[,2:32], lag.max=8, type="none")
|
||||
VARselect(issues[,2:32], lag.max=8, type="both")
|
||||
VARselect(issues[,2:32], lag.max=30, type="both")
|
||||
VARselect(issues[,2:32], lag.max=15, type="both")
|
||||
|
||||
Reference in New Issue
Block a user