Current status

This commit is contained in:
2015-03-04 11:36:31 +01:00
parent a388110551
commit 9bbf1b4f56
16 changed files with 2713 additions and 494 deletions
+22 -1
View File
@@ -34,6 +34,13 @@ checkAllIssues <- function(string, issuelist) {
# Read the CSV of all tweets (with tags, if available); every column is read
# as character.
c_tweets <- read.csv("tweets.csv", colClasses = "character")
# Strip every double quote from the tweet text, because quotes may cause
# problems when the data is saved to and re-read from CSV files.
# gsub() is vectorised over the whole column and removes ALL quotes in each
# tweet (str_replace() only removed the first one). It also leaves NA values
# untouched and does nothing on a data frame with zero rows, where the former
# 1:nrow() loop would have failed.
c_tweets$text <- gsub("\"", "", c_tweets$text, fixed = TRUE)
# Drop the column named X
# NOTE(review): presumably the row-index column added by an earlier
# write.csv() -- confirm against the code that writes tweets.csv
c_tweets$X <- NULL
# Read all issues from XML file
@@ -66,10 +73,24 @@ for(r in 1:nrow(c_errors)) {
c_errtext <- as.character(c_errors$text[r])
c_errid <- as.character(c_errors$str_id[r])
cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errtags, "\n", sep="")
cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="")
source("issuecomp-codingsample-function2.R")
}
# Import the result files of the coding sample into human-readable data
# frames, to be used for improving the issue database.
# All three files are read without a header row and with every column as
# character; the column names are assigned right after reading.
# header = FALSE is spelled out because the alias F can be reassigned.

# All tweets with WRONG ISSUES
c_tmp <- read.csv("issuecomp-codingsample-error1.csv", header = FALSE,
                  colClasses = "character")
names(c_tmp) <- c("str_id", "all", "wrong", "tagged", "text")
# Keep only the columns of interest, in reading order (str_id is dropped)
c_error1 <- c_tmp[, c("wrong", "tagged", "all", "text")]

# All tweets with MISSING ISSUES
c_tmp <- read.csv("issuecomp-codingsample-error2.csv", header = FALSE,
                  colClasses = "character")
names(c_tmp) <- c("str_id", "all", "missing", "tagged", "text")
# Keep only the columns of interest, in reading order (str_id is dropped)
c_error2 <- c_tmp[, c("missing", "text", "tagged", "all")]

# All CORRECT tweets (all five columns are kept)
c_tmp <- read.csv("issuecomp-codingsample-correct.csv", header = FALSE,
                  colClasses = "character")
names(c_tmp) <- c("str_id", "status", "issue", "tags", "text")
c_correct <- c_tmp