# library() stops with an error if a package is missing, whereas require()
# only returns FALSE and lets the script fail later in a confusing place.
library(stringr)
library(XML)


# FUNCTIONS ---------------------------------------------------------------

#' Prompt the user on the console and return the typed answer.
#'
#' @param question Prompt text shown to the user.
#' @return The line entered by the user, as a character string.
readYN <- function(question) {
  as.character(readline(prompt = question))
}

#' Test whether a string matches at least one entry of an issue list.
#'
#' @param string Character value to test.
#' @param issuelist Character vector passed to `str_detect()` as patterns.
#' @return `TRUE` if any pattern in `issuelist` is detected in `string`.
checkIssue <- function(string, issuelist) {
  any(str_detect(string, issuelist))
}

#' Check every element of `string` against the issue list.
#'
#' Prints a message for each element that is not found.
#'
#' @param string Character vector of issue names to check.
#' @param issuelist Character vector of known issues (see `checkIssue()`).
#' @return Logical vector, one entry per element of `string`
#'   (`logical(0)` for empty input).
checkAllIssues <- function(string, issuelist) {
  # Preallocate instead of growing from NULL; seq_along() makes the loop a
  # no-op for empty input (1:length(x) would iterate over c(1, 0)).
  status <- logical(length(string))
  for (i in seq_along(string)) {
    if (checkIssue(string[i], issuelist)) {
      status[i] <- TRUE
    } else {
      cat("Issue", string[i], "does not exist. Please try again.\n")
      status[i] <- FALSE
    }
  }
  status
}


# SAMPLE OUT/INPUT --------------------------------------------------------

# Read CSV of all tweets (with tags, if available)
c_tweets <- read.csv("tweets.csv", colClasses = "character")
# Drop column "X" (presumably the row-name column written by write.csv --
# TODO confirm).
c_tweets$X <- NULL

# Read all issues from XML file
# NOTE(review): `drange` is not defined in this file; it must already exist
# in the calling environment before this script is run.
c_issues <- data.frame(date = drange)
c_issuelist <- xmlToList("issues.xml")
# Use the c_-prefixed objects created just above. The original referred to
# `issuelist` / `issueheads`, which this script never defines.
c_issueheads <- names(c_issuelist)
c_issues[c_issueheads] <- 0

# Run through as many tweets as wished to mark them as correct or incorrect
source("issuecomp-codingsample-function.R")
# Remove temporary objects (presumably created by the sourced script --
# verify against issuecomp-codingsample-function.R).
rm(c_err, c_result, c_samid, c_samno, c_samtags, c_samissue, c_samtext, c_yn)

# Now go through tweets/tags marked as false
# Exit codes:
# 0 = Correct tagging
# 1 = At least one tag was incorrect
# 2 = At least one tag was missing
# 3 = Both 1 and 2
c_errors <- read.csv("issuecomp-codingsample-error.csv", header = FALSE,
                     sep = ",", colClasses = "character")
names(c_errors) <- c("str_id", "code", "issue", "tags", "text")

# seq_len() skips the loop entirely when the error file has no rows.
for (r in seq_len(nrow(c_errors))) {
  # These globals are set per row before sourcing the second script
  # (presumably it reads them -- verify against
  # issuecomp-codingsample-function2.R).
  c_errcode <- as.character(c_errors$code[r])
  c_errissue <- as.character(c_errors$issue[r])
  c_errtags <- as.character(c_errors$tags[r])
  c_errtext <- as.character(c_errors$text[r])
  c_errid <- as.character(c_errors$str_id[r])

  cat("===============\n\n[TWEET]: ", c_errtext, "\n[ISSUES]: ", c_errtags,
      "\n", sep = "")
  source("issuecomp-codingsample-function2.R")
}