uni-ba-socialagenda/issuecomp-codingsample.R

require(stringr)
require(XML)

# FUNCTIONS ---------------------------------------------------------------

# Show `question` as a console prompt and hand back the line the user typed.
#
# question: text displayed as the readline() prompt.
# Returns the entered line as a character value; no validation of the
# answer is done here.
readYN <- function(question) {
  as.character(readline(prompt = question))
}

# Tell whether `string` is matched by at least one entry of `issuelist`.
#
# string:    text to test.
# issuelist: vector whose entries are handed to str_detect() as patterns.
# Returns the result of any() over the individual matches.
checkIssue <- function(string, issuelist) {
  hits <- str_detect(string, issuelist)
  any(hits)
}

# Check every element of `string` against `issuelist` via checkIssue().
#
# string:    vector of issue names to validate.
# issuelist: passed through unchanged to checkIssue().
# Returns a logical vector with one entry per element of `string`
# (logical(0) for empty input). For every element that fails the check a
# message is printed to the console.
checkAllIssues <- function(string, issuelist) {
  # Preallocate instead of growing from NULL.
  status <- logical(length(string))
  # seq_along() yields an empty sequence for empty input, whereas
  # 1:length(string) would iterate over c(1, 0).
  for (i in seq_along(string)) {
    status[i] <- checkIssue(string[i], issuelist)
    if (!status[i]) {
      cat("Issue", string[i], "does not exist. Please try again.\n")
    }
  }
  status
}


# SAMPLE OUT/INPUT --------------------------------------------------------


# Read CSV of all tweets (with tags, if available); every column is read as
# character.
c_tweets <- read.csv("tweets.csv", colClasses = "character")
# Remove every double quote from the tweet text because quotes may cause
# problems when saving and reading as CSV files. str_replace() would only
# drop the first quote of each tweet, so str_replace_all() is used, applied
# to the whole column at once (this also copes with a zero-row file).
c_tweets$text <- str_replace_all(as.character(c_tweets$text), "\"", "")
# Drop the column named X if there is one.
# NOTE(review): presumably a row-name column left by an earlier CSV export - confirm.
c_tweets$X <- NULL

# Read all issues from XML file and add one zero-filled column per issue
# heading to c_issues.
# NOTE(review): `drange` is not defined in this file; it is assumed to
# already exist in the workspace - confirm.
c_issues <- data.frame(date = drange)
c_issuelist <- xmlToList("issues-v2.xml")
# Take the headings from the list that was just read. The earlier code read
# `issuelist` / `issueheads`, which this file never creates.
c_issueheads <- names(c_issuelist)
c_issues[c_issueheads] <- 0


# Coding pass: the sourced script goes through as many tweets as wished so
# they can be marked as correct or incorrect.
source("issuecomp-codingsample-function.R")
# Clean up the temporary objects afterwards.
rm(list = c(
  "c_err", "c_result", "c_samid", "c_samno",
  "c_samtags", "c_samissue", "c_samtext", "c_yn"
))


# Now go through tweets/tags marked as false

# Exit codes:
# 0 = Correct tagging
# 1 = At least one tag was incorrect
# 2 = At least one tag was missing
# 3 = Both 1 and 2

# Load the tweets recorded in the error file (no header row, all columns as
# character) and name the columns.
c_errors <- read.csv("issuecomp-codingsample-error.csv", header = FALSE, sep = ",", colClasses = "character")
names(c_errors) <- c("str_id", "code", "issue", "tags", "text")

# Walk through the recorded tweets one by one. seq_len() makes the loop a
# no-op for zero rows, where 1:nrow() would iterate over c(1, 0).
for (r in seq_len(nrow(c_errors))) {
  # Copy the current row into the c_err* variables. They are plain top-level
  # variables, so the script sourced below can see them.
  c_errcode <- as.character(c_errors$code[r])
  c_errissue <- as.character(c_errors$issue[r])
  c_errtags <- as.character(c_errors$tags[r])
  c_errtext <- as.character(c_errors$text[r])
  c_errid <- as.character(c_errors$str_id[r])

  # Show the tweet together with its issues and tags.
  cat("===============\n\n[TWEET]: ", c_errtext, "\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep = "")
  # NOTE(review): the sourced script is not part of this file; presumably it
  # handles the interactive correction for the current row - confirm.
  source("issuecomp-codingsample-function2.R")
}


# Now import the error files into human-readable data frames to improve the
# issue database. All three files are read without a header row and with
# every column as character; header = FALSE is spelled out because `F` is an
# ordinary variable that can be reassigned.

# All tweets with WRONG ISSUES
c_tmp <- read.csv("issuecomp-codingsample-error1.csv", header = FALSE, colClasses = "character")
names(c_tmp) <- c("str_id", "all", "wrong", "tagged", "text")
# Keep the four descriptive columns (str_id is dropped), wrong issue first.
c_error1 <- c_tmp[, c("wrong", "tagged", "all", "text")]

# All tweets with MISSING ISSUES
c_tmp <- read.csv("issuecomp-codingsample-error2.csv", header = FALSE, colClasses = "character")
names(c_tmp) <- c("str_id", "all", "missing", "tagged", "text")
# Keep the four descriptive columns (str_id is dropped), missing issue first.
c_error2 <- c_tmp[, c("missing", "text", "tagged", "all")]

# All CORRECT tweets
c_tmp <- read.csv("issuecomp-codingsample-correct.csv", header = FALSE, colClasses = "character")
names(c_tmp) <- c("str_id", "status", "issue", "tags", "text")
c_correct <- c_tmp
added coding sample tests 2015-01-15 20:24:40 +01:00			`require(stringr)`
			`require(XML)`

			`# FUNCTIONS ---------------------------------------------------------------`

			`readYN <- function(question) {`
			`n <- readline(prompt=question)`
			`n <- as.character(n)`
			`return(n)`
			`}`

			`checkIssue <- function(string, issuelist) {`
			`status <- any(str_detect(string, issuelist))`
			`return(status)`
			`}`

			`checkAllIssues <- function(string, issuelist) {`
			`status <- NULL`
			`for(i in 1:length(string)) {`
			`if(checkIssue(string[i], issuelist)) {`
			`status[i] <- TRUE`
			`}`
			`else {`
			`cat("Issue",string[i],"does not exist. Please try again.\n")`
			`status[i] <- FALSE`
			`}`
			`}`
			`return(status)`
			`}`


			`# SAMPLE OUT/INPUT --------------------------------------------------------`


			`# Read CSV of all tweets (with tags, if available)`
			`c_tweets <- read.csv("tweets.csv", colClasses="character")`
Corrent status 2015-03-04 11:36:31 +01:00			`# Replace quotes because it may cause problems when saving and reading as CSV files`
			`for(r in 1:nrow(c_tweets)) {`
			`curtext <- as.character(c_tweets$text[r])`
			`if(str_detect(curtext, "\"")) {`
			`c_tweets$text[r] <- str_replace(curtext, "\"", "")`
			`}`
			`}`
added coding sample tests 2015-01-15 20:24:40 +01:00			`c_tweets$X <- NULL`

			`# Read all issues from XML file`
			`c_issues <- data.frame(date = drange)`
some small changes 2015-03-13 15:11:49 +01:00			`c_issuelist <- xmlToList("issues-v2.xml")`
added coding sample tests 2015-01-15 20:24:40 +01:00			`c_issueheads <- names(issuelist)`
			`c_issues[issueheads] <- 0`


			`# Run through as many tweets as wished to mark them as correct or incorrect`
			`source("issuecomp-codingsample-function.R")`
			`rm(c_err, c_result, c_samid, c_samno,c_samtags,c_samissue,c_samtext,c_yn)`


			`# Now go through tweets/tags marked as false`

			`# Exit codes:`
			`# 0 = Correct tagging`
			`# 1 = At least one tag was incorrect`
			`# 2 = At least one tag was missing`
			`# 3 = Both 1 and 2`

			`c_errors <- read.csv("issuecomp-codingsample-error.csv", header = F, sep=",", colClasses="character")`
			`names(c_errors) <- c("str_id", "code", "issue", "tags", "text")`

			`for(r in 1:nrow(c_errors)) {`
			`c_errcode <- as.character(c_errors$code[r])`
			`c_errissue <- as.character(c_errors$issue[r])`
			`c_errtags <- as.character(c_errors$tags[r])`
			`c_errtext <- as.character(c_errors$text[r])`
			`c_errid <- as.character(c_errors$str_id[r])`

Corrent status 2015-03-04 11:36:31 +01:00			`cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="")`
added coding sample tests 2015-01-15 20:24:40 +01:00			`source("issuecomp-codingsample-function2.R")`
			`}`


Corrent status 2015-03-04 11:36:31 +01:00			`# Now import the error files in a human readable data frame to improve the issue database`

			`# All tweets with WRONG ISSUES`
			`c_tmp <- read.csv("issuecomp-codingsample-error1.csv", header = F, colClasses="character")`
			`names(c_tmp) <- c("str_id", "all", "wrong", "tagged", "text")`
			`c_error1 <- c_tmp[, c("wrong", "tagged", "all", "text")]`
added coding sample tests 2015-01-15 20:24:40 +01:00
Corrent status 2015-03-04 11:36:31 +01:00			`# All tweets with MISSING ISSUES`
			`c_tmp <- read.csv("issuecomp-codingsample-error2.csv", header = F, colClasses="character")`
			`names(c_tmp) <- c("str_id", "all", "missing", "tagged", "text")`
			`c_error2 <- c_tmp[, c("missing", "text", "tagged", "all")]`
added coding sample tests 2015-01-15 20:24:40 +01:00
Corrent status 2015-03-04 11:36:31 +01:00			`# All CORRECT tweets`
			`c_tmp <- read.csv("issuecomp-codingsample-correct.csv", header = F, colClasses="character")`
			`names(c_tmp) <- c("str_id", "status", "issue", "tags", "text")`
			`c_correct <- c_tmp`