# Setup: load tweets, build the issue table, read matched results ----
#
# Reproducible version of an interactive session. Only the final, working
# form of each step is kept; abandoned attempts (calls that errored, loops
# that discarded their result, and View()/print inspections) are removed.
#
# Objects left in the workspace:
#   date_start, date_end, drange  - first day, last day and daily Date range
#   issues      - one row per day in `drange`, one zeroed column per issue
#   issuelist   - parsed contents of issues.xml
#   issueheads  - names of `issuelist`
#   tweets      - tweet data with empty `issue` and `tags` columns added
#   results     - all matches from matched-ids/, de-duplicated, sorted by
#                 id_str

library(lubridate)
library(XML)
library(ggplot2)
library(reshape2)
library(stringr)
library(foreach)
library(doParallel)

# Kept from the original session: every relative path below resolves
# against the project directory.
setwd("~/Dokumente/Uni/Aktuell/BA-Arbeit/uni-ba-issuecomp")

# Presumably defines insertRow(), which is used further down - confirm.
source("issuecomp-functions.R")

# Presumably provides `tweets` (nothing else in this section creates it).
load(file = "tweets_untagged.RData")

# Create date range
date_start <- as.Date("2014-01-01")
date_end <- as.Date("2014-12-31")
drange <- as.integer(date_end - date_start)
drange <- date_start + days(0:drange)

# One row per day; the issue columns are added once the issue names are known.
issues <- data.frame(date = drange)

# NOTE(review): the pattern ".*" is reproduced unchanged, but as written it
# blanks every line before parsing. The intended pattern was probably more
# specific (e.g. stripping comments) - confirm against the original script.
issuelist <- readLines("issues.xml")
issuelist <- str_replace_all(string = issuelist, pattern = ".*", "")
issuelist <- xmlToList(issuelist)
issueheads <- names(issuelist)

issues[issueheads] <- 0
tweets$issue <- ""
tweets$tags <- ""

# Read every match file (semicolon-separated, no header, four character
# columns) and combine them with insertRow() in listing order. Full paths
# are used so the working directory does not have to change.
results_files <- list.files("matched-ids", full.names = TRUE)
stopifnot(length(results_files) > 0)
results <- Reduce(
  insertRow,
  lapply(
    results_files,
    read.csv,
    sep = ";",
    colClasses = rep("character", 4),
    header = FALSE
  )
)
rm(results_files)

# Drop exact duplicate rows, name the columns, and sort by tweet id.
# id_str is character, so the ordering is lexicographic.
results <- results[!duplicated(results), ]
names(results) <- c("date", "id_str", "issue", "tags")
results <- results[order(results$id_str), ]
row.names(results) <- NULL
# Count issues per day and tag tweets ----
#
# Reproducible version of an interactive session. Earlier trial runs (a
# row-name based increment that appended a bogus row, and repeated full
# passes that were reset afterwards) are removed; a single pass remains.
#
# Expects in the workspace: `drange`, `results` (columns date, id_str,
# issue, tags) and `tweets` (with an id_str column).
# Produces: `issues` with per-day counts, `tweets` with filled `issue` and
# `tags` columns, and the file tweets_tagged.RData.

library(lubridate)
library(XML)
library(ggplot2)
library(reshape2)
library(stringr)
library(foreach)
library(doParallel)

# Kept from the original session: the relative paths below resolve against
# the project directory.
setwd("~/Dokumente/Uni/Aktuell/BA-Arbeit/uni-ba-issuecomp")

# NOTE(review): the pattern ".*" is reproduced unchanged, but as written it
# blanks every line before parsing. The intended pattern was probably more
# specific (e.g. stripping comments) - confirm against the original script.
issuelist <- readLines("issues.xml")
issuelist <- str_replace_all(string = issuelist, pattern = ".*", "")
issuelist <- xmlToList(issuelist)
issueheads <- names(issuelist)

# Start from a clean slate so that re-running this section never counts or
# tags anything twice.
issues <- data.frame(date = drange)
issues[issueheads] <- 0
tweets$issue <- ""
tweets$tags <- ""

n_results <- nrow(results)
for (r in seq_len(n_results)) {
  curdate <- as.character(results$date[r])
  curid <- as.character(results$id_str[r])
  curissue <- as.character(results$issue[r])
  curtag <- as.character(results$tags[r])
  cat("Sorting match", r, "from", n_results, "\n")

  # Update issue counter (date and issue)
  day_rows <- issues[, "date"] == curdate
  issues[day_rows, curissue] <- issues[day_rows, curissue] + 1

  # Update tweet dataframe (id, issue and tags): append the new value plus
  # a trailing comma to whatever is already stored for this tweet.
  tweet_rows <- tweets[, "id_str"] == curid
  tweets[tweet_rows, "issue"] <-
    str_c(tweets[tweet_rows, "issue"], curissue, ",")
  tweets[tweet_rows, "tags"] <-
    str_c(tweets[tweet_rows, "tags"], curtag, ",")
}

save(tweets, file = "tweets_tagged.RData")