Parallel loop works; results are now returned as a data frame

This commit is contained in:
2015-02-22 03:05:56 +01:00
parent b3658e282a
commit a121c1baf1
3 changed files with 802 additions and 365 deletions
+7 -6
View File
@@ -3,6 +3,8 @@ require(XML)
require(ggplot2)
require(reshape2)
require(stringr)
library(foreach)
library(doParallel)
source("issuecomp-functions.R")
@@ -34,22 +36,21 @@ tweets$tags <- ""
tagexpand <- c("", "s", "n", "en", "er")
# Parallelisation
writeLines(c(""), "log.txt")
writeLines(c(""), "issuecomp-analysis.log")
cl<-makeCluster(3)
registerDoParallel(cl)
df<-foreach(d = 1:nrow(issues) %dopar% {
df<-foreach(d = 1:nrow(issues), .packages = c("stringr"), .combine=rbind) %dopar% {
#for(d in 1:nrow(issues)) {
# Go through every day
curdate <- issues$date[d]
sink("log.txt", append=TRUE)
cat(as.character(curdate),"\n")
cat(paste(as.character(curdate),"\n"), file="issuecomp-analysis.log", append=TRUE)
# Put all tweets from specific day in a temporary DF
tweets_curday <- tweets[tweets[, "created_at"] == curdate, ]
for(t in 1:nrow(tweets_curday)){
cat("Starting tweet", t, "of",as.character(curdate),"\n")
cat(paste("Starting tweet", t, "of",as.character(curdate),"\n"), file="issuecomp-analysis.log", append=TRUE)
# Select tweet's text, make it lowercase and remove hashtag indicators (#)
curtext <- as.character(tweets_curday$text[t])
curtext <- str_replace_all(curtext, "#", "")
@@ -122,7 +123,7 @@ df<-foreach(d = 1:nrow(issues) %dopar% {
} # /for drange
#rm(tweets_curday,curacro, curchars, curdate,curfile,curid,curissue,curtag,curtags,curtext,d,date_end,date_start,i,id_folder,oldissue,oldtag,s,t,tags_found)
stopCluster(cl)
# SAVING ------------------------------------------------------------------