Improve parallelisation

This commit is contained in:
2015-02-22 15:10:54 +01:00
parent b1a6a548f0
commit c09fd13976
4 changed files with 359 additions and 387 deletions
+15 -17
View File
@@ -33,15 +33,14 @@ issues[issueheads] <- 0
tweets$issue <- ""
tweets$tags <- ""
tagexpand <- c("", "s", "n", "en", "er")
tagexpand <- c("", "s", "n", "en", "er", "e")
# Parallelisation
writeLines(c(""), "issuecomp-analysis.log")
cl<-makeCluster(3)
cl<-makeCluster(4)
registerDoParallel(cl)
df<-foreach(d = 1:3, .packages = c("stringr"), .combine=rbind) %dopar% {
#df<-foreach(d = 1:nrow(issues), .packages = c("stringr"), .combine=rbind) %dopar% {
df<-foreach(d = 1:nrow(issues), .packages = c("stringr"), .combine=rbind) %dopar% {
#for(d in 1:nrow(issues)) {
# Go through every day
curdate <- issues$date[d]
@@ -50,8 +49,7 @@ df<-foreach(d = 1:3, .packages = c("stringr"), .combine=rbind) %dopar% {
# Put all tweets from specific day in a temporary DF
tweets_curday <- tweets[tweets[, "created_at"] == curdate, ]
for(t in 1:25){
#for(t in 1:nrow(tweets_curday)){
for(t in 1:nrow(tweets_curday)){
cat(paste("Starting tweet", t, "of",as.character(curdate),"\n"), file="issuecomp-analysis.log", append=TRUE)
# Select tweet's text, make it lowercase and remove hashtag indicators (#)
curtext <- as.character(tweets_curday$text[t])
@@ -101,25 +99,25 @@ df<-foreach(d = 1:3, .packages = c("stringr"), .combine=rbind) %dopar% {
curtag <- curtag[1]
if(tags_found == TRUE) {
# Raise number of findings on this day for this issue by 1
issues[d,curissue] <- issues[d,curissue] + 1
# Add issue and first matched tag of tweet to tweets-DF
oldissue <- tweets[tweets[, "id_str"] == curid, "issue"]
tweets[tweets[, "id_str"] == curid, "issue"] <- str_c(oldissue, curissue, ";")
oldtag <- tweets[tweets[, "id_str"] == curid, "tags"]
tweets[tweets[, "id_str"] == curid, "tags"] <- str_c(oldtag, curtag, ";")
# # Raise number of findings on this day for this issue by 1
# issues[d,curissue] <- issues[d,curissue] + 1
#
# # Add issue and first matched tag of tweet to tweets-DF
# oldissue <- tweets[tweets[, "id_str"] == curid, "issue"]
# tweets[tweets[, "id_str"] == curid, "issue"] <- str_c(oldissue, curissue, ";")
# oldtag <- tweets[tweets[, "id_str"] == curid, "tags"]
# tweets[tweets[, "id_str"] == curid, "tags"] <- str_c(oldtag, curtag, ";")
# Add information to file for function viewPatternMatching
write(str_c(curdate,";\"",curid,"\";",curtag), curfile, append = TRUE)
write(str_c(curdate,";\"",curid,"\";",curissue,";",curtag), curfile, append = TRUE)
cat(paste("Match!\n"), file="issuecomp-analysis.log", append=TRUE)
break
# data.frame(date=curdate, issue=curissue)
break # next issue, no more tags from same issue
}
else {
#cat("Nothing found\n")
}
} # /for curtags
} # /for issuelist
} # /for tweets_curday
} # /for drange