Improve parallelisation

2015-02-22 15:10:54 +01:00
parent b1a6a548f0
commit c09fd13976
4 changed files with 359 additions and 387 deletions
@@ -33,15 +33,14 @@ issues[issueheads] <- 0
 tweets$issue <- ""
 tweets$tags <- ""

-tagexpand <- c("", "s", "n", "en", "er")
+tagexpand <- c("", "s", "n", "en", "er", "e")

 # Parallelisation
 writeLines(c(""), "issuecomp-analysis.log")  
-cl<-makeCluster(3)
+cl<-makeCluster(4)
 registerDoParallel(cl)

-df<-foreach(d = 1:3, .packages = c("stringr"), .combine=rbind) %dopar% {
-#df<-foreach(d = 1:nrow(issues), .packages = c("stringr"), .combine=rbind) %dopar% {
+df<-foreach(d = 1:nrow(issues), .packages = c("stringr"), .combine=rbind) %dopar% {
 #for(d in 1:nrow(issues)) {
  # Go through every day
  curdate <- issues$date[d]
@@ -50,8 +49,7 @@ df<-foreach(d = 1:3, .packages = c("stringr"), .combine=rbind) %dopar% {
  # Put all tweets from specific day in a temporary DF
  tweets_curday <- tweets[tweets[, "created_at"] == curdate, ]
  
-  for(t in 1:25){
-  #for(t in 1:nrow(tweets_curday)){
+  for(t in 1:nrow(tweets_curday)){
    cat(paste("Starting tweet", t, "of",as.character(curdate),"\n"), file="issuecomp-analysis.log", append=TRUE)
    # Select tweet's text, make it lowercase and remove hashtag indicators (#)
    curtext <- as.character(tweets_curday$text[t])
@@ -101,25 +99,25 @@ df<-foreach(d = 1:3, .packages = c("stringr"), .combine=rbind) %dopar% {
        curtag <- curtag[1]
        
        if(tags_found == TRUE) {
-          # Raise number of findings on this day for this issue by 1
-          issues[d,curissue] <- issues[d,curissue] + 1
-          
-          # Add issue and first matched tag of tweet to tweets-DF
-          oldissue <- tweets[tweets[, "id_str"] == curid, "issue"]
-          tweets[tweets[, "id_str"] == curid, "issue"] <- str_c(oldissue, curissue, ";")
-          oldtag <- tweets[tweets[, "id_str"] == curid, "tags"]
-          tweets[tweets[, "id_str"] == curid, "tags"] <- str_c(oldtag, curtag, ";")
+#           # Raise number of findings on this day for this issue by 1
+#           issues[d,curissue] <- issues[d,curissue] + 1
+#           
+#           # Add issue and first matched tag of tweet to tweets-DF
+#           oldissue <- tweets[tweets[, "id_str"] == curid, "issue"]
+#           tweets[tweets[, "id_str"] == curid, "issue"] <- str_c(oldissue, curissue, ";")
+#           oldtag <- tweets[tweets[, "id_str"] == curid, "tags"]
+#           tweets[tweets[, "id_str"] == curid, "tags"] <- str_c(oldtag, curtag, ";")
          
          # Add information to file for function viewPatternMatching
-          write(str_c(curdate,";\"",curid,"\";",curtag), curfile, append = TRUE)
+          write(str_c(curdate,";\"",curid,"\";",curissue,";",curtag), curfile, append = TRUE)
          cat(paste("Match!\n"), file="issuecomp-analysis.log", append=TRUE)
-          break
+#           data.frame(date=curdate, issue=curissue)
+          break   # next issue, no more tags from same issue
        }
        else {
          #cat("Nothing found\n")
        }
      } # /for curtags
-      
    } # /for issuelist
  } # /for tweets_curday
 } # /for drange