From b1a6a548f04987b367408b2c328296a3a4609695 Mon Sep 17 00:00:00 2001 From: mxmehl Date: Sun, 22 Feb 2015 03:18:51 +0100 Subject: [PATCH] output still has to be directed to correct data frame --- .Rhistory | 924 ++++++++++++++++++++--------------------- issuecomp-analysis.R | 8 +- issuecomp-analysis.log | 435 +++---------------- 3 files changed, 518 insertions(+), 849 deletions(-) diff --git a/.Rhistory b/.Rhistory index 48415fe..d55074d 100644 --- a/.Rhistory +++ b/.Rhistory @@ -1,465 +1,3 @@ -#loop -ls<-foreach(i = 1:length(drange)) %dopar% { -sink("log.txt", append=TRUE) -as.character(drange[i]) -w <- sample(1:2, 1) -Sys.sleep(w) -} -stopCluster(cl) -#import packages -library(foreach) -library(doParallel) -#setup parallel backend to use 8 processors -cl<-makeCluster(3) -registerDoParallel(cl) -#start time -strt<-Sys.time() -writeLines(c(""), "log.txt") -#loop -ls<-foreach(i = 1:length(drange)) %dopar% { -sink("log.txt", append=TRUE) -as.character(drange[i]) -} -print(Sys.time()-strt) -stopCluster(cl) -#import packages -library(foreach) -library(doParallel) -#setup parallel backend to use 8 processors -cl<-makeCluster(3) -registerDoParallel(cl) -#start time -strt<-Sys.time() -writeLines(c(""), "log.txt") -#loop -ls<-foreach(i = 1:length(drange)) %dopar% { -sink("log.txt", append=TRUE) -cat(as.character(drange[i])) -} -print(Sys.time()-strt) -stopCluster(cl) -writeLines(c(""), "log.txt") -cat(as.character(drange[i])) -writeLines(c(""), "log.txt") -#import packages -library(foreach) -library(doParallel) -#setup parallel backend to use 8 processors -cl<-makeCluster(3) -registerDoParallel(cl) -#start time -strt<-Sys.time() -writeLines(c(""), "log.txt") -#loop -ls<-foreach(i = 1:length(drange)) %dopar% { -sink("log.txt", append=TRUE) -cat(as.character(drange[i]),"\n") -} -print(Sys.time()-strt) -stopCluster(cl) -#import packages -library(foreach) -library(doParallel) -#setup parallel backend to use 8 processors -cl<-makeCluster(3) -registerDoParallel(cl) -#start time -strt<-Sys.time() -writeLines(c(""), "log.txt") -#loop -ls<-foreach(i = 1:length(drange)) %dopar% { -sink("log.txt", append=TRUE) -cat(as.character(drange[i]),"\n") -w <- sample(1:3, 1) -Sys.sleep(w) -as.character(drange[i]) -} -print(Sys.time()-strt) -stopCluster(cl) -#import packages -library(foreach) -library(doParallel) -#setup parallel backend to use 8 processors -cl<-makeCluster(3) -registerDoParallel(cl) -#start time -strt<-Sys.time() -writeLines(c(""), "log.txt") -#loop -ls<-foreach(i = 1:length(drange)) %dopar% { -sink("log.txt", append=TRUE) -cat(as.character(drange[i]),"\n") -# w <- sample(1:3, 1) -# Sys.sleep(w) -as.character(drange[i]) -} -print(Sys.time()-strt) -stopCluster(cl) -#import packages -library(foreach) -library(doParallel) -#setup parallel backend to use 8 processors -cl<-makeCluster(3) -registerDoParallel(cl) -#start time -strt<-Sys.time() -writeLines(c(""), "log.txt") -#loop -ls<-foreach(i = 1:length(drange)) %dopar% { -sink("log.txt", append=TRUE) -cat(as.character(drange[i]),"\n") -# w <- sample(1:3, 1) -# Sys.sleep(w) -as.character(drange[i]) -} -print(Sys.time()-strt) -stopCluster(cl) -#import packages -library(foreach) -library(doParallel) -#setup parallel backend to use 8 processors -cl<-makeCluster(3) -registerDoParallel(cl) -#start time -strt<-Sys.time() -writeLines(c(""), "log.txt") -#loop -ls<-foreach(i = 1:length(drange)) %dopar% { -cat(paste("\n","Starting iteration",i,"\n"), file="log.txt", append=TRUE) -as.character(drange[i]) -} -print(Sys.time()-strt) -stopCluster(cl) -#import packages -library(foreach) -library(doParallel) -#setup parallel backend to use 8 processors -cl<-makeCluster(3) -registerDoParallel(cl) -#start time -strt<-Sys.time() -writeLines(c(""), "log.txt") -#loop -ls<-foreach(i = 1:length(drange)) %dopar% { -w <- sample(1:3, 1) -Sys.sleep(w) -cat(paste("\n","Starting iteration",i,"\n"), file="log.txt", append=TRUE) -as.character(drange[i]) -} -#import packages -library(foreach) -library(doParallel) -#setup parallel backend to use 8 processors -cl<-makeCluster(3) -registerDoParallel(cl) -#start time -strt<-Sys.time() -writeLines(c(""), "log.txt") -#loop -ls<-foreach(i = 1:length(drange)) %dopar% { -w <- sample(1:10, 1) -Sys.sleep(w) -cat(paste("\n","Starting iteration",i,"\n"), file="log.txt", append=TRUE) -as.character(drange[i]) -} -#import packages -library(foreach) -library(doParallel) -#setup parallel backend to use 8 processors -cl<-makeCluster(3) -registerDoParallel(cl) -#start time -strt<-Sys.time() -writeLines(c(""), "log.txt") -#loop -ls<-foreach(i = 1:length(drange)) %dopar% { -w <- sample(1:10, 1) -#Sys.sleep(w) -cat(paste("\n","Starting iteration",i,"\n"), file="log.txt", append=TRUE) -as.character(drange[i]) -} -print(Sys.time()-strt) -stopCluster(cl) -#import packages -library(foreach) -library(doParallel) -#setup parallel backend to use 8 processors -cl<-makeCluster(3) -registerDoParallel(cl) -#start time -strt<-Sys.time() -writeLines(c(""), "log.txt") -#loop -ls<-foreach(i = 1:length(drange)) %dopar% { -w <- sample(1:10, 1) -#Sys.sleep(w) -cat(paste("\n","Starting iteration",i,"\n"), file="log.txt", append=TRUE) -as.character(drange[i]) -} -print(Sys.time()-strt) -stopCluster(cl) -View(data) -#import packages -library(foreach) -library(doParallel) -#setup parallel backend to use 8 processors -cl<-makeCluster(3) -registerDoParallel(cl) -#start time -strt<-Sys.time() -writeLines(c(""), "log.txt") -#loop -data<-foreach(i = 1:length(drange)) %dopar% { -w <- sample(1:10, 1) -#Sys.sleep(w) -cat(paste("\n","Starting iteration",i,"\n"), file="log.txt", append=TRUE) -as.character(drange[i]) -} -print(Sys.time()-strt) -stopCluster(cl) -rm(ls) -data -#import packages -library(foreach) -library(doParallel) -#setup parallel backend to use 8 processors -cl<-makeCluster(3) -registerDoParallel(cl) -#start time -strt<-Sys.time() -writeLines(c(""), "log.txt") -#loop -df<-foreach(i = 1:length(drange)) %dopar% { -w <- sample(1:10, 1) -#Sys.sleep(w) -cat(paste("\n","Starting iteration",i,"\n"), file="log.txt", append=TRUE) -as.character(drange[i]) -} -print(Sys.time()-strt) -stopCluster(cl) -df -view(df) -View(df) -# Parallelisation -writeLines(c(""), "log.txt") -cl<-makeCluster(3) -registerDoParallel(cl) -# MATCH TWEETS ------------------------------------------------------------ -id_folder <- "matched-ids" -unlink(id_folder, recursive = TRUE) -dir.create(id_folder) -issues <- data.frame(date = drange) -issuelist <- readLines("issues.xml") -issuelist <- str_replace_all(string = issuelist, pattern = ".*", "") -issuelist <- xmlToList(issuelist) -issueheads <- names(issuelist) -issues[issueheads] <- 0 -tweets$issue <- "" -tweets$tags <- "" -tagexpand <- c("", "s", "n", "en", "er") -# Parallelisation -writeLines(c(""), "issuecomp-analysis.log") -cl<-makeCluster(3) -registerDoParallel(cl) -df<-foreach(d = 1:nrow(issues) %dopar% { -#for(d in 1:nrow(issues)) { -# Go through every day -curdate <- issues$date[d] -sink("log.txt", append=TRUE) -cat(as.character(curdate),"\n") -# Put all tweets from specific day in a temporary DF -tweets_curday <- tweets[tweets[, "created_at"] == curdate, ] -for(t in 1:nrow(tweets_curday)){ -cat("Starting tweet", t, "of",as.character(curdate),"\n") -# Select tweet's text, make it lowercase and remove hashtag indicators (#) -curtext <- as.character(tweets_curday$text[t]) -curtext <- str_replace_all(curtext, "#", "") -curid <- as.character(tweets_curday$id_str[t]) -# Now test each single issue (not tag!) -for(i in 1:length(issueheads)) { -curissue <- issueheads[i] -curtags <- as.character(issuelist[[curissue]]) -curfile <- str_c(id_folder,"/",curissue,".csv") -# Now test all tags of a single issue -for(s in 1:length(curtags)) { -curtag <- curtags[s] -curchars <- nchar(curtag, type = "chars") -# Check if tag is an acronym. If so, ignore.case will be deactivated in smartPatternMatch -if(curchars <= 4) { -curacro <- checkAcronym(string = curtag, chars = curchars) -} else { -curacro <- FALSE -} -# Now expand the current tag by possible suffixes that may be plural forms -if(!curacro) { -for(e in 1:length(tagexpand)) { -curtag[e] <- str_c(curtag[1], tagexpand[e]) -} -} -# Set Levenshtein distance depending on char length -if(curchars <= 4) { -curdistance <- 0 -} else { -curdistance <- 1 -} -# Match current tweet with tag. If >= 5 letters allow 1 changed letter, if >=8 letters allow also 1 (Levenshtein distance) -tags_found <- NULL -# Match the tweet with each variation of tagexpand -for(e in 1:length(curtag)) { -tags_found[e] <- smartPatternMatch(curtext, curtag[e], curdistance, curacro) -} -tags_found <- any(tags_found) -curtag <- curtag[1] -if(tags_found == TRUE) { -# Raise number of findings on this day for this issue by 1 -issues[d,curissue] <- issues[d,curissue] + 1 -# Add issue and first matched tag of tweet to tweets-DF -oldissue <- tweets[tweets[, "id_str"] == curid, "issue"] -tweets[tweets[, "id_str"] == curid, "issue"] <- str_c(oldissue, curissue, ";") -oldtag <- tweets[tweets[, "id_str"] == curid, "tags"] -tweets[tweets[, "id_str"] == curid, "tags"] <- str_c(oldtag, curtag, ";") -# Add information to file for function viewPatternMatching -write(str_c(curdate,";\"",curid,"\";",curtag), curfile, append = TRUE) -cat("Match!\n") -break -} -else { -#cat("Nothing found\n") -} -} # /for curtags -} # /for issuelist -} # /for tweets_curday -} # /for drange -#rm(tweets_curday,curacro, curchars, curdate,curfile,curid,curissue,curtag,curtags,curtext,d,date_end,date_start,i,id_folder,oldissue,oldtag,s,t,tags_found) -# MATCH TWEETS ------------------------------------------------------------ -id_folder <- "matched-ids" -unlink(id_folder, recursive = TRUE) -dir.create(id_folder) -issues <- data.frame(date = drange) -issuelist <- readLines("issues.xml") -issuelist <- str_replace_all(string = issuelist, pattern = ".*", "") -issuelist <- xmlToList(issuelist) -issueheads <- names(issuelist) -issues[issueheads] <- 0 -tweets$issue <- "" -tweets$tags <- "" -tagexpand <- c("", "s", "n", "en", "er") -# Parallelisation -writeLines(c(""), "issuecomp-analysis.log") -cl<-makeCluster(3) -registerDoParallel(cl) -df<-foreach(d = 1:nrow(issues)) %dopar% { -#for(d in 1:nrow(issues)) { -# Go through every day -curdate <- issues$date[d] -sink("log.txt", append=TRUE) -cat(as.character(curdate),"\n") -# Put all tweets from specific day in a temporary DF -tweets_curday <- tweets[tweets[, "created_at"] == curdate, ] -for(t in 1:nrow(tweets_curday)){ -cat("Starting tweet", t, "of",as.character(curdate),"\n") -# Select tweet's text, make it lowercase and remove hashtag indicators (#) -curtext <- as.character(tweets_curday$text[t]) -curtext <- str_replace_all(curtext, "#", "") -curid <- as.character(tweets_curday$id_str[t]) -# Now test each single issue (not tag!) -for(i in 1:length(issueheads)) { -curissue <- issueheads[i] -curtags <- as.character(issuelist[[curissue]]) -curfile <- str_c(id_folder,"/",curissue,".csv") -# Now test all tags of a single issue -for(s in 1:length(curtags)) { -curtag <- curtags[s] -curchars <- nchar(curtag, type = "chars") -# Check if tag is an acronym. If so, ignore.case will be deactivated in smartPatternMatch -if(curchars <= 4) { -curacro <- checkAcronym(string = curtag, chars = curchars) -} else { -curacro <- FALSE -} -# Now expand the current tag by possible suffixes that may be plural forms -if(!curacro) { -for(e in 1:length(tagexpand)) { -curtag[e] <- str_c(curtag[1], tagexpand[e]) -} -} -# Set Levenshtein distance depending on char length -if(curchars <= 4) { -curdistance <- 0 -} else { -curdistance <- 1 -} -# Match current tweet with tag. If >= 5 letters allow 1 changed letter, if >=8 letters allow also 1 (Levenshtein distance) -tags_found <- NULL -# Match the tweet with each variation of tagexpand -for(e in 1:length(curtag)) { -tags_found[e] <- smartPatternMatch(curtext, curtag[e], curdistance, curacro) -} -tags_found <- any(tags_found) -curtag <- curtag[1] -if(tags_found == TRUE) { -# Raise number of findings on this day for this issue by 1 -issues[d,curissue] <- issues[d,curissue] + 1 -# Add issue and first matched tag of tweet to tweets-DF -oldissue <- tweets[tweets[, "id_str"] == curid, "issue"] -tweets[tweets[, "id_str"] == curid, "issue"] <- str_c(oldissue, curissue, ";") -oldtag <- tweets[tweets[, "id_str"] == curid, "tags"] -tweets[tweets[, "id_str"] == curid, "tags"] <- str_c(oldtag, curtag, ";") -# Add information to file for function viewPatternMatching -write(str_c(curdate,";\"",curid,"\";",curtag), curfile, append = TRUE) -cat("Match!\n") -break -} -else { -#cat("Nothing found\n") -} -} # /for curtags -} # /for issuelist -} # /for tweets_curday -} # /for drange -#rm(tweets_curday,curacro, curchars, curdate,curfile,curid,curissue,curtag,curtags,curtext,d,date_end,date_start,i,id_folder,oldissue,oldtag,s,t,tags_found) -# MATCH TWEETS ------------------------------------------------------------ -id_folder <- "matched-ids" -unlink(id_folder, recursive = TRUE) -dir.create(id_folder) -issues <- data.frame(date = drange) -issuelist <- readLines("issues.xml") -issuelist <- str_replace_all(string = issuelist, pattern = ".*", "") -issuelist <- xmlToList(issuelist) -issueheads <- names(issuelist) -issues[issueheads] <- 0 -tweets$issue <- "" -tweets$tags <- "" -tagexpand <- c("", "s", "n", "en", "er") -# Parallelisation -writeLines(c(""), "issuecomp-analysis.log") -cl<-makeCluster(3) -registerDoParallel(cl) -df<-foreach(d = 1:nrow(issues), .packages = c("stringr")) %dopar% { -#for(d in 1:nrow(issues)) { -# Go through every day -curdate <- issues$date[d] -sink("log.txt", append=TRUE) -cat(as.character(curdate),"\n") -# Put all tweets from specific day in a temporary DF -tweets_curday <- tweets[tweets[, "created_at"] == curdate, ] -for(t in 1:nrow(tweets_curday)){ -cat("Starting tweet", t, "of",as.character(curdate),"\n") -# Select tweet's text, make it lowercase and remove hashtag indicators (#) -curtext <- as.character(tweets_curday$text[t]) -curtext <- str_replace_all(curtext, "#", "") -curid <- as.character(tweets_curday$id_str[t]) -# Now test each single issue (not tag!) -for(i in 1:length(issueheads)) { -curissue <- issueheads[i] -curtags <- as.character(issuelist[[curissue]]) -curfile <- str_c(id_folder,"/",curissue,".csv") -# Now test all tags of a single issue -for(s in 1:length(curtags)) { -curtag <- curtags[s] -curchars <- nchar(curtag, type = "chars") -# Check if tag is an acronym. If so, ignore.case will be deactivated in smartPatternMatch -if(curchars <= 4) { curacro <- checkAcronym(string = curtag, chars = curchars) } else { curacro <- FALSE @@ -510,3 +48,465 @@ cl df View(data) stopCluster(cl) +# MATCH TWEETS ------------------------------------------------------------ +id_folder <- "matched-ids" +unlink(id_folder, recursive = TRUE) +dir.create(id_folder) +issues <- data.frame(date = drange) +issuelist <- readLines("issues.xml") +issuelist <- str_replace_all(string = issuelist, pattern = ".*", "") +issuelist <- xmlToList(issuelist) +issueheads <- names(issuelist) +issues[issueheads] <- 0 +tweets$issue <- "" +tweets$tags <- "" +tagexpand <- c("", "s", "n", "en", "er") +# Parallelisation +writeLines(c(""), "issuecomp-analysis.log") +cl<-makeCluster(3) +registerDoParallel(cl) +df<-foreach(d = 1:nrow(issues), .packages = c("stringr")) %dopar% { +#for(d in 1:nrow(issues)) { +# Go through every day +curdate <- issues$date[d] +cat(paste(as.character(curdate),"\n"), file="issuecomp-analysis.log", append=TRUE) +# Put all tweets from specific day in a temporary DF +tweets_curday <- tweets[tweets[, "created_at"] == curdate, ] +for(t in 1:nrow(tweets_curday)){ +cat(paste("Starting tweet", t, "of",as.character(curdate),"\n"), file="issuecomp-analysis.log", append=TRUE) +# Select tweet's text, make it lowercase and remove hashtag indicators (#) +curtext <- as.character(tweets_curday$text[t]) +curtext <- str_replace_all(curtext, "#", "") +curid <- as.character(tweets_curday$id_str[t]) +# Now test each single issue (not tag!) +for(i in 1:length(issueheads)) { +curissue <- issueheads[i] +curtags <- as.character(issuelist[[curissue]]) +curfile <- str_c(id_folder,"/",curissue,".csv") +# Now test all tags of a single issue +for(s in 1:length(curtags)) { +curtag <- curtags[s] +curchars <- nchar(curtag, type = "chars") +# Check if tag is an acronym. If so, ignore.case will be deactivated in smartPatternMatch +if(curchars <= 4) { +curacro <- checkAcronym(string = curtag, chars = curchars) +} else { +curacro <- FALSE +} +# Now expand the current tag by possible suffixes that may be plural forms +if(!curacro) { +for(e in 1:length(tagexpand)) { +curtag[e] <- str_c(curtag[1], tagexpand[e]) +} +} +# Set Levenshtein distance depending on char length +if(curchars <= 4) { +curdistance <- 0 +} else { +curdistance <- 1 +} +# Match current tweet with tag. If >= 5 letters allow 1 changed letter, if >=8 letters allow also 1 (Levenshtein distance) +tags_found <- NULL +# Match the tweet with each variation of tagexpand +for(e in 1:length(curtag)) { +tags_found[e] <- smartPatternMatch(curtext, curtag[e], curdistance, curacro) +} +tags_found <- any(tags_found) +curtag <- curtag[1] +if(tags_found == TRUE) { +# Raise number of findings on this day for this issue by 1 +issues[d,curissue] <- issues[d,curissue] + 1 +# Add issue and first matched tag of tweet to tweets-DF +oldissue <- tweets[tweets[, "id_str"] == curid, "issue"] +tweets[tweets[, "id_str"] == curid, "issue"] <- str_c(oldissue, curissue, ";") +oldtag <- tweets[tweets[, "id_str"] == curid, "tags"] +tweets[tweets[, "id_str"] == curid, "tags"] <- str_c(oldtag, curtag, ";") +# Add information to file for function viewPatternMatching +write(str_c(curdate,";\"",curid,"\";",curtag), curfile, append = TRUE) +cat("Match!\n") +break +} +else { +#cat("Nothing found\n") +} +} # /for curtags +} # /for issuelist +} # /for tweets_curday +} # /for drange +#rm(tweets_curday,curacro, curchars, curdate,curfile,curid,curissue,curtag,curtags,curtext,d,date_end,date_start,i,id_folder,oldissue,oldtag,s,t,tags_found) +stopCluster(cl) +require(lubridate) +require(XML) +require(ggplot2) +require(reshape2) +require(stringr) +library(foreach) +library(doParallel) +# MATCH TWEETS ------------------------------------------------------------ +id_folder <- "matched-ids" +unlink(id_folder, recursive = TRUE) +dir.create(id_folder) +issues <- data.frame(date = drange) +issuelist <- readLines("issues.xml") +issuelist <- str_replace_all(string = issuelist, pattern = ".*", "") +issuelist <- xmlToList(issuelist) +issueheads <- names(issuelist) +issues[issueheads] <- 0 +tweets$issue <- "" +tweets$tags <- "" +tagexpand <- c("", "s", "n", "en", "er") +# Parallelisation +writeLines(c(""), "issuecomp-analysis.log") +cl<-makeCluster(3) +registerDoParallel(cl) +df<-foreach(d = 1:nrow(issues), .packages = c("stringr")) %dopar% { +#for(d in 1:nrow(issues)) { +# Go through every day +curdate <- issues$date[d] +cat(paste(as.character(curdate),"\n"), file="issuecomp-analysis.log", append=TRUE) +# Put all tweets from specific day in a temporary DF +tweets_curday <- tweets[tweets[, "created_at"] == curdate, ] +for(t in 1:nrow(tweets_curday)){ +cat(paste("Starting tweet", t, "of",as.character(curdate),"\n"), file="issuecomp-analysis.log", append=TRUE) +# Select tweet's text, make it lowercase and remove hashtag indicators (#) +curtext <- as.character(tweets_curday$text[t]) +curtext <- str_replace_all(curtext, "#", "") +curid <- as.character(tweets_curday$id_str[t]) +# Now test each single issue (not tag!) +for(i in 1:length(issueheads)) { +curissue <- issueheads[i] +curtags <- as.character(issuelist[[curissue]]) +curfile <- str_c(id_folder,"/",curissue,".csv") +# Now test all tags of a single issue +for(s in 1:length(curtags)) { +curtag <- curtags[s] +curchars <- nchar(curtag, type = "chars") +# Check if tag is an acronym. If so, ignore.case will be deactivated in smartPatternMatch +if(curchars <= 4) { +curacro <- checkAcronym(string = curtag, chars = curchars) +} else { +curacro <- FALSE +} +# Now expand the current tag by possible suffixes that may be plural forms +if(!curacro) { +for(e in 1:length(tagexpand)) { +curtag[e] <- str_c(curtag[1], tagexpand[e]) +} +} +# Set Levenshtein distance depending on char length +if(curchars <= 4) { +curdistance <- 0 +} else { +curdistance <- 1 +} +# Match current tweet with tag. If >= 5 letters allow 1 changed letter, if >=8 letters allow also 1 (Levenshtein distance) +tags_found <- NULL +# Match the tweet with each variation of tagexpand +for(e in 1:length(curtag)) { +tags_found[e] <- smartPatternMatch(curtext, curtag[e], curdistance, curacro) +} +tags_found <- any(tags_found) +curtag <- curtag[1] +if(tags_found == TRUE) { +# Raise number of findings on this day for this issue by 1 +issues[d,curissue] <- issues[d,curissue] + 1 +# Add issue and first matched tag of tweet to tweets-DF +oldissue <- tweets[tweets[, "id_str"] == curid, "issue"] +tweets[tweets[, "id_str"] == curid, "issue"] <- str_c(oldissue, curissue, ";") +oldtag <- tweets[tweets[, "id_str"] == curid, "tags"] +tweets[tweets[, "id_str"] == curid, "tags"] <- str_c(oldtag, curtag, ";") +# Add information to file for function viewPatternMatching +write(str_c(curdate,";\"",curid,"\";",curtag), curfile, append = TRUE) +cat("Match!\n") +break +} +else { +#cat("Nothing found\n") +} +} # /for curtags +} # /for issuelist +} # /for tweets_curday +} # /for drange +stopCluster(cl) +cl<-makeCluster(3) +registerDoParallel(cl) +stopCluster(cl) +# MATCH TWEETS ------------------------------------------------------------ +id_folder <- "matched-ids" +unlink(id_folder, recursive = TRUE) +dir.create(id_folder) +issues <- data.frame(date = drange) +issuelist <- readLines("issues.xml") +issuelist <- str_replace_all(string = issuelist, pattern = ".*", "") +issuelist <- xmlToList(issuelist) +issueheads <- names(issuelist) +issues[issueheads] <- 0 +tweets$issue <- "" +tweets$tags <- "" +tagexpand <- c("", "s", "n", "en", "er") +# Parallelisation +writeLines(c(""), "issuecomp-analysis.log") +cl<-makeCluster(3) +registerDoParallel(cl) +df<-foreach(d = 1:nrow(issues), .packages = c("stringr"), .combine=rbind) %dopar% { +#for(d in 1:nrow(issues)) { +# Go through every day +curdate <- issues$date[d] +cat(paste(as.character(curdate),"\n"), file="issuecomp-analysis.log", append=TRUE) +# Put all tweets from specific day in a temporary DF +tweets_curday <- tweets[tweets[, "created_at"] == curdate, ] +for(t in 1:nrow(tweets_curday)){ +cat(paste("Starting tweet", t, "of",as.character(curdate),"\n"), file="issuecomp-analysis.log", append=TRUE) +# Select tweet's text, make it lowercase and remove hashtag indicators (#) +curtext <- as.character(tweets_curday$text[t]) +curtext <- str_replace_all(curtext, "#", "") +curid <- as.character(tweets_curday$id_str[t]) +# Now test each single issue (not tag!) +for(i in 1:length(issueheads)) { +curissue <- issueheads[i] +curtags <- as.character(issuelist[[curissue]]) +curfile <- str_c(id_folder,"/",curissue,".csv") +# Now test all tags of a single issue +for(s in 1:length(curtags)) { +curtag <- curtags[s] +curchars <- nchar(curtag, type = "chars") +# Check if tag is an acronym. If so, ignore.case will be deactivated in smartPatternMatch +if(curchars <= 4) { +curacro <- checkAcronym(string = curtag, chars = curchars) +} else { +curacro <- FALSE +} +# Now expand the current tag by possible suffixes that may be plural forms +if(!curacro) { +for(e in 1:length(tagexpand)) { +curtag[e] <- str_c(curtag[1], tagexpand[e]) +} +} +# Set Levenshtein distance depending on char length +if(curchars <= 4) { +curdistance <- 0 +} else { +curdistance <- 1 +} +# Match current tweet with tag. If >= 5 letters allow 1 changed letter, if >=8 letters allow also 1 (Levenshtein distance) +tags_found <- NULL +# Match the tweet with each variation of tagexpand +for(e in 1:length(curtag)) { +tags_found[e] <- smartPatternMatch(curtext, curtag[e], curdistance, curacro) +} +tags_found <- any(tags_found) +curtag <- curtag[1] +if(tags_found == TRUE) { +# Raise number of findings on this day for this issue by 1 +issues[d,curissue] <- issues[d,curissue] + 1 +# Add issue and first matched tag of tweet to tweets-DF +oldissue <- tweets[tweets[, "id_str"] == curid, "issue"] +tweets[tweets[, "id_str"] == curid, "issue"] <- str_c(oldissue, curissue, ";") +oldtag <- tweets[tweets[, "id_str"] == curid, "tags"] +tweets[tweets[, "id_str"] == curid, "tags"] <- str_c(oldtag, curtag, ";") +# Add information to file for function viewPatternMatching +write(str_c(curdate,";\"",curid,"\";",curtag), curfile, append = TRUE) +cat("Match!\n") +break +} +else { +#cat("Nothing found\n") +} +} # /for curtags +} # /for issuelist +} # /for tweets_curday +} # /for drange +stopCluster(cl) +require(lubridate) +require(XML) +require(ggplot2) +require(reshape2) +require(stringr) +library(foreach) +library(doParallel) +source("issuecomp-functions.R") +# MATCH TWEETS ------------------------------------------------------------ +id_folder <- "matched-ids" +unlink(id_folder, recursive = TRUE) +dir.create(id_folder) +issues <- data.frame(date = drange) +issuelist <- readLines("issues.xml") +issuelist <- str_replace_all(string = issuelist, pattern = ".*", "") +issuelist <- xmlToList(issuelist) +issueheads <- names(issuelist) +issues[issueheads] <- 0 +tweets$issue <- "" +tweets$tags <- "" +tagexpand <- c("", "s", "n", "en", "er") +# Parallelisation +writeLines(c(""), "issuecomp-analysis.log") +cl<-makeCluster(3) +registerDoParallel(cl) +df<-foreach(d = 1:nrow(issues), .packages = c("stringr"), .combine=rbind) %dopar% { +#for(d in 1:nrow(issues)) { +# Go through every day +curdate <- issues$date[d] +cat(paste(as.character(curdate),"\n"), file="issuecomp-analysis.log", append=TRUE) +# Put all tweets from specific day in a temporary DF +tweets_curday <- tweets[tweets[, "created_at"] == curdate, ] +for(t in 1:nrow(tweets_curday)){ +cat(paste("Starting tweet", t, "of",as.character(curdate),"\n"), file="issuecomp-analysis.log", append=TRUE) +# Select tweet's text, make it lowercase and remove hashtag indicators (#) +curtext <- as.character(tweets_curday$text[t]) +curtext <- str_replace_all(curtext, "#", "") +curid <- as.character(tweets_curday$id_str[t]) +# Now test each single issue (not tag!) +for(i in 1:length(issueheads)) { +curissue <- issueheads[i] +curtags <- as.character(issuelist[[curissue]]) +curfile <- str_c(id_folder,"/",curissue,".csv") +# Now test all tags of a single issue +for(s in 1:length(curtags)) { +curtag <- curtags[s] +curchars <- nchar(curtag, type = "chars") +# Check if tag is an acronym. If so, ignore.case will be deactivated in smartPatternMatch +if(curchars <= 4) { +curacro <- checkAcronym(string = curtag, chars = curchars) +} else { +curacro <- FALSE +} +# Now expand the current tag by possible suffixes that may be plural forms +if(!curacro) { +for(e in 1:length(tagexpand)) { +curtag[e] <- str_c(curtag[1], tagexpand[e]) +} +} +# Set Levenshtein distance depending on char length +if(curchars <= 4) { +curdistance <- 0 +} else { +curdistance <- 1 +} +# Match current tweet with tag. If >= 5 letters allow 1 changed letter, if >=8 letters allow also 1 (Levenshtein distance) +tags_found <- NULL +# Match the tweet with each variation of tagexpand +for(e in 1:length(curtag)) { +tags_found[e] <- smartPatternMatch(curtext, curtag[e], curdistance, curacro) +} +tags_found <- any(tags_found) +curtag <- curtag[1] +if(tags_found == TRUE) { +# Raise number of findings on this day for this issue by 1 +issues[d,curissue] <- issues[d,curissue] + 1 +# Add issue and first matched tag of tweet to tweets-DF +oldissue <- tweets[tweets[, "id_str"] == curid, "issue"] +tweets[tweets[, "id_str"] == curid, "issue"] <- str_c(oldissue, curissue, ";") +oldtag <- tweets[tweets[, "id_str"] == curid, "tags"] +tweets[tweets[, "id_str"] == curid, "tags"] <- str_c(oldtag, curtag, ";") +# Add information to file for function viewPatternMatching +write(str_c(curdate,";\"",curid,"\";",curtag), curfile, append = TRUE) +cat(paste("Match!\n"), file="issuecomp-analysis.log", append=TRUE) +break +} +else { +#cat("Nothing found\n") +} +} # /for curtags +} # /for issuelist +} # /for tweets_curday +} # /for drange +View(issues) +require(lubridate) +require(XML) +require(ggplot2) +require(reshape2) +require(stringr) +library(foreach) +library(doParallel) +# MATCH TWEETS ------------------------------------------------------------ +id_folder <- "matched-ids" +unlink(id_folder, recursive = TRUE) +dir.create(id_folder) +issues <- data.frame(date = drange) +issuelist <- readLines("issues.xml") +issuelist <- str_replace_all(string = issuelist, pattern = ".*", "") +issuelist <- xmlToList(issuelist) +issueheads <- names(issuelist) +issues[issueheads] <- 0 +tweets$issue <- "" +tweets$tags <- "" +tagexpand <- c("", "s", "n", "en", "er") +# Parallelisation +writeLines(c(""), "issuecomp-analysis.log") +cl<-makeCluster(3) +registerDoParallel(cl) +df<-foreach(d = 1:3, .packages = c("stringr"), .combine=rbind) %dopar% { +#df<-foreach(d = 1:nrow(issues), .packages = c("stringr"), .combine=rbind) %dopar% { +#for(d in 1:nrow(issues)) { +# Go through every day +curdate <- issues$date[d] +cat(paste(as.character(curdate),"\n"), file="issuecomp-analysis.log", append=TRUE) +# Put all tweets from specific day in a temporary DF +tweets_curday <- tweets[tweets[, "created_at"] == curdate, ] +for(t in 1:25){ +#for(t in 1:nrow(tweets_curday)){ +cat(paste("Starting tweet", t, "of",as.character(curdate),"\n"), file="issuecomp-analysis.log", append=TRUE) +# Select tweet's text, make it lowercase and remove hashtag indicators (#) +curtext <- as.character(tweets_curday$text[t]) +curtext <- str_replace_all(curtext, "#", "") +curid <- as.character(tweets_curday$id_str[t]) +# Now test each single issue (not tag!) +for(i in 1:length(issueheads)) { +curissue <- issueheads[i] +curtags <- as.character(issuelist[[curissue]]) +curfile <- str_c(id_folder,"/",curissue,".csv") +# Now test all tags of a single issue +for(s in 1:length(curtags)) { +curtag <- curtags[s] +curchars <- nchar(curtag, type = "chars") +# Check if tag is an acronym. If so, ignore.case will be deactivated in smartPatternMatch +if(curchars <= 4) { +curacro <- checkAcronym(string = curtag, chars = curchars) +} else { +curacro <- FALSE +} +# Now expand the current tag by possible suffixes that may be plural forms +if(!curacro) { +for(e in 1:length(tagexpand)) { +curtag[e] <- str_c(curtag[1], tagexpand[e]) +} +} +# Set Levenshtein distance depending on char length +if(curchars <= 4) { +curdistance <- 0 +} else { +curdistance <- 1 +} +# Match current tweet with tag. If >= 5 letters allow 1 changed letter, if >=8 letters allow also 1 (Levenshtein distance) +tags_found <- NULL +# Match the tweet with each variation of tagexpand +for(e in 1:length(curtag)) { +tags_found[e] <- smartPatternMatch(curtext, curtag[e], curdistance, curacro) +} +tags_found <- any(tags_found) +curtag <- curtag[1] +if(tags_found == TRUE) { +# Raise number of findings on this day for this issue by 1 +issues[d,curissue] <- issues[d,curissue] + 1 +# Add issue and first matched tag of tweet to tweets-DF +oldissue <- tweets[tweets[, "id_str"] == curid, "issue"] +tweets[tweets[, "id_str"] == curid, "issue"] <- str_c(oldissue, curissue, ";") +oldtag <- tweets[tweets[, "id_str"] == curid, "tags"] +tweets[tweets[, "id_str"] == curid, "tags"] <- str_c(oldtag, curtag, ";") +# Add information to file for function viewPatternMatching +write(str_c(curdate,";\"",curid,"\";",curtag), curfile, append = TRUE) +cat(paste("Match!\n"), file="issuecomp-analysis.log", append=TRUE) +break +} +else { +#cat("Nothing found\n") +} +} # /for curtags +} # /for issuelist +} # /for tweets_curday +} # /for drange +#rm(tweets_curday,curacro, curchars, curdate,curfile,curid,curissue,curtag,curtags,curtext,d,date_end,date_start,i,id_folder,oldissue,oldtag,s,t,tags_found) +stopCluster(cl) +View(issues) +rm(data) +df diff --git a/issuecomp-analysis.R b/issuecomp-analysis.R index 2561858..c33f18e 100644 --- a/issuecomp-analysis.R +++ b/issuecomp-analysis.R @@ -40,7 +40,8 @@ writeLines(c(""), "issuecomp-analysis.log") cl<-makeCluster(3) registerDoParallel(cl) -df<-foreach(d = 1:nrow(issues), .packages = c("stringr"), .combine=rbind) %dopar% { +df<-foreach(d = 1:3, .packages = c("stringr"), .combine=rbind) %dopar% { +#df<-foreach(d = 1:nrow(issues), .packages = c("stringr"), .combine=rbind) %dopar% { #for(d in 1:nrow(issues)) { # Go through every day curdate <- issues$date[d] @@ -49,7 +50,8 @@ df<-foreach(d = 1:nrow(issues), .packages = c("stringr"), .combine=rbind) %dopar # Put all tweets from specific day in a temporary DF tweets_curday <- tweets[tweets[, "created_at"] == curdate, ] - for(t in 1:nrow(tweets_curday)){ + for(t in 1:25){ + #for(t in 1:nrow(tweets_curday)){ cat(paste("Starting tweet", t, "of",as.character(curdate),"\n"), file="issuecomp-analysis.log", append=TRUE) # Select tweet's text, make it lowercase and remove hashtag indicators (#) curtext <- as.character(tweets_curday$text[t]) @@ -110,7 +112,7 @@ df<-foreach(d = 1:nrow(issues), .packages = c("stringr"), .combine=rbind) %dopar # Add information to file for function viewPatternMatching write(str_c(curdate,";\"",curid,"\";",curtag), curfile, append = TRUE) - cat("Match!\n") + cat(paste("Match!\n"), file="issuecomp-analysis.log", append=TRUE) break } else { diff --git a/issuecomp-analysis.log b/issuecomp-analysis.log index 8a76877..c073cf9 100644 --- a/issuecomp-analysis.log +++ b/issuecomp-analysis.log @@ -2,435 +2,102 @@ 2014-01-01 2014-01-02 2014-01-03 -Starting tweet 1 of 2014-01-02 -Starting tweet 1 of 2014-01-01 Starting tweet 1 of 2014-01-03 +Starting tweet 1 of 2014-01-01 +Starting tweet 1 of 2014-01-02 Starting tweet 2 of 2014-01-01 Starting tweet 2 of 2014-01-03 Starting tweet 2 of 2014-01-02 -Starting tweet 3 of 2014-01-01 Starting tweet 3 of 2014-01-03 +Starting tweet 3 of 2014-01-01 +Match! Starting tweet 3 of 2014-01-02 -Starting tweet 4 of 2014-01-01 Starting tweet 4 of 2014-01-03 +Match! +Starting tweet 4 of 2014-01-01 +Match! Starting tweet 4 of 2014-01-02 -Starting tweet 5 of 2014-01-01 Starting tweet 5 of 2014-01-03 -Starting tweet 6 of 2014-01-01 -Starting tweet 5 of 2014-01-02 +Starting tweet 5 of 2014-01-01 Starting tweet 6 of 2014-01-03 +Match! +Starting tweet 5 of 2014-01-02 +Match! +Starting tweet 6 of 2014-01-01 Starting tweet 6 of 2014-01-02 -Starting tweet 7 of 2014-01-01 Starting tweet 7 of 2014-01-03 -Starting tweet 8 of 2014-01-01 +Starting tweet 7 of 2014-01-01 +Match! Starting tweet 7 of 2014-01-02 Starting tweet 8 of 2014-01-03 -Starting tweet 9 of 2014-01-01 +Starting tweet 8 of 2014-01-01 +Match! Starting tweet 8 of 2014-01-02 Starting tweet 9 of 2014-01-03 +Starting tweet 9 of 2014-01-01 Starting tweet 9 of 2014-01-02 -Starting tweet 10 of 2014-01-01 Starting tweet 10 of 2014-01-03 +Starting tweet 10 of 2014-01-01 Starting tweet 10 of 2014-01-02 +Match! +Starting tweet 11 of 2014-01-03 Starting tweet 11 of 2014-01-01 Starting tweet 11 of 2014-01-02 -Starting tweet 11 of 2014-01-03 +Starting tweet 12 of 2014-01-03 Starting tweet 12 of 2014-01-01 Starting tweet 12 of 2014-01-02 -Starting tweet 12 of 2014-01-03 +Starting tweet 13 of 2014-01-03 +Match! Starting tweet 13 of 2014-01-01 Starting tweet 13 of 2014-01-02 -Starting tweet 13 of 2014-01-03 +Starting tweet 14 of 2014-01-03 Starting tweet 14 of 2014-01-01 Starting tweet 14 of 2014-01-02 -Starting tweet 15 of 2014-01-01 -Starting tweet 14 of 2014-01-03 +Match! Starting tweet 15 of 2014-01-03 +Starting tweet 15 of 2014-01-01 +Match! Starting tweet 15 of 2014-01-02 +Match! +Starting tweet 16 of 2014-01-03 Starting tweet 16 of 2014-01-01 Starting tweet 16 of 2014-01-02 -Starting tweet 16 of 2014-01-03 -Starting tweet 17 of 2014-01-01 Starting tweet 17 of 2014-01-03 +Match! +Starting tweet 17 of 2014-01-01 Starting tweet 17 of 2014-01-02 +Match! Starting tweet 18 of 2014-01-03 -Starting tweet 18 of 2014-01-01 +Match! +Match! Starting tweet 18 of 2014-01-02 +Match! +Starting tweet 18 of 2014-01-01 Starting tweet 19 of 2014-01-03 -Starting tweet 19 of 2014-01-01 -Starting tweet 20 of 2014-01-03 +Match! Starting tweet 19 of 2014-01-02 -Starting tweet 21 of 2014-01-03 -Starting tweet 20 of 2014-01-01 +Starting tweet 19 of 2014-01-01 +Match! +Starting tweet 20 of 2014-01-03 Starting tweet 20 of 2014-01-02 -Starting tweet 22 of 2014-01-03 -Starting tweet 21 of 2014-01-01 +Match! +Starting tweet 20 of 2014-01-01 +Match! +Starting tweet 21 of 2014-01-03 Starting tweet 21 of 2014-01-02 -Starting tweet 22 of 2014-01-01 +Match! +Starting tweet 21 of 2014-01-01 +Starting tweet 22 of 2014-01-03 Starting tweet 22 of 2014-01-02 +Starting tweet 22 of 2014-01-01 Starting tweet 23 of 2014-01-03 -Starting tweet 23 of 2014-01-01 Starting tweet 23 of 2014-01-02 +Starting tweet 23 of 2014-01-01 Starting tweet 24 of 2014-01-03 Starting tweet 24 of 2014-01-02 Starting tweet 24 of 2014-01-01 +Match! Starting tweet 25 of 2014-01-03 Starting tweet 25 of 2014-01-02 Starting tweet 25 of 2014-01-01 -Starting tweet 26 of 2014-01-03 -Starting tweet 26 of 2014-01-02 -Starting tweet 26 of 2014-01-01 -Starting tweet 27 of 2014-01-03 -Starting tweet 27 of 2014-01-02 -Starting tweet 27 of 2014-01-01 -Starting tweet 28 of 2014-01-03 -Starting tweet 28 of 2014-01-02 -Starting tweet 28 of 2014-01-01 -Starting tweet 29 of 2014-01-03 -Starting tweet 29 of 2014-01-01 -Starting tweet 29 of 2014-01-02 -Starting tweet 30 of 2014-01-03 -Starting tweet 30 of 2014-01-01 -Starting tweet 30 of 2014-01-02 -Starting tweet 31 of 2014-01-03 -Starting tweet 31 of 2014-01-02 -Starting tweet 31 of 2014-01-01 -Starting tweet 32 of 2014-01-02 -Starting tweet 32 of 2014-01-03 -Starting tweet 32 of 2014-01-01 -Starting tweet 33 of 2014-01-02 -Starting tweet 33 of 2014-01-03 -Starting tweet 33 of 2014-01-01 -Starting tweet 34 of 2014-01-02 -Starting tweet 34 of 2014-01-03 -Starting tweet 34 of 2014-01-01 -Starting tweet 35 of 2014-01-02 -Starting tweet 35 of 2014-01-03 -Starting tweet 35 of 2014-01-01 -Starting tweet 36 of 2014-01-02 -Starting tweet 36 of 2014-01-01 -Starting tweet 36 of 2014-01-03 -Starting tweet 37 of 2014-01-01 -Starting tweet 37 of 2014-01-02 -Starting tweet 37 of 2014-01-03 -Starting tweet 38 of 2014-01-01 -Starting tweet 38 of 2014-01-02 -Starting tweet 39 of 2014-01-01 -Starting tweet 38 of 2014-01-03 -Starting tweet 39 of 2014-01-02 -Starting tweet 39 of 2014-01-03 -Starting tweet 40 of 2014-01-01 -Starting tweet 40 of 2014-01-02 -Starting tweet 41 of 2014-01-01 -Starting tweet 40 of 2014-01-03 -Starting tweet 41 of 2014-01-02 -Starting tweet 42 of 2014-01-01 -Starting tweet 41 of 2014-01-03 -Starting tweet 43 of 2014-01-01 -Starting tweet 42 of 2014-01-02 -Starting tweet 44 of 2014-01-01 -Starting tweet 42 of 2014-01-03 -Starting tweet 43 of 2014-01-02 -Starting tweet 43 of 2014-01-03 -Starting tweet 45 of 2014-01-01 -Starting tweet 44 of 2014-01-02 -Starting tweet 44 of 2014-01-03 -Starting tweet 46 of 2014-01-01 -Starting tweet 45 of 2014-01-02 -Starting tweet 45 of 2014-01-03 -Starting tweet 47 of 2014-01-01 -Starting tweet 46 of 2014-01-02 -Starting tweet 46 of 2014-01-03 -Starting tweet 48 of 2014-01-01 -Starting tweet 47 of 2014-01-02 -Starting tweet 47 of 2014-01-03 -Starting tweet 48 of 2014-01-02 -Starting tweet 49 of 2014-01-01 -Starting tweet 48 of 2014-01-03 -Starting tweet 49 of 2014-01-02 -Starting tweet 50 of 2014-01-01 -Starting tweet 49 of 2014-01-03 -Starting tweet 50 of 2014-01-02 -Starting tweet 51 of 2014-01-01 -Starting tweet 50 of 2014-01-03 -Starting tweet 52 of 2014-01-01 -Starting tweet 51 of 2014-01-02 -Starting tweet 51 of 2014-01-03 -Starting tweet 52 of 2014-01-02 -Starting tweet 53 of 2014-01-01 -Starting tweet 52 of 2014-01-03 -Starting tweet 53 of 2014-01-02 -Starting tweet 54 of 2014-01-01 -Starting tweet 53 of 2014-01-03 -Starting tweet 54 of 2014-01-02 -Starting tweet 55 of 2014-01-01 -Starting tweet 54 of 2014-01-03 -Starting tweet 56 of 2014-01-01 -Starting tweet 55 of 2014-01-02 -Starting tweet 55 of 2014-01-03 -Starting tweet 56 of 2014-01-02 -Starting tweet 57 of 2014-01-01 -Starting tweet 56 of 2014-01-03 -Starting tweet 57 of 2014-01-02 -Starting tweet 58 of 2014-01-01 -Starting tweet 57 of 2014-01-03 -Starting tweet 58 of 2014-01-02 -Starting tweet 58 of 2014-01-03 -Starting tweet 59 of 2014-01-01 -Starting tweet 60 of 2014-01-01 -Starting tweet 59 of 2014-01-03 -Starting tweet 59 of 2014-01-02 -Starting tweet 60 of 2014-01-03 -Starting tweet 61 of 2014-01-01 -Starting tweet 60 of 2014-01-02 -Starting tweet 61 of 2014-01-03 -Starting tweet 62 of 2014-01-01 -Starting tweet 61 of 2014-01-02 -Starting tweet 62 of 2014-01-03 -Starting tweet 63 of 2014-01-01 -Starting tweet 62 of 2014-01-02 -Starting tweet 63 of 2014-01-03 -Starting tweet 64 of 2014-01-01 -Starting tweet 63 of 2014-01-02 -Starting tweet 64 of 2014-01-03 -Starting tweet 65 of 2014-01-01 -Starting tweet 64 of 2014-01-02 -Starting tweet 65 of 2014-01-03 -Starting tweet 66 of 2014-01-01 -Starting tweet 66 of 2014-01-03 -Starting tweet 65 of 2014-01-02 -2014-01-04 -Starting tweet 1 of 2014-01-04 -Starting tweet 67 of 2014-01-03 -Starting tweet 66 of 2014-01-02 -Starting tweet 2 of 2014-01-04 -Starting tweet 67 of 2014-01-02 -Starting tweet 68 of 2014-01-03 -Starting tweet 3 of 2014-01-04 -Starting tweet 68 of 2014-01-02 -Starting tweet 69 of 2014-01-03 -Starting tweet 4 of 2014-01-04 -Starting tweet 70 of 2014-01-03 -Starting tweet 69 of 2014-01-02 -Starting tweet 5 of 2014-01-04 -Starting tweet 71 of 2014-01-03 -Starting tweet 70 of 2014-01-02 -Starting tweet 72 of 2014-01-03 -Starting tweet 6 of 2014-01-04 -Starting tweet 71 of 2014-01-02 -Starting tweet 73 of 2014-01-03 -Starting tweet 7 of 2014-01-04 -Starting tweet 74 of 2014-01-03 -Starting tweet 72 of 2014-01-02 -Starting tweet 75 of 2014-01-03 -Starting tweet 8 of 2014-01-04 -Starting tweet 73 of 2014-01-02 -Starting tweet 76 of 2014-01-03 -Starting tweet 9 of 2014-01-04 -Starting tweet 74 of 2014-01-02 -Starting tweet 77 of 2014-01-03 -Starting tweet 75 of 2014-01-02 -Starting tweet 10 of 2014-01-04 -Starting tweet 78 of 2014-01-03 -Starting tweet 76 of 2014-01-02 -Starting tweet 11 of 2014-01-04 -Starting tweet 77 of 2014-01-02 -Starting tweet 79 of 2014-01-03 -Starting tweet 80 of 2014-01-03 -Starting tweet 12 of 2014-01-04 -Starting tweet 78 of 2014-01-02 -Starting tweet 79 of 2014-01-02 -Starting tweet 13 of 2014-01-04 -Starting tweet 81 of 2014-01-03 -Starting tweet 80 of 2014-01-02 -Starting tweet 14 of 2014-01-04 -Starting tweet 82 of 2014-01-03 -Starting tweet 81 of 2014-01-02 -Starting tweet 15 of 2014-01-04 -Starting tweet 83 of 2014-01-03 -Starting tweet 82 of 2014-01-02 -Starting tweet 84 of 2014-01-03 -Starting tweet 16 of 2014-01-04 -Starting tweet 83 of 2014-01-02 -Starting tweet 17 of 2014-01-04 -Starting tweet 85 of 2014-01-03 -Starting tweet 84 of 2014-01-02 -Starting tweet 18 of 2014-01-04 -Starting tweet 86 of 2014-01-03 -Starting tweet 85 of 2014-01-02 -Starting tweet 19 of 2014-01-04 -Starting tweet 87 of 2014-01-03 -Starting tweet 88 of 2014-01-03 -Starting tweet 86 of 2014-01-02 -Starting tweet 20 of 2014-01-04 -Starting tweet 89 of 2014-01-03 -Starting tweet 87 of 2014-01-02 -Starting tweet 21 of 2014-01-04 -Starting tweet 90 of 2014-01-03 -Starting tweet 88 of 2014-01-02 -Starting tweet 22 of 2014-01-04 -Starting tweet 91 of 2014-01-03 -Starting tweet 89 of 2014-01-02 -Starting tweet 23 of 2014-01-04 -Starting tweet 92 of 2014-01-03 -Starting tweet 24 of 2014-01-04 -Starting tweet 90 of 2014-01-02 -Starting tweet 93 of 2014-01-03 -Starting tweet 91 of 2014-01-02 -Starting tweet 25 of 2014-01-04 -Starting tweet 94 of 2014-01-03 -Starting tweet 92 of 2014-01-02 -Starting tweet 26 of 2014-01-04 -Starting tweet 95 of 2014-01-03 -Starting tweet 93 of 2014-01-02 -Starting tweet 96 of 2014-01-03 -Starting tweet 27 of 2014-01-04 -Starting tweet 94 of 2014-01-02 -Starting tweet 28 of 2014-01-04 -Starting tweet 97 of 2014-01-03 -Starting tweet 95 of 2014-01-02 -Starting tweet 29 of 2014-01-04 -Starting tweet 98 of 2014-01-03 -Starting tweet 96 of 2014-01-02 -Starting tweet 30 of 2014-01-04 -Starting tweet 99 of 2014-01-03 -Starting tweet 97 of 2014-01-02 -Starting tweet 31 of 2014-01-04 -Starting tweet 100 of 2014-01-03 -Starting tweet 98 of 2014-01-02 -Starting tweet 32 of 2014-01-04 -Starting tweet 101 of 2014-01-03 -Starting tweet 99 of 2014-01-02 -Starting tweet 33 of 2014-01-04 -Starting tweet 102 of 2014-01-03 -Starting tweet 100 of 2014-01-02 -Starting tweet 34 of 2014-01-04 -Starting tweet 103 of 2014-01-03 -Starting tweet 101 of 2014-01-02 -Starting tweet 35 of 2014-01-04 -Starting tweet 104 of 2014-01-03 -Starting tweet 102 of 2014-01-02 -Starting tweet 36 of 2014-01-04 -Starting tweet 105 of 2014-01-03 -Starting tweet 103 of 2014-01-02 -Starting tweet 37 of 2014-01-04 -Starting tweet 106 of 2014-01-03 -Starting tweet 104 of 2014-01-02 -Starting tweet 38 of 2014-01-04 -Starting tweet 107 of 2014-01-03 -Starting tweet 105 of 2014-01-02 -Starting tweet 108 of 2014-01-03 -Starting tweet 39 of 2014-01-04 -Starting tweet 106 of 2014-01-02 -Starting tweet 107 of 2014-01-02 -Starting tweet 109 of 2014-01-03 -Starting tweet 40 of 2014-01-04 -Starting tweet 108 of 2014-01-02 -Starting tweet 110 of 2014-01-03 -Starting tweet 41 of 2014-01-04 -Starting tweet 109 of 2014-01-02 -Starting tweet 110 of 2014-01-02 -Starting tweet 111 of 2014-01-03 -Starting tweet 42 of 2014-01-04 -Starting tweet 111 of 2014-01-02 -Starting tweet 112 of 2014-01-03 -Starting tweet 43 of 2014-01-04 -Starting tweet 113 of 2014-01-03 -Starting tweet 112 of 2014-01-02 -Starting tweet 44 of 2014-01-04 -Starting tweet 114 of 2014-01-03 -Starting tweet 113 of 2014-01-02 -Starting tweet 45 of 2014-01-04 -Starting tweet 115 of 2014-01-03 -Starting tweet 114 of 2014-01-02 -Starting tweet 46 of 2014-01-04 -Starting tweet 115 of 2014-01-02 -Starting tweet 116 of 2014-01-03 -Starting tweet 47 of 2014-01-04 -Starting tweet 117 of 2014-01-03 -Starting tweet 116 of 2014-01-02 -Starting tweet 118 of 2014-01-03 -Starting tweet 48 of 2014-01-04 -Starting tweet 117 of 2014-01-02 -Starting tweet 119 of 2014-01-03 -Starting tweet 118 of 2014-01-02 -Starting tweet 49 of 2014-01-04 -Starting tweet 120 of 2014-01-03 -Starting tweet 119 of 2014-01-02 -Starting tweet 50 of 2014-01-04 -Starting tweet 121 of 2014-01-03 -Starting tweet 120 of 2014-01-02 -Starting tweet 51 of 2014-01-04 -Starting tweet 122 of 2014-01-03 -Starting tweet 121 of 2014-01-02 -Starting tweet 52 of 2014-01-04 -Starting tweet 123 of 2014-01-03 -Starting tweet 122 of 2014-01-02 -Starting tweet 53 of 2014-01-04 -Starting tweet 123 of 2014-01-02 -Starting tweet 124 of 2014-01-03 -Starting tweet 54 of 2014-01-04 -Starting tweet 124 of 2014-01-02 -Starting tweet 125 of 2014-01-03 -Starting tweet 55 of 2014-01-04 -Starting tweet 125 of 2014-01-02 -Starting tweet 126 of 2014-01-03 -Starting tweet 56 of 2014-01-04 -Starting tweet 126 of 2014-01-02 -Starting tweet 57 of 2014-01-04 -Starting tweet 127 of 2014-01-03 -Starting tweet 127 of 2014-01-02 -Starting tweet 58 of 2014-01-04 -Starting tweet 128 of 2014-01-03 -Starting tweet 128 of 2014-01-02 -Starting tweet 129 of 2014-01-03 -Starting tweet 59 of 2014-01-04 -Starting tweet 129 of 2014-01-02 -Starting tweet 60 of 2014-01-04 -Starting tweet 130 of 2014-01-03 -Starting tweet 130 of 2014-01-02 -Starting tweet 61 of 2014-01-04 -Starting tweet 131 of 2014-01-03 -Starting tweet 131 of 2014-01-02 -Starting tweet 132 of 2014-01-03 -Starting tweet 62 of 2014-01-04 -Starting tweet 63 of 2014-01-04 -Starting tweet 132 of 2014-01-02 -Starting tweet 133 of 2014-01-03 -Starting tweet 64 of 2014-01-04 -Starting tweet 133 of 2014-01-02 -Starting tweet 134 of 2014-01-03 -Starting tweet 65 of 2014-01-04 -Starting tweet 135 of 2014-01-03 -Starting tweet 134 of 2014-01-02 -Starting tweet 66 of 2014-01-04 -Starting tweet 136 of 2014-01-03 -Starting tweet 135 of 2014-01-02 -Starting tweet 137 of 2014-01-03 -Starting tweet 67 of 2014-01-04 -Starting tweet 136 of 2014-01-02 -Starting tweet 138 of 2014-01-03 -Starting tweet 68 of 2014-01-04 -Starting tweet 137 of 2014-01-02 -Starting tweet 69 of 2014-01-04 -Starting tweet 138 of 2014-01-02 -Starting tweet 70 of 2014-01-04 -Starting tweet 139 of 2014-01-02 -Starting tweet 71 of 2014-01-04 -Starting tweet 140 of 2014-01-02 -Starting tweet 72 of 2014-01-04 -Starting tweet 141 of 2014-01-02 -Starting tweet 73 of 2014-01-04 -Starting tweet 74 of 2014-01-04 -Starting tweet 75 of 2014-01-04 -Starting tweet 76 of 2014-01-04 -Starting tweet 77 of 2014-01-04 -Starting tweet 78 of 2014-01-04 -Starting tweet 79 of 2014-01-04 -Starting tweet 80 of 2014-01-04 -Starting tweet 81 of 2014-01-04 -Starting tweet 82 of 2014-01-04 -Starting tweet 83 of 2014-01-04 -Starting tweet 84 of 2014-01-04 -Starting tweet 85 of 2014-01-04 -Starting tweet 86 of 2014-01-04 +Match!