added coding sample tests
This commit is contained in:
+22
-3
@@ -6,6 +6,8 @@ require(stringr)
|
||||
|
||||
source("issuecomp-functions.R")
|
||||
|
||||
load(file = "tweets_untagged.RData")
|
||||
|
||||
# Create date range
|
||||
date_start <- as.Date("2014-01-01")
|
||||
date_end <- as.Date("2014-12-31")
|
||||
@@ -23,6 +25,8 @@ issues <- data.frame(date = drange)
|
||||
issuelist <- xmlToList("issues.xml")
|
||||
issueheads <- names(issuelist)
|
||||
issues[issueheads] <- 0
|
||||
tweets$issue <- ""
|
||||
tweets$tags <- ""
|
||||
|
||||
for(d in 1:nrow(issues)) {
|
||||
# Go through every day
|
||||
@@ -36,8 +40,7 @@ for(d in 1:nrow(issues)) {
|
||||
# Select tweet's text, make it lowercase and remove hashtag indicators (#)
|
||||
curtext <- as.character(tweets_curday$text[t])
|
||||
curtext <- str_replace_all(curtext, "#", "")
|
||||
curtext <- str_replace_all(curtext, "$", " ")
|
||||
curtext <- str_replace_all(curtext, "http://.+?\\s", "URL ")
|
||||
|
||||
curid <- as.character(tweets_curday$id_str[t])
|
||||
|
||||
# Now test each single issue (not tag!)
|
||||
@@ -61,8 +64,16 @@ for(d in 1:nrow(issues)) {
|
||||
# Match current tweet with tag: if >= 5 letters, allow 1 changed letter; if >= 8 letters, allow 2 (Levenshtein distance)
|
||||
tags_found <- smartPatternMatch(curtext, curtag, curchars, curacro)
|
||||
if(tags_found == 1) {
|
||||
#cat("Matched", curtag, "with", curtext,"\n")
|
||||
# Raise number of findings on this day for this issue by 1
|
||||
issues[d,curissue] <- issues[d,curissue] + 1
|
||||
|
||||
# Add the issue and the first matched tag of the tweet to the tweets data frame
|
||||
oldissue <- tweets[tweets[, "id_str"] == curid, "issue"]
|
||||
tweets[tweets[, "id_str"] == curid, "issue"] <- str_c(oldissue, curissue, ";")
|
||||
oldtag <- tweets[tweets[, "id_str"] == curid, "tags"]
|
||||
tweets[tweets[, "id_str"] == curid, "tags"] <- str_c(oldtag, curtag, ";")
|
||||
|
||||
# Add information to file for function viewPatternMatching
|
||||
write(str_c(curdate,";\"",curid,"\";",curtag), curfile, append = TRUE)
|
||||
break
|
||||
}
|
||||
@@ -75,11 +86,19 @@ for(d in 1:nrow(issues)) {
|
||||
} # /for tweets_curday
|
||||
} # /for drange
|
||||
|
||||
rm(tweets_curday,curacro, curchars, curdate,curfile,curid,curissue,curtag,curtags,curtext,d,date_end,date_start,drange,i,id_folder,oldissue,oldtag,s,t,tags_found)
|
||||
|
||||
|
||||
# SAVING ------------------------------------------------------------------
|
||||
|
||||
|
||||
row.names(tweets) <- NULL
|
||||
write.csv(tweets, "tweets.csv")
|
||||
save(tweets, file="tweets.RData")
|
||||
|
||||
# VISUALS -----------------------------------------------------------------
|
||||
|
||||
|
||||
# Level: days
|
||||
issues_melt <- melt(issues,id="date")
|
||||
ggplot(issues_melt,aes(x=date,y=value,colour=variable,group=variable)) + geom_line(size=1)
|
||||
|
||||
Reference in New Issue
Block a user