From 1eb6aeec187046a05bde55568ca4e7ce3074395b Mon Sep 17 00:00:00 2001
From: mxmehl
Date: Sun, 7 Dec 2014 21:06:59 +0100
Subject: [PATCH] starting issuecounting

---
 extract-twitter-accounts.R |  11 ----
 issuecomp.R                |  68 +++++++++++++++++++++++
 issues.xml                 | 109 +++++++++++++++++++++++++++++++++++++
 3 files changed, 177 insertions(+), 11 deletions(-)
 create mode 100644 issuecomp.R
 create mode 100644 issues.xml

diff --git a/extract-twitter-accounts.R b/extract-twitter-accounts.R
index 4cb976e..fe20b2c 100644
--- a/extract-twitter-accounts.R
+++ b/extract-twitter-accounts.R
@@ -2,10 +2,8 @@
 
 require(jsonlite)
 require(stringr)
-require(RCurl)
 require(devtools)
 require(RTwitterAPI)
-require(lubridate)
 
 setwd("~/Dokumente/Uni/Aktuell/BA-Arbeit/uni-ba-issuecomp")
 
@@ -196,12 +194,3 @@ rm(delrow, r)
 # Convert dates to omit (unnecessary) time
 tweets$created_at <- format(tweets$created_at, "%Y-%m-%d")
 
-# Create date range
-date_start <- as.Date("2014-01-01")
-date_end <- as.Date("2014-12-01")
-drange <- as.integer(date_end - date_start)
-drange <- date_start + days(0:d)
-issues <- data.frame(date = drange)
-
-# Select all tweets from current day in drange
-tweets_curday <- tweets[tweets[, "created_at"] == drange[5], ]
diff --git a/issuecomp.R b/issuecomp.R
new file mode 100644
index 0000000..bfbecab
--- /dev/null
+++ b/issuecomp.R
@@ -0,0 +1,68 @@
+# Count, for every day of the observation period, how many tweets mention
+# each issue listed in issues.xml.
+#
+# Expects the data frame `tweets` (columns `created_at` and `text`) to exist
+# already; it is not created in this file -- presumably it comes from
+# extract-twitter-accounts.R (TODO confirm).
+
+require(lubridate)
+require(stringr)
+require(XML)
+
+# Create date range: one row per day from date_start to date_end (inclusive)
+date_start <- as.Date("2014-01-01")
+date_end <- as.Date("2014-12-01")
+ndays <- as.integer(date_end - date_start)
+drange <- date_start + days(0:ndays)
+issues <- data.frame(date = drange)
+
+# Every top-level element of issues.xml gets its own counting column
+issuelist <- xmlToList("issues.xml")
+issueheads <- names(issuelist)
+issues[issueheads] <- 0
+
+# Build the search patterns once instead of once per tweet. A tag has to be
+# delimited by a non-word character or by the start/end of the text, so tags
+# in the first or last position of a tweet are found as well.
+issuepatterns <- lapply(issuelist, function(tags) {
+  str_c("(^|\\W)", as.character(tags), "(\\W|$)")
+})
+
+for(d in seq_len(nrow(issues))) {
+  curdate <- issues$date[d]
+  cat(as.character(curdate),"\n")
+  # Put all tweets from specific day in a temporary DF
+  tweets_curday <- tweets[tweets[, "created_at"] == curdate, ]
+
+  # seq_len() skips days without tweets; 1:0 would run for t = 1 and t = 0
+  for(t in seq_len(nrow(tweets_curday))){
+    # Select tweet's text, make it lowercase and remove hashtag indicators (#)
+    curtext <- tolower(as.character(tweets_curday$text[t]))
+    curtext <- str_replace_all(curtext, "#", "")
+
+    for(i in seq_along(issuepatterns)) {
+      curissue <- issueheads[i]
+      # A tweet counts at most once per issue, however many tags match
+      tags_found <- any(str_detect(curtext, issuepatterns[[i]]))
+
+      if(tags_found) {
+        #cat("Positive in", curissue,"from",as.character(drange[d]),"\n")
+        issues[d,curissue] <- issues[d,curissue] + 1
+      }
+
+    } # /for issuelist
+  } # /for tweets_curday
+} # /for drange
+
+
+
+# POSSIBLY USEFUL CODE ----------------------------------------------------
+
+# Limits of list
+length(issuelist)
+length(issuelist[[2]])
+
+# Select all tweets from current day in drange
+tweets_curday <- tweets[tweets[, "created_at"] == drange[5], ]
+# Is column an issue counting column?
+str_detect(names(issues[2]), "^issue")
\ No newline at end of file
diff --git a/issues.xml b/issues.xml
new file mode 100644
index 0000000..724384d
--- /dev/null
+++ b/issues.xml
@@ -0,0 +1,109 @@
+
+
+ ttip
+ ceta
+ freihandelsabkommen
+
+
+
+ energiewende
+ atomkraft
+ windkraft
+ wasserkraft
+ solarstrom
+ kraftwerk
+
+
+
+ ukraine
+ euromaidan
+ krim
+ putin
+ kiew
+
+
+
+ vds
+ vorratsdatenspeicherung
+ datenschutz
+
+
+
+ schulz
+ kommission
+ juncker
+ ec2014
+ ep2014
+ europawahl
+
+
+
+ arbeitsmarkt
+ mindestlohn
+ arbeitslosigkeit
+ hartz4
+ arbeitslos
+
+
+
+ nsa
+ snowden
+ bnd
+ gchq
+
+
+
+ wm2014
+ weltmeister
+ meister
+ finale
+ halbfinale
+ viertelfinale
+ achtelfinale
+ brager
+ gerbra
+ argger
+ gerarg
+ wm
+ stadion
+
+
+
+ israel
+ gaza
+ naher osten
+ nahen osten
+ nahost
+
+
+
+ irak
+ isis
+ is
+ kalifat
+
+
+
+ ebola
+
+
+
+ nsu
+ mundlos
+ zschäpe
+
+
+
+ edathy
+ kinderpornographie
+ kipo
+ pädophil
+ pädophilie
+
+
+
+ pillepalle
+ schundluder
+ whatthefuck
+