diff --git a/.Rhistory b/.Rhistory index 1707561..e605379 100644 --- a/.Rhistory +++ b/.Rhistory @@ -1,281 +1,175 @@ -load(file = "tweets_untagged.RData") -setwd("~/Dokumente/Uni/Aktuell/BA-Arbeit/uni-ba-issuecomp") -results_files <- "matched-ids/all.csv" -load(file = "tweets_untagged.RData") -View(issues) -issues <- data.frame(date = drange) -issuelist <- readLines("issues.xml") -issuelist <- str_replace_all(string = issuelist, pattern = ".*", "") -issuelist <- xmlToList(issuelist) -issueheads <- names(issuelist) -issues[issueheads] <- 0 -tweets$issue <- "" -tweets$tags <- "" -View(results) -rm(r, results_temp, results_files) -results <- results[!duplicated(results), ] -names(results) <- c("date", "id_str", "issue", "tags") -results <- results[order(results$id_str), ] -row.names(results) <- NULL -for(r in 1:nrow(results)) { -curdate <- as.character(results$date[r]) -curid <- as.character(results$id_str[r]) -curissue <- as.character(results$issue[r]) -curtag <- as.character(results$tags[r]) -cat("Sorting match", r, "of 53383 \n") -# Update issue counter (date and issue) -issues[issues[, "date"] == curdate, curissue] <- issues[issues[, "date"] == curdate, curissue] + 1 -# Update tweet dataframe (id, issue and tags) -oldissue <- tweets[tweets[, "id_str"] == curid, "issue"] -tweets[tweets[, "id_str"] == curid, "issue"] <- str_c(oldissue, curissue, ",") -oldtag <- tweets[tweets[, "id_str"] == curid, "tags"] -tweets[tweets[, "id_str"] == curid, "tags"] <- str_c(oldtag, curtag, ",") -} -curdate -curissue -issues[issues[, "date"] == curdate, curissue] -issueheads -issuelist <- readLines("issues-v2.xml") -issues <- data.frame(date = drange) -issuelist <- readLines("issues-v2.xml") -issuelist <- str_replace_all(string = issuelist, pattern = ".*", "") -issuelist <- xmlToList(issuelist) -issueheads <- names(issuelist) -issues[issueheads] <- 0 -tweets$issue <- "" -tweets$tags <- "" -for(r in 1:nrow(results)) { -curdate <- as.character(results$date[r]) -curid <- as.character(results$id_str[r]) -curissue <- as.character(results$issue[r]) -curtag <- as.character(results$tags[r]) -cat("Sorting match", r, "of 53383 \n") -# Update issue counter (date and issue) -issues[issues[, "date"] == curdate, curissue] <- issues[issues[, "date"] == curdate, curissue] + 1 -# Update tweet dataframe (id, issue and tags) -oldissue <- tweets[tweets[, "id_str"] == curid, "issue"] -tweets[tweets[, "id_str"] == curid, "issue"] <- str_c(oldissue, curissue, ",") -oldtag <- tweets[tweets[, "id_str"] == curid, "tags"] -tweets[tweets[, "id_str"] == curid, "tags"] <- str_c(oldtag, curtag, ",") -} -results[33170,] -results[33171,] -results$date[33170] -results$date[33170] <- "2014-08-21" -for(r in 33170:nrow(results)) { -curdate <- as.character(results$date[r]) -curid <- as.character(results$id_str[r]) -curissue <- as.character(results$issue[r]) -curtag <- as.character(results$tags[r]) -cat("Sorting match", r, "of 53383 \n") -# Update issue counter (date and issue) -issues[issues[, "date"] == curdate, curissue] <- issues[issues[, "date"] == curdate, curissue] + 1 -# Update tweet dataframe (id, issue and tags) -oldissue <- tweets[tweets[, "id_str"] == curid, "issue"] -tweets[tweets[, "id_str"] == curid, "issue"] <- str_c(oldissue, curissue, ",") -oldtag <- tweets[tweets[, "id_str"] == curid, "tags"] -tweets[tweets[, "id_str"] == curid, "tags"] <- str_c(oldtag, curtag, ",") -} -save(tweets, file="tweets_tagged.RData") -write.csv(tweets, file="tweets.csv") -save(issues, file="issues.RData") +xlab("Zeitraum") + ylab("Tweets pro Tag") + +scale_colour_discrete(name = "Tweets", labels = c("Gesamt", "Allgemein", "Sensation")) + +theme(legend.title = element_text(size=14)) + +theme(legend.text = element_text(size=12)) + +theme(axis.text = element_text(size = 18)) +g_perday +g_perday <- ggplot(data = stats_melt, aes(x=date,y=value,colour=variable, group=variable)) + +geom_line()+ +geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1) + +xlab("Zeitraum") + ylab("Tweets pro Tag") + +scale_colour_discrete(name = "Tweets", labels = c("Gesamt", "Allgemein", "Sensation")) + +theme(legend.title = element_text(size=14)) + +theme(legend.text = element_text(size=12)) + +theme(axis.title = element_text(size = 18)) +g_perday +g_perday <- ggplot(data = stats_melt, aes(x=date,y=value,colour=variable, group=variable)) + +geom_line()+ +geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1) + +xlab("Zeitraum") + ylab("Tweets pro Tag") + +scale_colour_discrete(name = "Tweets", labels = c("Gesamt", "Allgemein", "Sensation")) + +theme(legend.title = element_text(size=14)) + +theme(legend.text = element_text(size=12)) + +theme(axis.title = element_text(size = 12)) +g_perday +g_perday <- ggplot(data = stats_melt, aes(x=date,y=value,colour=variable, group=variable)) + +geom_line()+ +geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1) + +xlab("Zeitraum") + ylab("Tweets pro Tag") + +scale_colour_discrete(name = "Tweets", labels = c("Gesamt", "Allgemein", "Sensation")) + +theme(legend.title = element_text(size=14)) + +theme(legend.text = element_text(size=12)) + +theme(axis.title = element_text(size = 13)) +g_perday +g_perday <- ggplot(data = stats_melt, aes(x=date,y=value,colour=variable, group=variable)) + +geom_line()+ +geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1) + +xlab("Zeitraum") + ylab("Tweets pro Tag") + +scale_colour_discrete(name = "Tweets", labels = c("Gesamt", "Allgemein", "Sensation")) + +theme(legend.title = element_text(size=14, face="plain")) + +theme(legend.text = element_text(size=12)) + +theme(axis.title = element_text(size = 13)) +g_perday +g_perday <- ggplot(data = stats_melt, aes(x=date,y=value,colour=variable, group=variable)) + +geom_line()+ +geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1) + +xlab("Zeitraum") + ylab("Tweets pro Tag") + +scale_colour_discrete(name = "Tweets", labels = c("Gesamt", "Allgemein", "Sensation")) + +theme(legend.title = element_text(size=14)) + +theme(legend.text = element_text(size=12)) + +theme(axis.title = element_text(size = 13)) +g_perday +g_entrop <- ggplot(data = stats_entropy, aes(x=date,y=value,colour=variable, group=variable)) + +geom_line() + +geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1) + +xlab("Zeitraum") + ylab("Entropie") + +scale_colour_discrete(name = "", labels = "Entropie") + +theme(legend.title = element_text(size=14)) + +theme(legend.text = element_text(size=12)) + +theme(axis.title = element_text(size = 13)) +g_entrop +g_entrop <- ggplot(data = stats_entropy, aes(x=date,y=value,colour=variable, group=variable)) + +geom_line() + +geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1) + +xlab("Zeitraum") + ylab("Entropie") + +scale_colour_discrete(name = "", labels = "Entropie")# + +# theme(legend.title = element_text(size=14)) + +# theme(legend.text = element_text(size=12)) + +# theme(axis.title = element_text(size = 13)) +g_entrop +detach("package:ggplot2", unload=TRUE) +library("ggplot2", lib.loc="/usr/lib/R/site-library") +g_entrop <- ggplot(data = stats_entropy, aes(x=date,y=value,colour=variable, group=variable)) + +geom_line() + +geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1) + +xlab("Zeitraum") + ylab("Entropie") + +scale_colour_discrete(name = "", labels = "Entropie")# + +# theme(legend.title = element_text(size=14)) + +# theme(legend.text = element_text(size=12)) + +# theme(axis.title = element_text(size = 13)) +g_entrop +theme() require(stringr) require(reshape2) require(ggplot2) require(vars) -drop_s <- which(str_detect(names(issues), "^s")) -drop_i <- which(str_detect(names(issues), "^i")) -issues_i <- issues[,-drop_s] -issues_s <- issues[,-drop_i] -issues_i$total <- rowSums(issues_i[2:ncol(issues_i)]) -issues_i$entropy <- 0 -for(r in 1:nrow(issues_i)) { -curtotal <- as.numeric(issues_i$total[r]) -curp <- 0 -for(c in 2:ncol(issues_i)) { -curcount <- as.numeric(issues_i[r,c]) -curp[c] <- curcount / curtotal -} -curp <- curp [2:length(curp)-2] -curdrop <- which(curp==0) -curp <- curp[-curdrop] -issues_i$entropy[r] <- sum(-1 * curp * log(curp)) -} -issues_s$total <- rowSums(issues_s[2:ncol(issues_s)]) -issues_s$entropy <- 0 -for(r in 1:nrow(issues_s)) { -curtotal <- as.numeric(issues_s$total[r]) -curp <- 0 -for(c in 2:ncol(issues_s)) { -curcount <- as.numeric(issues_s[r,c]) -curp[c] <- curcount / curtotal -} -curp <- curp [2:length(curp)-2] -curdrop <- which(curp==0) -curp <- curp[-curdrop] -issues_s$entropy[r] <- sum(-1 * curp * log(curp)) -} -stats_total <- data.frame(date=drange) -stats_total$tpd <- 0 -stats_total$ipd <- issues_i$total -stats_total$spd <- issues_s$total -# Total number of tweets per day over time -for(r in 1:length(drange)) { -stats_total$tpd[r] <- length(tweets[tweets[, "created_at"] == drange[r], "id_str"]) -} -g1 <- ggplot(data = stats_melt, aes(x=date,y=value,colour=variable, group=variable)) + +theme() +g_entrop <- ggplot(data = stats_entropy, aes(x=date,y=value,colour=variable, group=variable)) + +geom_line() + +geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1) + +xlab("Zeitraum") + ylab("Entropie") + +scale_colour_discrete(name = "", labels = "Entropie")# + +# theme(legend.title = element_text(size=14)) + +# theme(legend.text = element_text(size=12)) + +# theme(axis.title = element_text(size = 13)) +g_entrop +g_entrop <- ggplot(data = stats_entropy, aes(x=date,y=value,colour=variable, group=variable)) + +geom_line() + +geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1) + +xlab("Zeitraum") + ylab("Entropie") + +scale_colour_discrete(name = "", labels = "Entropie") +g_entrop +g_perday <- ggplot(data = stats_melt, aes(x=date,y=value,colour=variable, group=variable)) + geom_line()+ +geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1) + +xlab("Zeitraum") + ylab("Tweets pro Tag") + +scale_colour_discrete(name = "Tweets", labels = c("Gesamt", "Allgemein", "Sensation")) +g_perday +g_entrop <- ggplot(data = stats_entropy, aes(x=date,y=value,colour=variable, group=variable)) + +geom_line() + +geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1) + +xlab("Zeitraum") + ylab("Entropie") + +scale_colour_discrete(name = "", labels = "Entropie") +g_entrop +g_entrop <- ggplot(data = stats_entropy, aes(x=date,y=value,colour=variable, group=variable)) + +geom_line() + geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1) -g1 +g_entrop stats_entropy <- data.frame(date=drange) stats_entropy$entropy <- issues_i$entropy stats_entropy <- melt(stats_entropy, id="date") -g1 <- ggplot(data = stats_entropy, aes(x=date,y=value,colour=variable, group=variable)) + +g_entrop <- ggplot(data = stats_entropy, aes(x=date,y=value,colour=variable, group=variable)) + geom_line() + -geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1) -g1 -test <- VAR(issues[,2:32], p=1, type="none") -View(issues_i) -View(issues_s) -View(issues) -test <- VAR(issues[,2:44], p=1, type="none") -VAR(issues_s[,2:23], p=1, type=c("const", "trend", "both", "none"), season=NULL, exogen = issues_i[2:22]) -plot(irf(test, impulse = names(issues_s[2:23]), response = names(issues_i[2:22]))) -rm(c_correct, c_curissue, c_errcode, c_errid, c_errissue, c_error1, c_error2, c_errors) -rm(c_issues, c_issuelist, c_issueheads) -rm(c_errtags, c_errtext, c_result, c_tag, c_tmp, c_tweets) +geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1) + +xlab("Zeitraum") + ylab("Entropie") + +scale_colour_discrete(name = "", labels = "Entropie") +g_entrop +g_entrop <- ggplot(data = stats_entropy, aes(x=date,y=value,colour=variable, group=variable)) + +geom_line() + +geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1) + +xlab("Zeitraum") + ylab("Entropie") + +scale_colour_discrete(name = "", labels = "Entropie") + +theme(legend.title = element_text(size=14)) + +theme(legend.text = element_text(size=12)) + +theme(axis.title = element_text(size = 13)) +g_entrop +g_entrop <- ggplot(data = stats_entropy, aes(x=date,y=value,colour=variable, group=variable)) + +geom_line() + +geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1) + +xlab("Zeitraum") + ylab("Entropie") + +scale_colour_discrete(name = "", labels = "Entropie") + +theme(legend.title = element_text(size=14)) + +theme(legend.text = element_text(size=12)) + +theme(axis.title = element_text(size = 14)) +g_entrop +g_perday <- ggplot(data = stats_melt, aes(x=date,y=value,colour=variable, group=variable)) + +geom_line()+ +geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1) + +xlab("Zeitraum") + ylab("Tweets pro Tag") + +scale_colour_discrete(name = "Tweets", labels = c("Gesamt", "Allgemein", "Sensation")) + +theme(legend.title = element_text(size=14)) + +theme(legend.text = element_text(size=12)) + +theme(axis.title = element_text(size = 14)) +g_perday +g_entrop <- ggplot(data = stats_entropy, aes(x=date,y=value,colour=variable, group=variable)) + +geom_line() + +geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1) + +xlab("Zeitraum") + ylab("Entropie") + +scale_colour_discrete(name = "", labels = "Entropie") + +theme(legend.title = element_text(size=14)) + +theme(legend.text = element_text(size=12)) + +theme(axis.title = element_text(size = 14)) +g_entrop +acc_parties <- data.frame(party = c("cducsu", "spd", "linke", "gruene")) +acc_parties$btw13 <- c(49.3, 30.6, 10.1, 10.0) # seats of party / 631 seats +acc_parties$twitter <- 0 +for(p in 1:nrow(acc_parties)) { +acc_parties$twitter[p] <- round(nrow(acc_df[acc_df$party == as.character(acc_parties$party[p]), ]) / 280 * 100) +} +require(jsonlite) require(stringr) -require(XML) -readYN <- function(question) { -n <- readline(prompt=question) -n <- as.character(n) -return(n) -} -checkIssue <- function(string, issuelist) { -status <- any(str_detect(string, issuelist)) -return(status) -} -checkAllIssues <- function(string, issuelist) { -status <- NULL -for(i in 1:length(string)) { -if(checkIssue(string[i], issuelist)) { -status[i] <- TRUE -} -else { -cat("Issue",string[i],"does not exist. Please try again.\n") -status[i] <- FALSE -} -} -return(status) -} -View(tweets) -c_tweets <- read.csv("tweets.csv", colClasses="character") -for(r in 1:nrow(c_tweets)) { -curtext <- as.character(c_tweets$text[r]) -if(str_detect(curtext, "\"")) { -c_tweets$text[r] <- str_replace(curtext, "\"", "") -} -} -c_tweets$X <- NULL -c_issues <- data.frame(date = drange) -c_issuelist <- xmlToList("issues-v2.xml") -c_issueheads <- names(issuelist) -c_issues[issueheads] <- 0 -source("issuecomp-codingsample-function.R") -require(stringr) -curtext -curtext <- str_replace_all(curtext, "#", "") -curtext <- str_replace_all(curtext, "-", " ") -curtext -curtext -str_replace_all(curtext, "[^[:alnum:]]", "") -str_replace_all(curtext, "[^[:alnum:]\s]", "") -str_replace_all(curtext, "[^[:alnum:]\\s]", "") -str_replace_all(curtext, "[^[:alnum:]^\\s]", "") -str_replace_all(curtext, "[^[:alnum:]^\\S]", "") -str_replace_all(curtext, "[^[:alnum:]][^\\s]", "") -str_replace_all(curtext, "[^[:alnum:]][^\\S]", "") -str_replace_all(curtext, "[^[:alnum:]][^[:blank]]", "") -str_replace_all(curtext, "[^[:alnum:]][^[:blank:]]", "") -str_replace_all(curtext, "[^[:alnum:]]", "") -str_replace_all(curtext, "\\W", "") -str_replace_all(curtext, "[\\W|\\S]", "") -str_replace_all(curtext, "(\\W|\\S)", "") -str_replace_all(curtext, "\\W|\\S", "") -str_replace_all(curtext, "\\W", "") -str_replace_all(curtext, "[\\W\\S]", "") -str_replace_all(curtext, "[\\S\\W]", "") -str_replace_all(curtext, "[\\s\\W]", "") -str_replace_all(curtext, "[\\W\\s]", "") -str_replace_all(curtext, "[\\W\s]", "") -str_replace_all(curtext, "[\\Ws]", "") -str_replace_all(curtext, "[\\W]", "") -str_replace_all(curtext, "\\W", "") -str_replace_all(curtext, "\\W|\\S", "") -str_replace_all(curtext, "\\W|\\s", "") -str_replace_all(curtext, "[^[:alnum:]]", "") -str_replace_all(curtext, "[^[:alnum:] ]", "") -str_replace_all(curtext, "[^[:alnum:]\\s]", "") -str_replace_all(curtext, "[^[:alnum:] ]", "") -curtext -curtext <- "liebe @cdu, wir finden #Steuer gut, aber die KFZ-Steuer nicht!" -curtext <- str_replace_all(curtext, "-", " ") -curtext <- str_replace_all(curtext, "[^[:alnum:] ]", "") -curtext -curtext <- "liebe @cdu, wir finden #Steuer gut, aber die KFZ--Steuer nicht!" -curtext <- str_replace_all(curtext, "-", " ") -curtext <- str_replace_all(curtext, "[^[:alnum:] ]", "") -curtext -str_replace_all(curtext, " ", " ") -smartPatternMatch -require(vars) -require(stringr) -adf1 <- summary(ur.df(issues)) -issues -summary(issues) -summary(issues[2:44]) -summary(issues[2:44], digits = 2) -adf1 <- summary(ur.df(issues[, 2:44]), type ="trend", lags=1) -data("Canda") -data("Canada") -class(Canada) -class(issues) -view(Canada) -View(Canada) -as.ts(issues) -issues_ts <- as.ts(issues) -class(issues_ts) -View(issues_ts) -View(issues) -adf1 <- summary(ur.df(issues_ts[, 2:44]), type ="trend", lags=1) -adf1 <- summary(ur.df(issues_ts[, 2]), type ="trend", lags=1) -adf1 <- summary(ur.df(issues_ts[, 2], type ="trend", lags=1)) -adf1 <- summary(ur.df(issues_ts[, 2:44], type ="trend", lags=1)) -adf1 <- summary(ur.df(issues_ts[, 2], type ="trend", lags=1)) -adf1 -adf1 <- summary(ur.df(issues_ts[, 3], type ="trend", lags=1)) -adf1 -adf1 <- summary(ur.df(issues_ts[, 2], type ="none", lags=1)) -adf1 -adf1 <- summary(ur.df(issues_ts[, 2], type ="trend", lags=1)) -adf1 -summary(ur.df(issues_ts[, 2], type ="none", lags=1)) -VARselect(issues_ts[2:44], lag.max = 8, type = "both") -VARselect(issues_ts[1:44], lag.max = 8, type = "both") -VARselect(issues[1:44], lag.max = 8, type = "both") -VARselect(issues[2:44], lag.max = 8, type = "both") -VARselect(issues_ts[2:44], lag.max = 8, type = "both") -VARselect(issues[2:44], lag.max = 8, type = "none") -VARselect(issues[2:44], lag.max = 8, type = "trend") -VARselect(issues[2:44], lag.max = 8, type = "const") -VARselect(issues[2:44], lag.max = 8, type = "both") -test <- VAR(issues[,2:44], p=1, type="both") -# VAR(issues_s[,2:23], p=1, type=c("const", "trend", "both", "none"), season=NULL, exogen = issues_i[2:22]) -plot(irf(test, impulse = names(issues_s[2:23]), response = names(issues_i[2:22]))) -summary(ur.df(issues_ts[, 2], type ="both", lags=1)) -summary(ur.df(issues_ts[, 2], type ="none", lags=1)) -test <- VAR(issues_ts[,2:44], p=1, type="both") -plot(irf(test, impulse = names(issues_s[2:23]), response = names(issues_i[2:22]))) +require(devtools) +require(RTwitterAPI) acc_df <- read.csv("MdB-twitter.csv") delrow <- NULL for(r in 1:nrow(acc_df)) { @@ -288,225 +182,331 @@ acc_df <- acc_df[-delrow, ] rm(delrow, r, acc) acc_df$row.names <- NULL row.names(acc_df) <- NULL -View(acc_df) -View(c_tweets) -issueheads -length(issueheads) -issuelist -length(issuelist) -length(issuelist[*]) -length(issuelist[[*]]) -length(issuelist[1:43]) -length(issuelist[1) -length(issuelist[1]) -length(issuelist[2]) -length(issuelist[[1]]) -length(issuelist[[2]]) -length(issuelist[[70]]) -length(issuelist[[43]]) -length(issuelist[[44]]) -length(issuelist[[1:43]]) -length(issuelist[[1-43]]) -length(issuelist[[2]]) -test <- 0 -num <- 0 -for(i in 1:length(issuelist)) { -j <- length(issuelist[[i]]) -num <- num + j -rm(j) +acc_parties <- data.frame(party = c("cducsu", "spd", "linke", "gruene")) +acc_parties$btw13 <- c(49.3, 30.6, 10.1, 10.0) # seats of party / 631 seats +acc_parties$twitter <- 0 +for(p in 1:nrow(acc_parties)) { +acc_parties$twitter[p] <- round(nrow(acc_df[acc_df$party == as.character(acc_parties$party[p]), ]) / 280 * 100) } -num -drop_s <- which(str_detect(names(issues), "^s")) -drop_i <- which(str_detect(names(issues), "^i")) -issues_i <- issues[,-drop_s] -issues_s <- issues[,-drop_i] -require(stringr) -drop_s <- which(str_detect(names(issues), "^s")) -drop_i <- which(str_detect(names(issues), "^i")) -issues_i <- issues[,-drop_s] -issues_s <- issues[,-drop_i] -issues_i$total <- rowSums(issues_i[2:ncol(issues_i)]) -issues_i$entropy <- 0 -for(r in 1:nrow(issues_i)) { -curtotal <- as.numeric(issues_i$total[r]) -curp <- 0 -for(c in 2:ncol(issues_i)) { -curcount <- as.numeric(issues_i[r,c]) -curp[c] <- curcount / curtotal +pie(acc_parties$btw13, col=c("black", "red", "purple", "green"), labels = c("CDU/CSU", "SPD", "Die LINKE", "Bündnis 90/Grüne"), clockwise = T, +main = "Seats of parties in the parliament") +pie(acc_parties$twitter, col=c("black", "red", "purple", "green"), labels = c("CDU/CSU", "SPD", "Die LINKE", "Bündnis 90/Grüne"), clockwise = T, +main = "Percentage of parties' MdBs of all Twitter accounts") +pie(acc_parties$btw13, col=c("black", "red", "purple", "green"), labels = c("CDU/CSU", "SPD", "Die LINKE", "Bündnis 90/Grüne"), clockwise = T) +pie(acc_parties$twitter, col=c("black", "red", "purple", "green"), labels = c("CDU/CSU", "SPD", "Die LINKE", "Bündnis 90/Grüne"), clockwise = T) +View(acc_parties) +pie(acc_parties$btw13, col=c("black", "red", "purple", "green"), +labels = c("CDU/CSU (49.3%)", "SPD", "Die LINKE", "Bündnis 90/Grüne"), clockwise = T) +pie(acc_parties$btw13, col=c("black", "red", "purple", "green"), +labels = c("CDU/CSU (49,3%)", "SPD (30,6%)", "Die LINKE (10,1%)", "Bündnis 90/Grüne(10.0%)"), +clockwise = T) +acc_parties <- data.frame(party = c("cducsu", "spd", "gruene", "linke")) +acc_parties$btw13 <- c(49.3, 30.6, 10.0, 10.1) # seats of party / 631 seats +acc_parties$twitter <- 0 +for(p in 1:nrow(acc_parties)) { +acc_parties$twitter[p] <- round(nrow(acc_df[acc_df$party == as.character(acc_parties$party[p]), ]) / 280 * 100) } -curp <- curp [2:length(curp)-2] -curdrop <- which(curp==0) -curp <- curp[-curdrop] -issues_i$entropy[r] <- sum(-1 * curp * log(curp)) -} -issues_s$total <- rowSums(issues_s[2:ncol(issues_s)]) -issues_s$entropy <- 0 -for(r in 1:nrow(issues_s)) { -curtotal <- as.numeric(issues_s$total[r]) -curp <- 0 -for(c in 2:ncol(issues_s)) { -curcount <- as.numeric(issues_s[r,c]) -curp[c] <- curcount / curtotal -} -curp <- curp [2:length(curp)-2] -curdrop <- which(curp==0) -curp <- curp[-curdrop] -issues_s$entropy[r] <- sum(-1 * curp * log(curp)) -} -stats_total <- data.frame(date=drange) -stats_total$tpd <- 0 -stats_total$ipd <- issues_i$total -stats_total$spd <- issues_s$total -# Total number of tweets per day over time -for(r in 1:length(drange)) { -stats_total$tpd[r] <- length(tweets[tweets[, "created_at"] == drange[r], "id_str"]) -} -stats_melt <- melt(stats_total, id="date") -g1 <- ggplot(data = stats_melt, aes(x=date,y=value,colour=variable, group=variable)) + -geom_line()+ -geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1) -g1 -require(ggplot2) -stats_melt <- melt(stats_total, id="date") -g1 <- ggplot(data = stats_melt, aes(x=date,y=value,colour=variable, group=variable)) + -geom_line()+ -geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1) -g1 -g1 <- ggplot(data = stats_melt, aes(x=date,y=value,colour=variable, group=variable)) + -geom_line()+ -geom_smooth(size=1,formula = y ~ x, method="lm", se=FALSE, color=1) -g1 -g1 <- ggplot(data = stats_melt, aes(x=date,y=value,colour=variable, group=variable)) + -geom_line()+ -geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1) -g1 -# Visuals for entropy in time series -stats_entropy <- data.frame(date=drange) -stats_entropy$entropy <- issues_i$entropy -stats_entropy <- melt(stats_entropy, id="date") -require(reshape2) -stats_melt <- melt(stats_total, id="date") -g1 <- ggplot(data = stats_melt, aes(x=date,y=value,colour=variable, group=variable)) + -geom_line()+ -geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1) -g1 -stats_entropy <- data.frame(date=drange) -stats_entropy$entropy <- issues_i$entropy -stats_entropy <- melt(stats_entropy, id="date") -g1 <- ggplot(data = stats_entropy, aes(x=date,y=value,colour=variable, group=variable)) + -geom_line() + -geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1) -g1 -g1 <- ggplot(data = stats_entropy, aes(x=date,y=value,colour=variable, group=variable)) + -geom_line() + -geom_smooth(size=1,formula = y ~ x, method="lm", se=FALSE, color=1) -g1 -vIssues <- VAR(issues_ts[,2:44], p=1, type="both") -require(vars) -VARselect(issues_ts, lag.max = 8, type = "both") -vIssues <- VAR(issues_ts[,2:44], p=1, type="both") -VARselect(issues_ts, lag.max = 16, type = "both") -VARselect(issues_ts, lag.max = 4, type = "both") -VARselect(issues_ts, lag.max = 5, type = "both") -VARselect(issues_ts, lag.max = 6, type = "both") -VARselect(issues_ts, lag.max = 5, type = "both") -names(issues_ts) -issues_ts -issues_ts[2:44] -issues_ts <- as.ts(issues[,2:44]) -issues_ts[1:1] -issues_ts[,1] -issues_ts[1,1] -issues_ts2,1] -issues_ts[2,1] +pie(acc_parties$btw13, col=c("black", "red", "green", "purple"), +labels = c("CDU/CSU (49,3%)", "SPD (30,6%)", "Bündnis 90/Grüne(10.0%)", "Die LINKE (10,1%)"), +clockwise = T) +pie(acc_parties$btw13, col=c("black", "red", "green", "purple"), +pie(acc_parties$btw13, col=c("black", "red", "green", "purple"), +labels = c("CDU/CSU (49,3%)", "SPD (30,6%)", "Bündnis 90/Grüne(10,0%)", "Die LINKE (10,1%)"), +clockwise = T) +pie(acc_parties$btw13, col=c("black", "red", "green", "purple"), +labels = c("CDU/CSU (49,3%)", "SPD (30,6%)", "Bündnis 90/Grüne(10,0%)", "Die LINKE (10,1%)"), +clockwise = T) +pie(acc_parties$twitter, col=c("black", "red", "green", "purple"), +labels = c("CDU/CSU (36%)", "SPD (30%)", "Bündnis 90/Grüne(19%)", "Die LINKE (15%)"), +clockwise = T) +pie(acc_parties$twitter, col=c("black", "red", "green", "purple"), +pie(acc_parties$twitter, col=c("black", "red", "green", "purple"), +labels = c("CDU/CSU (36%)", "SPD (30%)", "Bündnis 90/Grüne(19%)", "Die LINKE (15%)"), +clockwise = T, init.angle = 90) +pie(acc_parties$twitter, col=c("black", "red", "green", "purple"), +labels = c("CDU/CSU (36%)", "SPD (30%)", "Bündnis 90/Grüne(19%)", "Die LINKE (15%)"), +clockwise = T) +pie(acc_parties$twitter, col=c("black", "red", "green", "purple"), +labels = c("CDU/CSU (36%)", "SPD (30%)", "Bündnis 90/Grüne(19%)", "Die LINKE (15%)"), +clockwise = T, init.angle = 180) +pie(acc_parties$twitter, col=c("black", "red", "green", "purple"), +labels = c("CDU/CSU (36%)", "SPD (30%)", "Bündnis 90/Grüne(19%)", "Die LINKE (15%)"), +clockwise = T, init.angle = 270) +pie(acc_parties$twitter, col=c("black", "red", "green", "purple"), +labels = c("CDU/CSU (36%)", "SPD (30%)", "Bündnis 90/Grüne(19%)", "Die LINKE (15%)"), +clockwise = T, init.angle = 360) +pie(acc_parties$twitter, col=c("black", "red", "green", "purple"), +labels = c("CDU/CSU (36%)", "SPD (30%)", "Bündnis 90/Grüne(19%)", "Die LINKE (15%)"), +clockwise = T, init.angle = 20) +pie(acc_parties$twitter, col=c("black", "red", "green", "purple"), +labels = c("CDU/CSU (36%)", "SPD (30%)", "Bündnis 90/Grüne(19%)", "Die LINKE (15%)"), +clockwise = T, init.angle = 20) +pie(acc_parties$twitter, col=c("black", "red", "green", "purple"), +labels = c("CDU/CSU (36%)", "SPD (30%)", "Bündnis 90/Grüne(19%)", "Die LINKE (15%)"), +clockwise = T, init.angle = 90) +pie(acc_parties$btw13, col=c("black", "red", "green", "purple"), +labels = c("CDU/CSU (49,3%)", "SPD (30,6%)", "Bündnis 90/Grüne(10,0%)", "Die LINKE (10,1%)"), +clockwise = T) +pie(acc_parties$twitter, col=c("black", "red", "green", "purple"), +labels = c("CDU/CSU (36%)", "SPD (30%)", "Bündnis 90/Grüne(19%)", "Die LINKE (15%)"), +clockwise = T) +2359 / 200 * 100 issues_ts <- as.ts(issues[,2:44]) VARselect(issues_ts, lag.max = 5, type = "both") -VARselect(issues_ts, lag.max = 8, type = "both") -VARselect(issues_ts, lag.max = 7, type = "both") -VARselect(issues_ts, lag.max = 5, type = "both") -vIssues <- VAR(issues_ts[,2:44], p=5, type="both") vIssues <- VAR(issues_ts, p=5, type="both") -plot(irf(vIssues, impulse = names(issues_s[2:23]), response = names(issues_i[2:22]))) -require(stringr) -require(XML) -c_errors <- read.csv("issuecomp-codingsample-error.csv", header = F, sep=",", colClasses="character") -names(c_errors) <- c("str_id", "code", "issue", "tags", "text") -for(r in 1:nrow(c_errors)) { -c_errcode <- as.character(c_errors$code[r]) -c_errissue <- as.character(c_errors$issue[r]) -c_errtags <- as.character(c_errors$tags[r]) -c_errtext <- as.character(c_errors$text[r]) -c_errid <- as.character(c_errors$str_id[r]) -cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="") -source("issuecomp-codingsample-function2.R") +vIssues <- VAR(issues_ts, p=1, type="both") +issues_ts <- as.ts(issues) +VARselect(issues[2:44], lag.max = 8, type = "both") +summary(ur.df(issues_ts[, 2], type ="none", lags=1)) +VARselect(issues_ts, lag.max = 5, type = "both") +issues_ts <- as.ts(issues[,2:44]) +VARselect(issues_ts, lag.max = 5, type = "both") +VARselect(issues_ts, lag.max = 5, type = "both") +VARselect(issues_ts, lag.max = 5, type = "both") +VARselect(issues_ts, lag.max = 5, type = "both") +VARselect(issues_ts, lag.max = 5, type = "both") +VARselect(issues_ts, lag.max = 5, type = "both") +VARselect(issues_ts, lag.max = 5, type = "both") +summary(ur.df(issues_ts[, 2], type ="none", lags=1)) +ur.df(issues_ts[, 2], type ="none", lags=1) +head(issues_ts) +issues_ts$i1.macro +issues_ts[, "i1.macro"] +summary(ur.df(issues_ts[, "i1.macro"], type ="none", lags=1)) +ncol(issues_ts) +for(i in 2:ncol(issues_ts)) { +summary(ur.df(issues_ts[, i], type ="none", lags=1)) } -for(r in 1:nrow(c_errors)) { -c_errcode <- as.character(c_errors$code[r]) -c_errissue <- as.character(c_errors$issue[r]) -c_errtags <- as.character(c_errors$tags[r]) -c_errtext <- as.character(c_errors$text[r]) -c_errid <- as.character(c_errors$str_id[r]) -cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="") -source("issuecomp-codingsample-function2.R") +cat(summary(ur.df(issues_ts[, "i1.macro"], type ="none", lags=1))) +paste(summary(ur.df(issues_ts[, "i1.macro"], type ="none", lags=1))) +i +summary(ur.df(issues_ts[, i], type ="none", lags=1)) +summary(ur.df(issues_ts[, 1], type ="none", lags=1)) +summary(ur.df(issues_ts[, 2], type ="none", lags=1)) +summary(ur.df(issues_ts[, 1], type ="none", lags=1)) +summary(ur.df(issues_ts[, 1], type ="none", lags=1)) +names(issues_ts) +issues_ts[1,] +summary(ur.df(issues_ts[, 1], type ="none", lags=1)) +summary(ur.df(issues_ts[, 2], type ="none", lags=1)) +summary(ur.df(issues_ts[, 43], type ="none", lags=1)) +summary(ur.df(issues_ts[, 43], type ="trend", lags=1)) +summary(ur.df(issues_ts[, 43], type ="none", lags=1)) +summary(ur.df(issues_ts[, 1], type ="none", lags=1)) +ur.df(issues_ts[, 1], type ="none", lags=1) +for(i in 2:ncol(issues_ts)) { +ur.df(issues_ts[, i], type ="none", lags=1) } +ur.df(issues_ts[, i], type ="none", lags=1) +test <- ur.df(issues_ts[, i], type ="none", lags=1) +test +for(i in 2:ncol(issues_ts)) { +test <- ur.df(issues_ts[, i], type ="none", lags=1) +cat(test) +} +class(test) +as.character(test) +for(i in 2:ncol(issues_ts)) { +test[i] <- ur.df(issues_ts[, i], type ="none", lags=1) +} +ur.df(issues_ts[, 1], type ="none", lags=1) +ur.df(issues_ts[, sprintf("%s", c(1,2,3))], type ="none", lags=1) +ur.df(issues_ts[, sprintf("%i", c(1,2,3))], type ="none", lags=1) +ur.df(issues_ts[, 2], type ="none", lags=1) +ur.df(issues_ts[, 3], type ="none", lags=1) +i <- 0 +i <- i + 1 +ur.df(issues_ts[, i], type ="none", lags=1) +i <- i + 1 +ur.df(issues_ts[, i], type ="none", lags=1) +i <- i + 1 +ur.df(issues_ts[, i], type ="none", lags=1) +i <- i + 1 +ur.df(issues_ts[, i], type ="none", lags=1) +i <- i + 1 +ur.df(issues_ts[, i], type ="none", lags=1) +i <- i + 1 +ur.df(issues_ts[, i], type ="none", lags=1) +i <- i + 1 +ur.df(issues_ts[, i], type ="none", lags=1) +i <- i + 1 +ur.df(issues_ts[, i], type ="none", lags=1) +i <- i + 1 +ur.df(issues_ts[, i], type ="none", lags=1) +i <- i + 1 +ur.df(issues_ts[, i], type ="none", lags=1) +i <- i + 1 +ur.df(issues_ts[, i], type ="none", lags=1) +i <- i + 1 +ur.df(issues_ts[, i], type ="none", lags=1) +i <- i + 1 +ur.df(issues_ts[, i], type ="none", lags=1) +i <- i + 1 +ur.df(issues_ts[, i], type ="none", lags=1) +i <- i + 1 +ur.df(issues_ts[, i], type ="none", lags=1) +i <- i + 1 +ur.df(issues_ts[, i], type ="none", lags=1) +i <- i + 1 +ur.df(issues_ts[, i], type ="none", lags=1) +i <- i + 1 +ur.df(issues_ts[, i], type ="none", lags=1) +i <- i + 1 +ur.df(issues_ts[, i], type ="none", lags=1) +i <- i + 1 +ur.df(issues_ts[, i], type ="none", lags=1) +i <- i + 1 +ur.df(issues_ts[, i], type ="none", lags=1) +i <- i + 1 +ur.df(issues_ts[, i], type ="none", lags=1) +i <- i + 1 +ur.df(issues_ts[, i], type ="none", lags=1) +i <- i + 1 +ur.df(issues_ts[, i], type ="none", lags=1) +i <- i + 1 +ur.df(issues_ts[, i], type ="none", lags=1) +i <- i + 1 +ur.df(issues_ts[, i], type ="none", lags=1) +i <- i + 1 +ur.df(issues_ts[, i], type ="none", lags=1) +i <- i + 1 +ur.df(issues_ts[, i], type ="none", lags=1) +i <- i + 1 +ur.df(issues_ts[, i], type ="none", lags=1) +i <- i + 1 +ur.df(issues_ts[, i], type ="none", lags=1) +i <- i + 1 +ur.df(issues_ts[, i], type ="none", lags=1) +i <- i + 1 +ur.df(issues_ts[, i], type ="none", lags=1) +i <- i + 1 +ur.df(issues_ts[, i], type ="none", lags=1) +i <- i + 1 +ur.df(issues_ts[, i], type ="none", lags=1) +i <- i + 1 +ur.df(issues_ts[, i], type ="none", lags=1) +i <- i + 1 +ur.df(issues_ts[, i], type ="none", lags=1) +i <- i + 1 +ur.df(issues_ts[, i], type ="none", lags=1) +i <- i + 1 +ur.df(issues_ts[, i], type ="none", lags=1) +i <- i + 1 +ur.df(issues_ts[, i], type ="none", lags=1) +i <- i + 1 +ur.df(issues_ts[, i], type ="none", lags=1) +i <- i + 1 +ur.df(issues_ts[, i], type ="none", lags=1) +i <- i + 1 +ur.df(issues_ts[, i], type ="none", lags=1) +i <- i + 1 +ur.df(issues_ts[, i], type ="none", lags=1) +i <- i + 1 +ur.df(issues_ts[, i], type ="none", lags=1) +i issueheads -for(i in 1:length(issueheads)) {paste(issueheads[i])} -for(i in 1:length(issueheads)) {cat(issueheads[i], "\n")} -for(r in 1:nrow(c_errors)) { -c_errcode <- as.character(c_errors$code[r]) -c_errissue <- as.character(c_errors$issue[r]) -c_errtags <- as.character(c_errors$tags[r]) -c_errtext <- as.character(c_errors$text[r]) -c_errid <- as.character(c_errors$str_id[r]) -cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="") -source("issuecomp-codingsample-function2.R") +length(issuelist[[2]]) +# List all issues in one row +for(i in 1:length(issueheads)) { +cat(issueheads[i], "\n") } -for(r in 1:nrow(c_errors)) { -c_errcode <- as.character(c_errors$code[r]) -c_errissue <- as.character(c_errors$issue[r]) -c_errtags <- as.character(c_errors$tags[r]) -c_errtext <- as.character(c_errors$text[r]) -c_errid <- as.character(c_errors$str_id[r]) -cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="") -source("issuecomp-codingsample-function2.R") -} -c_errors <- read.csv("issuecomp-codingsample-error.csv", header = F, sep=",", colClasses="character") -names(c_errors) <- c("str_id", "code", "issue", "tags", "text") -for(r in 1:nrow(c_errors)) { -c_errcode <- as.character(c_errors$code[r]) -c_errissue <- as.character(c_errors$issue[r]) -c_errtags <- as.character(c_errors$tags[r]) -c_errtext <- as.character(c_errors$text[r]) -c_errid <- as.character(c_errors$str_id[r]) -cat("===============\n\n[TWEET]: ",c_errtext,"\n[ISSUES]: ", c_errissue, " (", c_errtags, ")\n", sep="") -source("issuecomp-codingsample-function2.R") -} -c_tmp <- read.csv("issuecomp-codingsample-error1.csv", header = F, colClasses="character") -names(c_tmp) <- c("str_id", "all", "wrong", "tagged", "text") -c_error1 <- c_tmp[, c("wrong", "tagged", "all", "text")] -c_tmp <- read.csv("issuecomp-codingsample-error2.csv", header = F, colClasses="character") -names(c_tmp) <- c("str_id", "all", "missing", "tagged", "text") -c_error2 <- c_tmp[, c("missing", "text", "tagged", "all")] -View(c_error2) -summary(ur.df(issues_ts[, 2], type ="none", lags=1)) +vIssues <- VAR(issues_ts, p=1, type="both") +issues_ts[1:20,1] +issues_ts[,1] +issues_ts[1,] +issues_ts[1:21,] +issues_ts[,1:21] +issues_ts[1,1:21] +issues_ts[1,22:43] +issues_ts[1,22:44] +issues_ts[1,22:43] +plot(irf(vIssues, impulse = names(issues_ts[1:21]), response = names(issues_i[22:43]))) +require(stringr) +require(reshape2) +require(ggplot2) require(vars) -summary(ur.df(issues_ts[, 2], type ="none", lags=1)) -stability(vIssues) -stability(vIssues[2:]) -stability(vIssues[2:44]) -plot(stability(vIssues)) -class(vIssues) -summary(vIssues) -plot(stability(vIssues[2])) -plot(stability(vIssues), nc=2) -plot(stability(vIssues), h=0.15) -stability(vIssues) -efp(formula = formula, data = data, type = type, h = h, dynamic = dynamic, -rescale = rescale) -plot(stability(vIssues), h=0.15) -plot(stability(vIssues, h=0.15)) -plot(stability(vIssues, h=0.15, rescale = TRUE)) -plot(stability(vIssues, h=0.15, rescale = TRUE), nc=2) -par("mar") -par(mar=c(1,1,1,1)) -plot(stability(vIssues, h=0.15, rescale = TRUE), nc=2) +vIssues +plot(irf(vIssues, impulse = names(issues_ts[1:21]), response = names(issues_i[22:43]))) +plot(irf(vIssues, impulse = names(issues_ts[1:21]), response = names(issues_ts[22:43]))) +issues_s +names(issues_s) +names(issues_s[2:23]) +names(issuesi[2:22]) +names(issues_i[2:22]) +plot(irf(vIssues, impulse = names(issues_s[2:23]), response = names(issues_i[2:22]))) +plot(irf(vIssues, impulse = names(issues_s[2:23]), response = names(issues_i[2:22]))) +plot(irf(vIssues, impulse = names(issues_s[2:23]), response = names(issues_i[2:22]))) +irf(vIssues, impulse = names(issues_s[2:23]), response = names(issues_i[2:22])) +vIRF <- irf(vIssues, impulse = names(issues_s[2:23]), response = names(issues_i[2:22])) +summary(vIRF) +vIRF$irf +vIRF$boot +vIRF$ortho +vIRF$Lower +vIRF$irf[1] +vIRF$irf["s.boko"] +summary(issues$hk) +summary(issues$s.hk) +summary(issues$s.nsa) +summary(issues$s.gaza) +summary(issues$s.boko) +summary(issues$s.ebola) +summary(issues$s.edathy) +summary(issues$s.ferguson) +summary(issues$s.gurlitt) +summary(issues$s.is) +summary(issues$s.pegida) +summary(issues$s.schumi) +summary(issues$s.tebartz) +summary(issues$s.wm) +summary(issues$s.wulff) +plot(vIRF) +names(issues) +summary(issues[2:44]) +plot(vIRF) +plot(vIRF, x=300, y=200) +plot(vIRF, res = 300) +plot(vIRF[1]) +plot(vIRF$irf[1]) +summary(issues[2:44]) +ur.df(issues_ts[, i], type ="none", lags=1) +summary(ur.df(issues_ts[, i], type ="none", lags=1)) +summary(ur.df(issues_ts[,30], type ="none", lags=1)) +summary(issues[2]) +stats_entropy +names(issues) +issues_bak <- issues +issues$total <- rowSums(issues[2:ncol(issues)]) +issues$entropy <- 0 +names(issues) +issues$total <- rowSums(issues[2:ncol(issues)]) +issues$entropy <- 0 +for(r in 1:nrow(issues)) { +curtotal <- as.numeric(issues$total[r]) +curp <- 0 +for(c in 2:ncol(issues)) { +curcount <- as.numeric(issues[r,c]) +curp[c] <- curcount / curtotal +} +curp <- curp [2:length(curp)-2] +curdrop <- which(curp==0) +curp <- curp[-curdrop] +issues$entropy[r] <- sum(-1 * curp * log(curp)) +} +stats_entropy <- data.frame(date=drange) +stats_entropy$entropy <- issues$entropy +stats_entropy <- melt(stats_entropy, id="date") +g_entrop <- ggplot(data = stats_entropy, aes(x=date,y=value,colour=variable, group=variable)) + +geom_line() + +geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1) + +xlab("Zeitraum") + ylab("Entropie") + +scale_colour_discrete(name = "", labels = "Entropie") + +theme(legend.title = element_text(size=14)) + +theme(legend.text = element_text(size=12)) + +theme(axis.title = element_text(size = 14)) +g_entrop +View(issues) +summary(issues$entropy) +summary(issues$total) +stats_total +summary(issues[2:44]) diff --git a/.gitignore b/.gitignore index eefdaaa..f96c777 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ issuecomp-analysis.log issuecomp-codingsample-correct.csv issuecomp-codingsample-error.csv issuecomp-codingsample-error2.csv +twitter-api-credentials.txt diff --git a/issuecomp-2-analysis.R b/issuecomp-2-analysis.R index 92c228a..c3d2bd1 100644 --- a/issuecomp-2-analysis.R +++ b/issuecomp-2-analysis.R @@ -21,7 +21,7 @@ drange <- date_start + days(0:drange) # Import issues and prepare everything # Will only be filled after the large categorisation loop issues <- data.frame(date = drange) -issuelist <- readLines("issues-v2.xml") +issuelist <- readLines("issues-v3.xml") issuelist <- str_replace_all(string = issuelist, pattern = ".*", "") issuelist <- xmlToList(issuelist) issueheads <- names(issuelist) diff --git a/issuecomp-3-calc.R b/issuecomp-3-calc.R index 55b4cea..52c741d 100644 --- a/issuecomp-3-calc.R +++ b/issuecomp-3-calc.R @@ -7,7 +7,7 @@ require(vars) drop_s <- which(str_detect(names(issues), "^s")) drop_i <- which(str_detect(names(issues), "^i")) issues_i <- issues[,-drop_s] -issues_s <- issues[,-drop_i] +issues <- issues[,-drop_i] # # # ENTROPY @@ -15,7 +15,6 @@ issues_s <- issues[,-drop_i] # Entropy non-sensational issues issues_i$total <- rowSums(issues_i[2:ncol(issues_i)]) issues_i$entropy <- 0 - for(r in 1:nrow(issues_i)) { curtotal <- as.numeric(issues_i$total[r]) curp <- 0 @@ -30,71 +29,102 @@ for(r in 1:nrow(issues_i)) { } # Entropy sensational issues -issues_s$total <- rowSums(issues_s[2:ncol(issues_s)]) -issues_s$entropy <- 0 - -for(r in 1:nrow(issues_s)) { - curtotal <- as.numeric(issues_s$total[r]) +issues$total <- rowSums(issues[2:ncol(issues)]) +issues$entropy <- 0 +for(r in 1:nrow(issues)) { + curtotal <- as.numeric(issues$total[r]) curp <- 0 - for(c in 2:ncol(issues_s)) { - curcount <- as.numeric(issues_s[r,c]) + for(c in 2:ncol(issues)) { + curcount <- as.numeric(issues[r,c]) curp[c] <- curcount / curtotal } curp <- curp [2:length(curp)-2] curdrop <- which(curp==0) curp <- curp[-curdrop] - issues_s$entropy[r] <- sum(-1 * curp * log(curp)) + issues$entropy[r] <- sum(-1 * curp * log(curp)) +} + +# Entropy overall +issues$total <- rowSums(issues[2:ncol(issues)]) +issues$entropy <- 0 +for(r in 1:nrow(issues)) { + curtotal <- as.numeric(issues$total[r]) + curp <- 0 + for(c in 2:ncol(issues)) { + curcount <- as.numeric(issues[r,c]) + curp[c] <- curcount / curtotal + } + curp <- curp [2:length(curp)-2] + curdrop <- which(curp==0) + curp <- curp[-curdrop] + issues$entropy[r] <- sum(-1 * curp * log(curp)) } -# Compare total tweets vs. total issue findings + +# Compare total tweets vs. total sensational & total unsensational stats_total <- data.frame(date=drange) stats_total$tpd <- 0 stats_total$ipd <- issues_i$total -stats_total$spd <- issues_s$total +stats_total$spd <- issues$total # Total number of tweets per day over time for(r in 1:length(drange)) { stats_total$tpd[r] <- length(tweets[tweets[, "created_at"] == drange[r], "id_str"]) } +# VISUALS: Tweets per day vs. sensational vs. general findings stats_melt <- melt(stats_total, id="date") -g1 <- ggplot(data = stats_melt, aes(x=date,y=value,colour=variable, group=variable)) + +g_perday <- ggplot(data = stats_melt, aes(x=date,y=value,colour=variable, group=variable)) + geom_line()+ - geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1) -g1 + geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1) + + xlab("Zeitraum") + ylab("Tweets pro Tag") + + scale_colour_discrete(name = "Tweets", labels = c("Gesamt", "Allgemein", "Sensation")) + + theme(legend.title = element_text(size=14)) + + theme(legend.text = element_text(size=12)) + + theme(axis.title = element_text(size = 14)) +g_perday # Visuals for entropy in time series stats_entropy <- data.frame(date=drange) -stats_entropy$entropy <- issues_i$entropy +stats_entropy$entropy <- issues$entropy stats_entropy <- melt(stats_entropy, id="date") -g1 <- ggplot(data = stats_entropy, aes(x=date,y=value,colour=variable, group=variable)) + +g_entrop <- ggplot(data = stats_entropy, aes(x=date,y=value,colour=variable, group=variable)) + geom_line() + - geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1) -g1 + geom_smooth(size=1,formula = y ~ x, method="loess", se=FALSE, color=1) + + xlab("Zeitraum") + ylab("Entropie") + + scale_colour_discrete(name = "", labels = "Entropie") + + theme(legend.title = element_text(size=14)) + + theme(legend.text = element_text(size=12)) + + theme(axis.title = element_text(size = 14)) +g_entrop # VAR --------------------------------------------------------------------- # test <- VAR(issues[,2:32], p=1, type=c("const", "trend", "both", "none"), season=NULL, exogen = NULL, lag.max = NULL, ic = c("AIC", "HQ", "SC", "FPE")) -# test <- VAR(issues_i[,2:22], p=1, type="none", exogen = issues_s[,2:3]) -# test <- VAR(issues_s[,2:11], p=1, type="none") -# VAR(issues_s[,2:23], p=1, type=c("const", "trend", "both", "none"), season=NULL, exogen = issues_i[2:22]) +# test <- VAR(issues_i[,2:22], p=1, type="none", exogen = issues[,2:3]) +# test <- VAR(issues[,2:11], p=1, type="none") +# VAR(issues[,2:23], p=1, type=c("const", "trend", "both", "none"), season=NULL, exogen = issues_i[2:22]) issues_ts <- as.ts(issues[,2:44]) -VARselect(issues_ts, lag.max = 5, type = "both") -vIssues <- VAR(issues_ts, p=5, type="both") - -plot(irf(vIssues, impulse = names(issues_s[2:23]), response = names(issues_i[2:22]))) - -capture.output(print(summary(test), prmsd=TRUE, digits=1), file="out.txt") # Tests -issues_ts <- as.ts(issues) -VARselect(issues[2:44], lag.max = 8, type = "both") -summary(ur.df(issues_ts[, 2], type ="none", lags=1)) +VARselect(issues_ts, lag.max = 5, type = "both") +i <- 0 +i <- i + 1 +ur.df(issues_ts[, i], type ="none", lags=1) +summary(issues[2:44]) + + +# VAR and IRF +vIssues <- VAR(issues_ts, p=1, type="both") +vIRF <- irf(vIssues, impulse = names(issues[2:23]), response = names(issues_i[2:22])) +plot(vIRF) + +# capture.output(print(summary(test), prmsd=TRUE, digits=1), file="out.txt") # SOME TESTS -------------------------------------------------------------- @@ -116,16 +146,18 @@ rm(g1, r) # Show party percentage of twitter users -acc_parties <- data.frame(party = c("cducsu", "spd", "linke", "gruene")) -acc_parties$btw13 <- c(49.3, 30.6, 10.1, 10.0) # seats of party / 631 seats +acc_parties <- data.frame(party = c("cducsu", "spd", "gruene", "linke")) +acc_parties$btw13 <- c(49.3, 30.6, 10.0, 10.1) # seats of party / 631 seats acc_parties$twitter <- 0 for(p in 1:nrow(acc_parties)) { acc_parties$twitter[p] <- round(nrow(acc_df[acc_df$party == as.character(acc_parties$party[p]), ]) / 280 * 100) } -pie(acc_parties$btw13, col=c("black", "red", "purple", "green"), labels = c("CDU/CSU", "SPD", "Die LINKE", "Bündnis 90/Grüne"), clockwise = T, - main = "Seats of parties in the parliament") -pie(acc_parties$twitter, col=c("black", "red", "purple", "green"), labels = c("CDU/CSU", "SPD", "Die LINKE", "Bündnis 90/Grüne"), clockwise = T, - main = "Percentage of parties' MdBs of all Twitter accounts") +pie(acc_parties$btw13, col=c("black", "red", "green", "purple"), + labels = c("CDU/CSU (49,3%)", "SPD (30,6%)", "Bündnis 90/Grüne(10,0%)", "Die LINKE (10,1%)"), + clockwise = T) +pie(acc_parties$twitter, col=c("black", "red", "green", "purple"), + labels = c("CDU/CSU (36%)", "SPD (30%)", "Bündnis 90/Grüne(19%)", "Die LINKE (15%)"), + clockwise = T) rm(acc_parties, p) @@ -152,6 +184,11 @@ ggplot(issues_melt,aes(x=date,y=value,colour=variable,group=variable)) + geom_sm # POSSIBLY USEFUL CODE ---------------------------------------------------- +# List all issues in one row +for(i in 1:length(issueheads)) { + cat(issueheads[i], "\n") +} + # Limits of list length(issuelist) length(issuelist[[2]]) diff --git a/twitter-api-credentials.txt b/twitter-api-credentials.txt deleted file mode 100644 index 3b7d2a8..0000000 --- a/twitter-api-credentials.txt +++ /dev/null @@ -1,5 +0,0 @@ -# 1. line: consumer key, 2. consumer secret, 3. oauth token, 4. oauth secret -c9Ob2fWNSONMC0mA2JlNaeRke -cZ3Il2hmbLgK0Lc57mj5kUvymjVdsmZKYwKOGHR3NhCpvWgEOI -1007025684-RFxCDFc4OPkt02bASmdci00TB4jgaPjfqxLRT58 -rvfv8MgexFKTqrPNSoGrdrZVNhV4fTJb2Bgz249nbvKNg diff --git a/twitter-api-credentials.txt.sample b/twitter-api-credentials.txt.sample new file mode 100644 index 0000000..54b1b3b --- /dev/null +++ b/twitter-api-credentials.txt.sample @@ -0,0 +1,5 @@ +# 1. line: consumer key, 2. consumer secret, 3. oauth token, 4. oauth secret +xxxxxxxxxxxxxxxxxxxx +xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx +xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx +xxxxxxxxxxxxxxxxxxxxxxxxxxxxx diff --git a/ur.df-testvalues.txt b/ur.df-testvalues.txt new file mode 100644 index 0000000..82aff49 --- /dev/null +++ b/ur.df-testvalues.txt @@ -0,0 +1,22 @@ +i1.macro & -5,869 & s.nsa & -7,3292 \\ +i2.civil & -4,4172 & s.is & -7,762 \\ +i3.health & -9,5973 & s.ebola & -6,1723 \\ +i4.agrar & -8,5183 & s.edathy & -7,0335 \\ +i5.labor & -7,523 & s.ukraine & -5,6195 \\ +i6.edu & -6,4374 & s.hk & -6,7599 \\ +i7.env & -7,1426 & s.mh17 & -6,1481 \\ +i8.energy & -7,3613 & s.gaza & -5,3861 \\ +i10.trans & -5,6718 & s.ferguson & -8,8098 \\ +i12.law & -5,9882 & s.boko & -10,4431 \\ +i13.social & -6,7765 & s.pegida & -6,3831 \\ +i14.house & -8,9577 & s.schumi & -12,4947 \\ +i15.finance & -5,9094 & s.mh370 & -7,8991 \\ +i16.defense & -6,9535 & s.esc & -11,6332 \\ +i17.science & -5,9651 & s.wulff & -12,4189 \\ +i18.trade & -7,6121 & s.tebartz & -12,6301 \\ +i19.ib & -3,8057 & s.gurlitt & -10,7665 \\ +i20.pubadmin & -6,7123 & s.hoen & -9,3721 \\ +i21.publand & -9,9521 & s.pistorius & -13,435 \\ +i24.stateadm & -13,435 & s.philae & -12,7024 \\ +i25.reuni & -5,2011 & s.wm & -8,7283 \\ + & & s.sotschi & -8,2339 \\