better year handling, removed early ssh pfadiskn clean, added better twitter-acc-list

This commit is contained in:
2015-01-10 01:50:01 +01:00
parent 34807191b9
commit b85be742d4
11 changed files with 2778 additions and 577 deletions
+163 -510
View File
@@ -1,512 +1,165 @@
cat("Positive in", curissue,"\n")
# Session setup: switch to the project directory and load the first politician
# list so its `name` column can be inspected.
setwd("~/Dokumente/Uni/Aktuell/BA-Arbeit/uni-ba-issuecomp")
read.csv("politiker2.csv")
list1 <- read.csv("politiker2.csv")
View(list1)
list1$name
list1$name[1]
# Walk over the rows of list1. length() of a data frame is its number of
# columns, so the previous 1:length(list1) only visited the first few rows.
for (i in seq_len(nrow(list1))) {
  lastname <- as.character(list1$name[i])
}
} # /for issuelist
} # /for tweets_curday
} # /for drange
d
# Scan every tweet of every day in `drange` and print the issues it mentions.
# Uses objects from the session: `drange` (days), `tweets` (columns
# `created_at` and `text`) and `issuelist` (named list of tag vectors).
for (d in seq_along(drange)) {
  # Put all tweets from specific day in a temporary DF
  tweets_curday <- tweets[tweets[, "created_at"] == drange[d], ]
  # seq_len() yields no iterations for a day without tweets (1:0 would run twice)
  for (t in seq_len(nrow(tweets_curday))) {
    # Select tweet's text, make it lowercase and remove hashtag indicators (#)
    curtext <- tolower(as.character(tweets_curday$text[t]))
    # Operate on curtext; the earlier call passed `text` and threw away the
    # lowercased tweet
    curtext <- str_replace_all(curtext, "#", "")
    for (i in seq_along(issuelist)) {
      curtags <- as.character(issuelist[[i]])
      curissue <- names(issuelist)[i]
      # Match against this issue's own tags (`tags` is never assigned here);
      # na.rm keeps a missing text from turning the if() condition into NA
      tags_found <- any(str_detect(curtext, curtags), na.rm = TRUE)
      if (tags_found) {
        # as.character() so the day is printed as a date, not as a number
        cat("Positive in", curissue, "from", as.character(drange[d]), "\n")
      }
    } # /for issuelist
  } # /for tweets_curday
} # /for drange
# Console inspection of the state left behind by the previous loop run.
View(tweets_curday)
tags_found
drange[d]
as.character(drange[d])
cat(as.character(drange[d]))
# NOTE(review): the next call is missing its closing parenthesis.
cat(as.character(drange[d])
# cat() on the bare element, for comparison with the as.character() variant above.
cat(drange[d])
# Scan every tweet of every day in `drange` and print the issues it mentions.
# Uses objects from the session: `drange`, `tweets` (columns `created_at` and
# `text`) and `issuelist` (named list of tag vectors).
for (d in seq_along(drange)) {
  # Put all tweets from specific day in a temporary DF
  tweets_curday <- tweets[tweets[, "created_at"] == drange[d], ]
  # seq_len() yields no iterations for a day without tweets (1:0 would run twice)
  for (t in seq_len(nrow(tweets_curday))) {
    # Select tweet's text, make it lowercase and remove hashtag indicators (#)
    curtext <- tolower(as.character(tweets_curday$text[t]))
    curtext <- str_replace_all(curtext, "#", "")
    for (i in seq_along(issuelist)) {
      curtags <- as.character(issuelist[[i]])
      curissue <- names(issuelist)[i]
      # Match against this issue's own tags (`tags` is never assigned here);
      # na.rm keeps a missing text from turning the if() condition into NA
      tags_found <- any(str_detect(curtext, curtags), na.rm = TRUE)
      if (tags_found) {
        cat("Positive in", curissue, "from", as.character(drange[d]), "\n")
      }
    } # /for issuelist
  } # /for tweets_curday
} # /for drange
View(tweets_curday)
curtext
# Re-run only the issue matching for the text currently held in `curtext`;
# `d` and `drange` are whatever the previous loop left in the session.
for (i in seq_along(issuelist)) {
  curtags <- as.character(issuelist[[i]])
  curissue <- names(issuelist)[i]
  # Match against this issue's own tags (`tags` is never assigned here)
  tags_found <- any(str_detect(curtext, curtags), na.rm = TRUE)
  if (tags_found) {
    cat("Positive in", curissue, "from", as.character(drange[d]), "\n")
  }
}
tags_found
# Scan every tweet of every day in `drange`; print a line for each issue that
# matches and "Nothing found" for each issue that does not.
# Uses objects from the session: `drange`, `tweets`, `issuelist`.
for (d in seq_along(drange)) {
  # Put all tweets from specific day in a temporary DF
  tweets_curday <- tweets[tweets[, "created_at"] == drange[d], ]
  # seq_len() yields no iterations for a day without tweets (1:0 would run twice)
  for (t in seq_len(nrow(tweets_curday))) {
    # Select tweet's text, make it lowercase and remove hashtag indicators (#)
    curtext <- tolower(as.character(tweets_curday$text[t]))
    curtext <- str_replace_all(curtext, "#", "")
    for (i in seq_along(issuelist)) {
      curtags <- as.character(issuelist[[i]])
      curissue <- names(issuelist)[i]
      # Match against this issue's own tags (`tags` is never assigned here);
      # na.rm keeps a missing text from turning the if() condition into NA
      tags_found <- any(str_detect(curtext, curtags), na.rm = TRUE)
      if (tags_found) {
        cat("Positive in", curissue, "from", as.character(drange[d]), "\n")
      } else {
        cat("Nothing found\n")
      }
    } # /for issuelist
  } # /for tweets_curday
} # /for drange
# Console inspection: try out ways of addressing a cell of `issues`
# (by position, by issue name, by loop index) before writing the counting loop.
View(issues)
curissue
issues[1,2]
issues[1,]
issues[1,curissue]
issues[2,curissue]
issues[t,curissue]
drange[d]
issues$date[d]
# For every day (row) in `issues`, count how many of that day's tweets mention
# each issue and add the count to the issue's column.
# Uses objects from the session: `issues` (column `date` plus one column per
# issue), `tweets` (columns `created_at`, `text`) and `issuelist`.
for (d in seq_len(nrow(issues))) {
  curdate <- issues$date[d]
  # Put all tweets from specific day in a temporary DF
  tweets_curday <- tweets[tweets[, "created_at"] == curdate, ]
  # seq_len() yields no iterations for a day without tweets (1:0 would run twice)
  for (t in seq_len(nrow(tweets_curday))) {
    # Select tweet's text, make it lowercase and remove hashtag indicators (#)
    curtext <- tolower(as.character(tweets_curday$text[t]))
    curtext <- str_replace_all(curtext, "#", "")
    for (i in seq_along(issuelist)) {
      curtags <- as.character(issuelist[[i]])
      curissue <- names(issuelist)[i]
      # Match against this issue's own tags (`tags` is never assigned here);
      # na.rm keeps a missing text from turning the if() condition into NA
      tags_found <- any(str_detect(curtext, curtags), na.rm = TRUE)
      if (tags_found) {
        issues[d, curissue] <- issues[d, curissue] + 1
      }
    } # /for issuelist
  } # /for tweets_curday
} # /for issues rows
# Console inspection of the loop variables after the run, followed by a reset
# of two issue counters to zero.
View(issues)
tags_found
curtags
curissue
curtext
curdate
# `tags` is the name the loop above passed to str_detect().
tags
issues$issue.edathy <- 0
issues$issue.ttip <- 0
View(issues)
# For every day (row) in `issues`, count how many of that day's tweets mention
# each issue (plain substring match on the issue's tags).
# Uses objects from the session: `issues`, `tweets`, `issuelist`.
for (d in seq_len(nrow(issues))) {
  curdate <- issues$date[d]
  # Put all tweets from specific day in a temporary DF
  tweets_curday <- tweets[tweets[, "created_at"] == curdate, ]
  # seq_len() yields no iterations for a day without tweets; 1:nrow() would
  # iterate over c(1, 0) and feed missing text into the if() below
  for (t in seq_len(nrow(tweets_curday))) {
    # Select tweet's text, make it lowercase and remove hashtag indicators (#)
    curtext <- tolower(as.character(tweets_curday$text[t]))
    curtext <- str_replace_all(curtext, "#", "")
    for (i in seq_along(issuelist)) {
      curtags <- as.character(issuelist[[i]])
      curissue <- names(issuelist)[i]
      # na.rm keeps a missing text from turning the if() condition into NA
      tags_found <- any(str_detect(curtext, curtags), na.rm = TRUE)
      if (tags_found) {
        issues[d, curissue] <- issues[d, curissue] + 1
      }
    } # /for issuelist
  } # /for tweets_curday
} # /for issues rows
# Console experiments: dump one day's tweets to disk, then try different ways
# of turning a tag into a pattern that matches whole words only.
View(issues)
View(tweets_curday)
write.csv(tweets_curday, "tweets_curday.csv")
curtags
# Test sentence with mixed case; "pack" occurs only inside "Spacko".
curtext <- "Dies ist ein toller Text zum Testen mit Spacko"
curtags <- c("toller", "testen", "pack")
str_detect(curtext, sprintf("%s", curtags))
curtags
# Attempt 1: pad each tag with spaces.
str_c("", curtags, "")
str_c(" ", curtags, " ")
curtags <- str_c(" ", curtags, " ")
str_detect(curtext, sprintf("%s", curtags))
# Back to the bare tags, this time against lower-cased text.
curtags <- c("toller", "testen", "pack")
curtext <- tolower(curtext)
str_detect(curtext, sprintf("%s", curtags))
curtext
# Test sentence where a tag is followed by punctuation instead of a space.
curtext <- "ein toller text testen(haha) spacko"
"bla"
str_detect(curtext, sprintf("%s", curtags))
# Attempt 2: require a non-word character (\W) on both sides of the tag.
str_detect(curtext, "\\Wtesten\\W")
str_detect(curtext, "\\Wtesten\\w")
str_detect(curtext, "\\Wtesten\\W")
str_detect(curtext, "\\Wpack\\W")
str_detect(curtext, "\\Wpacko\\W")
curtags <- str_c("\\W", curtags, "\\W")
str_detect(curtext, sprintf("%s", curtags))
curtags
curtext <- "ein toller text testen-mit spacko"
str_detect(curtext, sprintf("%s", curtags))
curtags
# Reset the counters: first two columns by hand, then every issue named in
# issues.xml at once.
issues$issue.edathy <- 0
issues$issue.ttip <- 0
issuelist <- xmlToList("issues.xml")
issueheads <- names(issuelist)
issues[issueheads] <- 0
View(issues)
# For every day (row) in `issues`, count how many of that day's tweets mention
# each issue, matching the issue's tags as whole words.
# Uses objects from the session: `issues`, `tweets`, `issuelist`.
for (d in seq_len(nrow(issues))) {
  curdate <- issues$date[d]
  # Put all tweets from specific day in a temporary DF
  tweets_curday <- tweets[tweets[, "created_at"] == curdate, ]
  # seq_len() yields no iterations for a day without tweets (1:0 would run twice)
  for (t in seq_len(nrow(tweets_curday))) {
    # Select tweet's text, make it lowercase and remove hashtag indicators (#)
    curtext <- tolower(as.character(tweets_curday$text[t]))
    curtext <- str_replace_all(curtext, "#", "")
    for (i in seq_along(issuelist)) {
      curtags <- as.character(issuelist[[i]])
      curissue <- names(issuelist)[i]
      # Whole-word pattern. A bare \W on each side needs an actual character
      # there, so a tag at the very start or end of the tweet was never
      # counted; ^ and $ are accepted as boundaries as well.
      curtags <- str_c("(^|\\W)", curtags, "(\\W|$)")
      # na.rm keeps a missing text from turning the if() condition into NA
      tags_found <- any(str_detect(curtext, curtags), na.rm = TRUE)
      if (tags_found) {
        issues[d, curissue] <- issues[d, curissue] + 1
      }
    } # /for issuelist
  } # /for tweets_curday
} # /for issues rows
View(issues)
# Reload the issue definitions and zero every issue counter before recounting.
issuelist <- xmlToList("issues.xml")
issueheads <- names(issuelist)
issues[issueheads] <- 0
# For every day (row) in `issues`, count how many of that day's tweets mention
# each issue, matching the issue's tags as whole words.
for (d in seq_len(nrow(issues))) {
  curdate <- issues$date[d]
  # Put all tweets from specific day in a temporary DF
  tweets_curday <- tweets[tweets[, "created_at"] == curdate, ]
  # seq_len() yields no iterations for a day without tweets (1:0 would run twice)
  for (t in seq_len(nrow(tweets_curday))) {
    # Select tweet's text, make it lowercase and remove hashtag indicators (#)
    curtext <- tolower(as.character(tweets_curday$text[t]))
    curtext <- str_replace_all(curtext, "#", "")
    for (i in seq_along(issuelist)) {
      curtags <- as.character(issuelist[[i]])
      curissue <- names(issuelist)[i]
      # Whole-word pattern. A bare \W on each side needs an actual character
      # there, so a tag at the very start or end of the tweet was never
      # counted; ^ and $ are accepted as boundaries as well.
      curtags <- str_c("(^|\\W)", curtags, "(\\W|$)")
      # na.rm keeps a missing text from turning the if() condition into NA
      tags_found <- any(str_detect(curtext, curtags), na.rm = TRUE)
      if (tags_found) {
        issues[d, curissue] <- issues[d, curissue] + 1
      }
    } # /for issuelist
  } # /for tweets_curday
} # /for issues rows
# Repeated resets of the issue counters. The last variant rebuilds `issues`
# from scratch with one row per entry of `drange` before adding the zeroed
# issue columns.
View(issues)
issuelist <- xmlToList("issues.xml")
issueheads <- names(issuelist)
issues[issueheads] <- 0
View(issues)
issuelist <- xmlToList("issues.xml")
issueheads <- names(issuelist)
issues[issueheads] <- 0
View(issues)
# Fresh table: a `date` column only, issue columns are added below.
issues <- data.frame(date = drange)
issuelist <- xmlToList("issues.xml")
issueheads <- names(issuelist)
issues[issueheads] <- 0
View(issues)
# For every day (row) in `issues`, count how many of that day's tweets mention
# each issue, matching the issue's tags as whole words. Prints each day as a
# progress indicator.
# Uses objects from the session: `issues`, `tweets`, `issuelist`.
for (d in seq_len(nrow(issues))) {
  curdate <- issues$date[d]
  cat(as.character(curdate), "\n")
  # Put all tweets from specific day in a temporary DF
  tweets_curday <- tweets[tweets[, "created_at"] == curdate, ]
  # seq_len() yields no iterations for a day without tweets (1:0 would run twice)
  for (t in seq_len(nrow(tweets_curday))) {
    # Select tweet's text, make it lowercase and remove hashtag indicators (#)
    curtext <- tolower(as.character(tweets_curday$text[t]))
    curtext <- str_replace_all(curtext, "#", "")
    for (i in seq_along(issuelist)) {
      curtags <- as.character(issuelist[[i]])
      curissue <- names(issuelist)[i]
      # Whole-word pattern. A bare \W on each side needs an actual character
      # there, so a tag at the very start or end of the tweet was never
      # counted; ^ and $ are accepted as boundaries as well.
      curtags <- str_c("(^|\\W)", curtags, "(\\W|$)")
      # na.rm keeps a missing text from turning the if() condition into NA
      tags_found <- any(str_detect(curtext, curtags), na.rm = TRUE)
      if (tags_found) {
        issues[d, curissue] <- issues[d, curissue] + 1
      }
    } # /for issuelist
  } # /for tweets_curday
} # /for issues rows
# Console experiments: plot the daily issue counts, first with base graphics,
# then with ggplot2 on a long-format ("melted") copy of `issues`.
View(issues)
plot(x = issues$date, y=issues$issue.ttip)
plot(x = issues$date, y=issues$issue.ttip, type="l")
# `test` holds two strings that spell column references, not the columns themselves.
test <- c("issues$issue.ttip", "issues$issue.nsa")
plot(x = issues$date, y=test, type="l")
test
melt
library(ggplot2)
library(reshape2)
# Long format: one row per (date, issue) pair, with columns date / variable / value.
df <- melt(issues,id="date")
View(df)
# First attempt maps x to `Year`; the melt above used "date" as id, and the following calls use x=date.
ggplot(df,aes(x=Year,y=value,colour=variable,group=variable)) + geom_line()
ggplot(df,aes(x=date,y=value,colour=variable,group=variable)) + geom_line()
# Trying different geoms and smoothing methods on the same aesthetics.
ggplot(df,aes(x=date,y=value,colour=variable,group=variable)) + geom_smooth()
ggplot(df,aes(x=date,y=value,colour=variable,group=variable)) + geom_point()
ggplot(df,aes(x=date,y=value,colour=variable,group=variable)) + geom_smooth(size=1,colour="red",method="loess", se=FALSE)
ggplot(df,aes(x=date,y=value,colour=variable,group=variable)) + geom_smooth(size=1,method="loess", se=FALSE)
ggplot(df,aes(x=date,y=value,colour=variable,group=variable)) + geom_line()
ggplot(df,aes(x=date,y=value,colour=variable,group=variable)) + geom_smooth(size=1,method="lm", se=FALSE)
ggplot(df,aes(x=date,y=value,colour=variable,group=variable)) + geom_smooth(size=1,method="gam", se=FALSE)
ggplot(df,aes(x=date,y=value,colour=variable,group=variable)) + geom_smooth(size=1,method="gam",formula = y ~ s(x, k = 3), se=FALSE)
ggplot(df,aes(x=date,y=value,colour=variable,group=variable)) + geom_smooth(size=1,method="gam",formula = y ~ x, se=FALSE)
ggplot(df,aes(x=date,y=value,colour=variable,group=variable)) + geom_smooth(size=1,method="loess",formula = y ~ x, se=FALSE)
ggplot(df,aes(x=date,y=value,colour=variable,group=variable)) + geom_smooth(size=3,method="loess",formula = y ~ x, se=FALSE)
ggplot(df,aes(x=date,y=value,colour=variable,group=variable)) + geom_smooth(size=0.5,method="loess",formula = y ~ x, se=FALSE)
# Two spellings of "hide the legend", evaluated on their own (not added to a plot).
# NOTE(review): opts() is presumably the older ggplot2 API - confirm against the installed version.
opts(legend.position = "none")
theme(legend.position = "none")
View(issues)
# Console experiments: load packages, save the daily table, and work out how
# to build a vector of week start dates between date_start and date_end.
require(lubridate)
str_extract(lastname, "\\w+?")
require(XML)
require(ggplot2)
require(reshape2)
date_start
# First attempt passes the file name positionally; the second names it via file =.
save(issues, "issues.RData")
save(issues, file = "issues.RData")
weeks(1)
weeks(354)
drange
date_start + weeks(0:7)
date_start <- as.Date("2014-01-01")
date_end <- as.Date("2014-12-01")
# From here on `drange` is overwritten with the number of days between the two dates.
drange <- as.integer(date_end - date_start)
drange / 7
round(drange/7)
round(drange/7,0)
signif(drange/7)
signif(drange/7, 0)
# Keep a backup of the daily counts before experimenting further.
issues_bak <- issues
View(df)
issues_melt <- melt(issues,id="date")
ggplot(issues_melt,aes(x=date,y=value,colour=variable,group=variable)) + geom_line()
ggplot(issues_melt,aes(x=date,y=value,colour=variable,group=variable)) + geom_smooth(size=0.5,method="loess",formula = y ~ x, se=FALSE)
drange <- as.integer(date_end - date_start)
d
# `wrange` starts out as a day count, then becomes a (fractional) week count.
wrange <- as.integer(date_end - date_start)
wrange
wrange <- (as.integer(date_end - date_start) / 7)
wrange
# NOTE(review): the next line ends in a stray "." - the line after it is the corrected call.
format(round(wrange, 1), nsmall = 1).
format(round(wrange, 1), nsmall = 1)
# Trying format strings to drop the fractional part.
sprintf("%.1f",wrange)
sprintf("%f",wrange)
# "%.1" has no conversion letter.
sprintf("%.1",wrange)
sprintf("%1f",wrange)
sprintf("%.0f",wrange)
floor(wrange)
# Settled approach: whole number of weeks, then one date per week.
wrange <- (as.integer(date_end - date_start) / 7)
wrange <- floor(wrange)
# `wdrange` is not assigned anywhere in this excerpt; the next line uses `wrange`.
wrange <- date_start + weeks(0:wdrange)
wrange <- date_start + weeks(0:wrange)
wrange
wrange[3]
wrange[3] + 1800
wrange[3] + 1
wrange[3] + 7
View(issues)
# Console experiments: compare daily dates with week start dates and build the
# `issues_week` table (one row per week start).
# The first lines use `issues$dates`; the table built earlier has a `date` column,
# and the later lines switch to `issues$date`.
issues$dates[3] - wrange [2]
issues$dates[3] - wrange[2]
wrange
class(wrange[2])
class(issues$dates[3])
class(issues$date[3])
issues$date[3] - wrange[2]
issues$date[1]
issues$date[2]
wrange[1]
wrange[2]
wrange[1:2]
days(wrange[1:2])
ddays(wrange[1:2])
days(1:2)
# The seven days that belong to the first week.
wrange[1] + days(0:6)
wrange
issues_week <- data.frame(week = wrange)
View(issues_week)
# At this point `wrange` is still the vector of week dates; it is recomputed as a number below.
wrange <- floor(wrange)
wrange <- (as.integer(date_end - date_start) / 7)
wrange <- floor(wrange)
wrange
wrange <- date_start + weeks(0:wrange)
issues_week <- data.frame(week = wrange)
View(issues_week)
# Final variant: one week fewer (floor(...) - 1) than the previous attempt.
wrange <- (as.integer(date_end - date_start) / 7)
wrange <- floor(wrange) - 1
wrange <- date_start + weeks(0:wrange)
issues_week <- data.frame(week = wrange)
View(issues_week)
wrange[1] + days(0:6)
issues_week$week[2]
issues_week$week[2] + 1
# `w` is only assigned on the line after this first use.
currange <- issues_week$week[w] + days(0:6)
w <- 1
currange <- issues_week$week[w] + days(0:6)
currange
currange[7]
# Console experiments: find the issue columns by name and try to select the
# rows of `issues` that fall into one week.
str_detect(names(issues), "^issue")
# NOTE(review): "\." is written with a single backslash here; the next two lines double it.
str_extract(names(issues), "^issue\.+")
str_extract(names(issues), "^issue\\.+")
str_extract(names(issues), "^issue\\..+")
issueheads
# Row selection by issue name vs. column selection by issue name.
issues[issueheads[2],]
issues[,issueheads[2]]
issues[,issueheads[1]]
View(issues_week)
View(issues)
# Without the trailing comma the logical vector is not used as a row index.
issues[issues[, "date"] == wrange[1]]
issues[issues[, "date"] == wrange[1], ]
issues[issues[, "date"] == wrange[3], ]
# `currange` holds seven dates, so == compares element by element with
# recycling rather than testing membership.
issues[issues[, "date"] == currange, ]
currange
# NOTE(review): single "=" where "==" is used on the surrounding lines.
issues[issues[, "date"] = currange, ]
issues[issues[, "date"] == currange, ]
View(issues)
# NOTE(review): presumably warnings() was intended, to display the last warnings - confirm.
warning()
issues[issues[, "date"] == currange, ]
warning()
issues[issues[, "date"] == sprintf("%s", currange), ]
# Unquoted 2014-01-02 is arithmetic (2014 minus 1 minus 2); the next lines quote the date.
issues[issues[, "date"] == 2014-01-02, ]
issues[issues[, "date"] == "2014-01-02", ]
issues[issues[, "date"] == "2014-01-03", ]
issues[issues[, "date"] == "2014-01-07", ]
issues[issues[, "date"] == "2014-01-08", ]
# A "from:to" string is compared as one literal string.
issues[issues[, "date"] == "2014-01-01:2014-01-08", ]
issues[issues[, "date"] == "2014-01-01:2014-01-07", ]
test <-issues[issues[, "date"] == currange, ]
test
# `testz` is a typo for `test`; corrected on the next line.
sum(testz)
sum(test)
# Console experiments: address one cell of `issues` by row index `d` and column
# index `c`, then sum seven consecutive rows of one issue column.
curweek <- issues_week$week[w]
currange <- curweek + days(0:6)
curweek
currange
d=1
curday <- issues$date[d]
curday
names(issues)[2]
# `c` is used as a column index here but is only assigned a number three lines further down.
curvalue <- issues[d,c]
d
c
c <- 2
curvalue <- issues[d,c]
# `vurv` is a typo for `curvalue`.
vurv
curvalue
c=56
curvalue
curvalue <- issues[d,c]
curvalue
c
d
View(issues)
issues[2,7]
issues[7,2]
issues[7,3]
curissue
curissue <- names(issues)[c]
c
c = 7
curissue <- names(issues)[c]
curvalue <- issues[d,c]
curvalue
# Same cell addressed by column name and by column number.
issues[d,curissue]
issues[d,7]
issues[d,curissue]
curissue
d
# ":" binds tighter than "+", so d:d+6 is (d:d)+6, i.e. the single row d+6;
# wrapping the whole expression in parentheses does not change that.
issues[d:d+6,curissue]
issues[(d:d+6),curissue]
# Computing the end row first gives the intended seven-row range d..d2.
d2 <- d+6
d2
issues[(d:d2),curissue]
issues[d:d2,curissue]
sum(issues[d:d2,curissue])
# Add zeroed issue columns to the weekly table.
issues_week[issueheads] <- 0
View(issues_week)
issues_week[w,curissue]
View(issues)
# Aggregate the daily counts in `issues` into weekly sums in `issues_week`.
# For each week start, locate the matching day in `issues` and add up that day
# plus the six following rows for every issue column.
# The previous version had an unconditional `break` after the if-block, so
# only the first row of `issues` was ever compared against each week.
for (w in seq_len(nrow(issues_week))) {
  curweek <- issues_week$week[w]
  # First row whose date equals the week start; NA when the day is not present
  d <- which(issues$date == curweek)[1]
  if (is.na(d)) {
    next
  }
  # Clamp to the last row so an incomplete final week does not sum NA rows
  d2 <- min(d + 6, nrow(issues))
  # Every column except the first (`date`) is an issue counter
  for (curissue in names(issues)[-1]) {
    curvalue <- sum(issues[d:d2, curissue])
    issues_week[w, curissue] <- curvalue
  } # /for issues columns
} # /for issues_week
View(issues_week)
# Aggregate the daily counts in `issues` into weekly sums in `issues_week`.
# For each week start, locate the matching day in `issues` and add up that day
# plus the six following rows for every issue column.
for (w in seq_len(nrow(issues_week))) {
  curweek <- issues_week$week[w]
  # Direct lookup of the week's first day instead of scanning every row
  d <- which(issues$date == curweek)[1]
  if (is.na(d)) {
    next
  }
  # Clamp to the last row so an incomplete final week does not sum NA rows
  d2 <- min(d + 6, nrow(issues))
  # Every column except the first (`date`) is an issue counter
  for (curissue in names(issues)[-1]) {
    curvalue <- sum(issues[d:d2, curissue])
    issues_week[w, curissue] <- curvalue
  } # /for issues columns
} # /for issues_week
# Compare the daily and weekly tables side by side, then plot the weekly counts
# from a long-format copy (one row per week/issue pair).
View(issues_week)
View(issues)
View(issues_week)
View(issues)
View(issues_week)
issues_week_melt <- melt(issues_week,id="week")
ggplot(issues_week_melt,aes(x=week,y=value,colour=variable,group=variable)) + geom_line()
ggplot(issues_week_melt,aes(x=week,y=value,colour=variable,group=variable)) + geom_smooth(size=0.5,method="loess",formula = y ~ x, se=FALSE)
# library() stops with an error when stringr is missing; require() would only
# return FALSE and let the str_* calls below fail later.
library(stringr)
# Lazy quantifier: matches just the first word character of `lastname`.
str_extract(lastname, "\\w+?")
# Greedy quantifier: matches the whole first run of word characters.
str_extract(lastname, "\\w+")
# Load both politician lists used by the matching loops.
list2 <- read.csv("politiker.csv")
View(list2)
list1 <- read.csv("politiker2.csv")
list2 <- read.csv("politiker.csv")
# For every person in list1, print the screen names of all list2 entries whose
# name contains the first word of the list1 name.
# NOTE(review): the first word is presumably the surname - confirm the name
# format in politiker2.csv.
# Iterates over rows: length() of a data frame is its column count, so the
# previous 1:length(...) bounds covered only the first few rows of each list.
names2 <- as.character(list2$name)
for (i in seq_len(nrow(list1))) {
  name1 <- as.character(list1$name[i])
  lastname1 <- str_extract(name1, "\\w+")
  if (is.na(lastname1)) {
    next # no usable name in this row
  }
  # which() drops NA comparisons, so missing names in list2 are skipped
  for (a in which(str_detect(names2, lastname1))) {
    name2 <- names2[a]
    acc <- as.character(list2$screenname[a])
    # Trailing newline so each hit is printed on its own line
    cat(name1, ":", acc, "\n", sep = "")
  }
}
cat(name1,":",acc,sep = "")
# For every person in list1, report each list2 entry whose name contains the
# first word of the list1 name (debug variant: prints only "Found").
# Iterates over rows: length() of a data frame is its column count, so the
# previous 1:length(...) bounds covered only the first few rows of each list.
names2 <- as.character(list2$name)
for (i in seq_len(nrow(list1))) {
  name1 <- as.character(list1$name[i])
  lastname1 <- str_extract(name1, "\\w+")
  if (is.na(lastname1)) {
    next # no usable name in this row
  }
  # which() drops NA comparisons, so missing names in list2 are skipped
  for (a in which(str_detect(names2, lastname1))) {
    name2 <- names2[a]
    acc <- as.character(list2$screenname[a])
    cat("Found\n")
  }
}
a
# For every person in list1, print "name:screenname" for all list2 entries
# whose name contains the first word of the list1 name.
names2 <- as.character(list2$name)
# seq_len() yields no iterations for an empty list (1:0 would run twice)
for (i in seq_len(nrow(list1))) {
  name1 <- as.character(list1$name[i])
  lastname1 <- str_extract(name1, "\\w+")
  if (is.na(lastname1)) {
    next # no usable name in this row
  }
  # One vectorised match over all of list2; which() drops NA comparisons that
  # would otherwise abort the if() of a row-by-row scan
  for (a in which(str_detect(names2, lastname1))) {
    name2 <- names2[a]
    acc <- as.character(list2$screenname[a])
    cat(name1, ":", acc, "\n", sep = "")
  }
}
# For every person in list1, print "name --> screenname" for all list2 entries
# whose name contains the first word of the list1 name, followed by a blank
# line after each person that had at least one hit.
names2 <- as.character(list2$name)
# seq_len() yields no iterations for an empty list (1:0 would run twice)
for (i in seq_len(nrow(list1))) {
  name1 <- as.character(list1$name[i])
  lastname1 <- str_extract(name1, "\\w+")
  if (is.na(lastname1)) {
    next # no usable name in this row
  }
  # One vectorised match over all of list2; which() drops NA comparisons that
  # would otherwise abort the if() of a row-by-row scan
  hits <- which(str_detect(names2, lastname1))
  detect <- length(hits) > 0
  for (a in hits) {
    name2 <- names2[a]
    acc <- as.character(list2$screenname[a])
    cat(name1, " --> ", acc, "\n", sep = "")
  }
  if (detect) {
    cat("\n")
  }
}
# For every person in list1, print "name --> screenname(list2 name)" for all
# list2 entries whose name contains the first word of the list1 name, followed
# by a blank line after each person that had at least one hit.
names2 <- as.character(list2$name)
# seq_len() yields no iterations for an empty list (1:0 would run twice)
for (i in seq_len(nrow(list1))) {
  name1 <- as.character(list1$name[i])
  lastname1 <- str_extract(name1, "\\w+")
  if (is.na(lastname1)) {
    next # no usable name in this row
  }
  # One vectorised match over all of list2; which() drops NA comparisons that
  # would otherwise abort the if() of a row-by-row scan
  hits <- which(str_detect(names2, lastname1))
  detect <- length(hits) > 0
  for (a in hits) {
    name2 <- names2[a]
    acc <- as.character(list2$screenname[a])
    cat(name1, " --> ", acc, "(", name2, ")", "\n", sep = "")
  }
  if (detect) {
    cat("\n")
  }
}
# For every person in list1, print "name --> screenname (list2 name)" for all
# list2 entries whose name contains the first word of the list1 name, followed
# by a blank line after each person that had at least one hit.
names2 <- as.character(list2$name)
# seq_len() yields no iterations for an empty list (1:0 would run twice)
for (i in seq_len(nrow(list1))) {
  name1 <- as.character(list1$name[i])
  lastname1 <- str_extract(name1, "\\w+")
  if (is.na(lastname1)) {
    next # no usable name in this row
  }
  # One vectorised match over all of list2; which() drops NA comparisons that
  # would otherwise abort the if() of a row-by-row scan
  hits <- which(str_detect(names2, lastname1))
  detect <- length(hits) > 0
  for (a in hits) {
    name2 <- names2[a]
    acc <- as.character(list2$screenname[a])
    cat(name1, " --> ", acc, " (", name2, ")", "\n", sep = "")
  }
  if (detect) {
    cat("\n")
  }
}
# Console experiments: build the output line as a single string instead of
# printing it piecewise. c() does not paste its arguments (sep = "" just
# becomes one more named element), so str_c() is tried next.
c(name1," --> ",acc," (",name2,")","\n",sep = "")
str_c(name1," --> ",acc," (",name2,")","\n",sep = "")
str_c(name1," --> ",acc," (",name2,")")
result <- str_c(name1," --> ",acc," (",name2,")")
# For every person in list1, append "name --> screenname (list2 name)" to
# merge.txt for all list2 entries whose name contains the first word of the
# list1 name; each person with hits is followed by an empty line.
# Lines are collected first and appended in one write instead of reopening the
# file for every line. The file is still appended to, so re-running adds the
# same lines again.
names2 <- as.character(list2$name)
chunks <- vector("list", nrow(list1))
for (i in seq_len(nrow(list1))) {
  name1 <- as.character(list1$name[i])
  lastname1 <- str_extract(name1, "\\w+")
  if (is.na(lastname1)) {
    next # no usable name in this row
  }
  # which() drops NA comparisons, so missing names in list2 are skipped
  hits <- which(str_detect(names2, lastname1))
  if (length(hits) == 0) {
    next
  }
  result <- str_c(
    name1, " --> ", as.character(list2$screenname[hits]),
    " (", names2[hits], ")"
  )
  # Trailing "" produces the blank separator line after this person
  chunks[[i]] <- c(result, "")
}
merged <- unlist(chunks)
if (length(merged) > 0) {
  write(merged, "merge.txt", append = TRUE)
}
# For every person in list1, append "name --> screenname (list2 name) verified"
# to merge.txt for all list2 entries whose name contains the first word of the
# list1 name; each person with hits is followed by an empty line.
# Lines are collected first and appended in one write instead of reopening the
# file for every line. The file is still appended to, so re-running adds the
# same lines again.
names2 <- as.character(list2$name)
chunks <- vector("list", nrow(list1))
for (i in seq_len(nrow(list1))) {
  name1 <- as.character(list1$name[i])
  lastname1 <- str_extract(name1, "\\w+")
  if (is.na(lastname1)) {
    next # no usable name in this row
  }
  # which() drops NA comparisons, so missing names in list2 are skipped
  hits <- which(str_detect(names2, lastname1))
  if (length(hits) == 0) {
    next
  }
  result <- str_c(
    name1, " --> ", as.character(list2$screenname[hits]),
    " (", names2[hits], ") ", as.character(list2$verified[hits])
  )
  # Trailing "" produces the blank separator line after this person
  chunks[[i]] <- c(result, "")
}
merged <- unlist(chunks)
if (length(merged) > 0) {
  write(merged, "merge.txt", append = TRUE)
}
# For every person in list1, append "name --> screenname (list2 name) verified"
# to merge.txt for all list2 entries whose name contains the first word of the
# list1 name; each person with hits is followed by an empty line.
# Lines are collected first and appended in one write instead of reopening the
# file for every line. The file is still appended to, so re-running adds the
# same lines again.
names2 <- as.character(list2$name)
chunks <- vector("list", nrow(list1))
for (i in seq_len(nrow(list1))) {
  name1 <- as.character(list1$name[i])
  lastname1 <- str_extract(name1, "\\w+")
  if (is.na(lastname1)) {
    next # no usable name in this row
  }
  # which() drops NA comparisons, so missing names in list2 are skipped
  hits <- which(str_detect(names2, lastname1))
  if (length(hits) == 0) {
    next
  }
  result <- str_c(
    name1, " --> ", as.character(list2$screenname[hits]),
    " (", names2[hits], ") ", as.character(list2$verified[hits])
  )
  # Trailing "" produces the blank separator line after this person
  chunks[[i]] <- c(result, "")
}
merged <- unlist(chunks)
if (length(merged) > 0) {
  write(merged, "merge.txt", append = TRUE)
}