starting df cleanup
This commit is contained in:
@@ -11,11 +11,6 @@ setwd("~/Dokumente/Uni/Aktuell/BA-Arbeit/uni-ba-issuecomp")
|
|||||||
|
|
||||||
source("functions.R")
|
source("functions.R")
|
||||||
|
|
||||||
# # Set curl handle for friendly scraping
|
|
||||||
# handle <- getCurlHandle(httpheader = list(from = "max.mehl@uni.kn",
|
|
||||||
# 'user-agent' = str_c(R.version$version.string)
|
|
||||||
# )
|
|
||||||
# )
|
|
||||||
|
|
||||||
acc_url <- "http://www.bundestwitter.de/api/politiker"
|
acc_url <- "http://www.bundestwitter.de/api/politiker"
|
||||||
acc_df <- fromJSON(acc_url)
|
acc_df <- fromJSON(acc_url)
|
||||||
@@ -166,5 +161,37 @@ for(a in 346:nrow(acc_df)) {
|
|||||||
|
|
||||||
# Every tweet from 2014 from user[a] is downloaded. Now next user in for-loop
|
# Every tweet from 2014 from user[a] is downloaded. Now next user in for-loop
|
||||||
}
|
}
|
||||||
|
# Remove the helper objects that are presumably left behind by the download
# loop above (`a` is its loop variable).
rm(list = c(
  "a", "code", "current", "error", "loop", "max_id", "name", "query",
  "r", "status", "user", "wait", "tweets_full", "tweets_temp"
))
|
||||||
|
|
||||||
|
|
||||||
|
# CLEAN DATA FRAME ---------------------------------------------------------
|
||||||
|
|
||||||
|
# NOTE(review): the value assigned here is never read - `tweets` is reassigned
# from `tweets_complete` in the "Remove duplicates" step right below.
# `tweets_bak` is not defined in the visible part of the script; presumably it
# is a manual backup used to restore `tweets` during interactive work - confirm
# it exists before sourcing the file as a whole.
tweets <- tweets_bak
|
||||||
|
|
||||||
|
# Remove duplicates
# `tweets` becomes the working copy without repeated rows. Both the raw
# collection and the de-duplicated copy are written to disk before the raw
# object is dropped from the workspace.
tweets <- unique(tweets_complete)
save(list = "tweets_complete", file = "tweets_complete.RData")
save(list = "tweets", file = "tweets.RData")
rm(list = "tweets_complete")
|
||||||
|
|
||||||
|
# Format dates in data frame
# Switch the time locale to "C" before parsing - presumably so that the
# %a / %b fields match English day and month abbreviations regardless of the
# system language (TODO confirm against the raw `created_at` strings).
Sys.setlocale(category = "LC_TIME", locale = "C")

# Turn the timestamp strings into POSIXct values ...
tweets[["created_at"]] <- as.POSIXct(
  tweets[["created_at"]],
  format = "%a %b %d %H:%M:%S %z %Y"
)
# ... and sort the tweets by that timestamp, oldest first.
tweets <- tweets[order(tweets[["created_at"]]), ]
|
||||||
|
|
||||||
|
# Finally delete every tweet not from 2014
#
# A vectorised mask replaces the former row-by-row scan, which
#   * wiped the whole data frame whenever the oldest tweet was already from
#     2014 (the index vector stayed NULL and `tweets[-NULL, ]` selects zero
#     rows),
#   * stopped at the first 2014 row and therefore never removed tweets from
#     later years,
#   * failed on an empty data frame (`1:nrow(tweets)` iterates over 1, 0) and
#     on a missing timestamp (`if (NA != "2014")` is an error).
# The year is still derived with format(), i.e. in the same time zone the loop
# used.
tweet_year <- format(tweets$created_at, "%Y")

# Rows whose timestamp could not be parsed cannot be attributed to 2014;
# they are removed too, but not silently.
if (anyNA(tweet_year)) {
  warning(
    sum(is.na(tweet_year)),
    " tweet(s) without a parseable created_at are dropped",
    call. = FALSE
  )
}

# %in% never yields NA, so the mask is safe to use as a row index.
tweets <- tweets[tweet_year %in% "2014", ]
rm(tweet_year)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Binary file not shown.
Reference in New Issue
Block a user