JSON fixed; handling of large numbers still to do
This commit is contained in:
+26
-12
@@ -4,10 +4,10 @@ require(RCurl)
|
||||
require(devtools)
|
||||
require(RTwitterAPI)
|
||||
|
||||
source("functions.R")
|
||||
|
||||
setwd("~/Dokumente/Uni/Aktuell/BA-Arbeit/uni-ba-issuecomp")
|
||||
|
||||
source("functions.R")
|
||||
|
||||
# Set curl handle for friendly scraping
|
||||
handle <- getCurlHandle(httpheader = list(from = "max.mehl@uni.kn",
|
||||
'user-agent' = str_c(R.version$version.string)
|
||||
@@ -47,46 +47,60 @@ api_params <- c(
|
||||
)
|
||||
|
||||
api_url <- "https://api.twitter.com/1.1/statuses/user_timeline.json";
|
||||
# api_url <- "https://api.twitter.com/1.1/statuses/show.json";
|
||||
user <- "peteraltmaier"
|
||||
user <- "GregorGysi"
|
||||
max_count <- "200"
|
||||
max_id <- "999999999999999999"
|
||||
loop <- 1
|
||||
keep <- c("created_at", "id_str", "text", "retweet_count")
|
||||
rm(tweets_full, tweets_temp)
|
||||
rm(tweets_full)
|
||||
repeat {
|
||||
# Define specific search query
|
||||
query <- c(include_rts=1, exclude_replies="true", trim_user="true", include_entities="false",
|
||||
screen_name=user,
|
||||
count=max_count,
|
||||
max_id=max_id);
|
||||
# query <- c(trim_user="true", include_entities="false",
|
||||
# id="431858659656990721");
|
||||
|
||||
# If a tweets_full DB already exists (after the first loop this should be the case)
|
||||
if(exists("tweets_full")) {
|
||||
current <- twitter_api_call(api_url, query, api_params)
|
||||
tweets_temp <- fromJSON(current)
|
||||
tweets_temp <- fromJSON(correctJSON(current))
|
||||
tweets_temp <- tweets_temp[keep]
|
||||
tweets_full <- insertRow(tweets_full, tweets_temp)
|
||||
rm(tweets_temp)
|
||||
}
|
||||
# First loop
|
||||
else {
|
||||
current <- twitter_api_call(api_url, query, api_params)
|
||||
tweets_full <- fromJSON(correctJSON(current))
|
||||
tweets_full <- tweets_full[keep]
|
||||
}
|
||||
|
||||
# Is the last tweet in tweets_full from 2013?
|
||||
status <- str_detect(tweets_full$created_at[nrow(tweets_full)], "2013$")
|
||||
# Last loop is reached. Now clean up the data frame
|
||||
if (status) {
|
||||
rm(tweets_temp)
|
||||
|
||||
# Delete all tweets from 2013
|
||||
old <- 0
|
||||
for(r in 1:nrow(tweets_full)) {
|
||||
status <- str_detect(tweets_full$created_at[r], "2013$")
|
||||
if(is.na(status)) { status <- FALSE }
|
||||
if(status) {
|
||||
tweets_full <- tweets_full[-r,]
|
||||
old <- old + 1
|
||||
}
|
||||
}
|
||||
break
|
||||
if(old > 0) {
|
||||
old <- old - 1
|
||||
tweets_full <- head(tweets_full, -old)
|
||||
}
|
||||
rm(old)
|
||||
|
||||
break # End loop because 2013 is reached
|
||||
}
|
||||
|
||||
# The last tweet is still from 2014, so we need another loop
|
||||
else {
|
||||
max_id <- as.character(as.numeric(tweets_full$id_str[nrow(tweets_full)]) - 1)
|
||||
max_id <- as.character(as.numeric(tweets_full$id_str[nrow(tweets_full)]) - 2)
|
||||
loop <- loop + 1
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user