diff --git a/extract-twitter-accounts.R b/extract-twitter-accounts.R index e90c910..837fbeb 100644 --- a/extract-twitter-accounts.R +++ b/extract-twitter-accounts.R @@ -18,18 +18,6 @@ acc_url <- "http://www.bundestwitter.de/api/politiker" #acc_json <- readLines("politiker.txt") acc_df <- fromJSON(acc_url) -# -------------- - - - - -url <- "https://api.twitter.com/1.1/statuses/user_timeline.json"; -query <- c(screen_name="mxmehl", include_rts=1, count="200", exclude_replies="true", trim_user="true", include_entities="false"); - -as.character(as.numeric(result$id_str[nrow(result)]) - 1) -str_detect(result$created_at[nrow(result)], "2013$") - -result <- fromJSON(correctJSON(twitter_api_call(url, query, params))) # --------------- # http://www.joyofdata.de/blog/twitters-rest-api-v1-1-with-r-for-linux-and-windows/ @@ -53,6 +41,7 @@ max_id <- "999999999999999999" loop <- 1 keep <- c("created_at", "id_str", "text", "retweet_count") rm(tweets_full) +last_id <- NULL repeat { # Define specific search query query <- c(include_rts=1, exclude_replies="true", trim_user="true", include_entities="false", @@ -75,6 +64,10 @@ repeat { tweets_full <- tweets_full[keep] } + # Now sleep 3 seconds to dodge the 300 queries / 15 min rate limit + cat("User:",user,"in loop:",loop,"- now waiting 3 secs...\n") + Sys.sleep(3) + # Is the last tweet in tweets_full from 2013? status <- str_detect(tweets_full$created_at[nrow(tweets_full)], "2013$") # Last loop is reached. Now clear the data frame @@ -100,8 +93,9 @@ repeat { # The last tweet is still from 2014, so we need another loop else { - max_id <- as.character(as.numeric(tweets_full$id_str[nrow(tweets_full)]) - 2) - loop <- loop + 1 + # Set max_id to gather the next 200 tweets + max_id <- tweets_full$id_str[nrow(tweets_full)] + loop <- loop + 1 # just for stats } }