diff --git a/extract-twitter-accounts.R b/extract-twitter-accounts.R index 5c4a647..9331006 100644 --- a/extract-twitter-accounts.R +++ b/extract-twitter-accounts.R @@ -39,10 +39,10 @@ api_params <- c( "oauth_token_secret" = "rvfv8MgexFKTqrPNSoGrdrZVNhV4fTJb2Bgz249nbvKNg" ) -api_url2 <- "https://api.twitter.com/1.1/statuses/show.json" -id2="325330742961909760" -query2 <- c(id=id2, trim_user="true", include_entities="false") -current2 <- twitter_api_call(api_url2, query2, api_params) +#api_url2 <- "https://api.twitter.com/1.1/statuses/show.json" +#id2="498492933922754560" # 499533113676931073(\" ), 325320073906622464(\\>), 498492933922754560(\"W) +#query2 <- c(id=id2, trim_user="true", include_entities="false") +#current2 <- twitter_api_call(api_url2, query2, api_params) @@ -50,7 +50,8 @@ api_url <- "https://api.twitter.com/1.1/statuses/user_timeline.json"; max_count <- "200" keep <- c("created_at", "id_str", "text", "retweet_count") tweets_full <- data.frame(user=character(), name=character(), created_at=character(), id_str=character(), text=character(), retweet_count=character()) -for(a in 201:nrow(acc_df)) { +tweets_complete <- tweets_full +for(a in 1:nrow(acc_df)) { user <- as.character(acc_df$screenname[a]) name <- as.character(acc_df$name[a]) max_id <- "999999999999999999" @@ -67,7 +68,7 @@ for(a in 201:nrow(acc_df)) { current <- twitter_api_call(api_url, query, api_params) tweets_temp <- fromJSON(correctJSON(current)) - ## STAT ERROR HANDLING ## + ## START ERROR HANDLING ## # Check for empty API returns status <- length(tweets_temp) @@ -95,9 +96,9 @@ for(a in 201:nrow(acc_df)) { status <- "errors" %in% names(tweets_temp) if(status) { cat("[WARNING] Error in API request:", tweets_temp$errors[1,1],"\n") - - # Rate limit exceeded? status <- tweets_temp$errors[1,2] + + # "Rate limit exceeded" if(status == 88) { rate_api_url <- "https://api.twitter.com/1.1/application/rate_limit_status.json" rate_query <-c (resources="statuses") @@ -107,7 +108,17 @@ for(a in 201:nrow(acc_df)) { wait <- round(resettime - curtime + 10) cat("[INFO] Rate limit is exceeded. Now waiting",wait,"seconds.\n") Sys.sleep(wait) - } + } + + # "Sorry, that page does not exist" + if(status == 34) { + if(error > 2) { + cat("[WARNING] 3x Not existing page. Aborting now.\n") + break + } + error <- error + 1 + } + rm(tweets_temp) Sys.sleep(3) next @@ -128,10 +139,10 @@ for(a in 201:nrow(acc_df)) { cat("[INFO] Last tweet of temp is last tweet of full. Abort loop and begin with next user.\n") break } - + + ## Last loop is reached. Now clear the data frame # Is the last tweet in tweets_temp from 2013? status <- str_detect(tweets_temp$created_at[nrow(tweets_temp)], "2014$") - # Last loop is reached. Now clear the data frame if (!status) { # Starting when tweet not from 2014 # Delete all tweets other than from 2014 @@ -177,9 +188,11 @@ for(a in 201:nrow(acc_df)) { } } # /repeat - stat_tweet <- nrow(tweets_full) - cat("User:",user,"finished after",loop,"loops. Total Tweets now:",nrow(tweets_full),"\n") - write.csv(tweets_full, "tweets_full.csv") + tweets_complete <- insertRow(tweets_complete, tweets_full) + tweets_full <- head(tweets_full, -nrow(tweets_full)) # Empty tweets_full + cat("User:",user,"finished after",loop,"loops. Total Tweets now:",nrow(tweets_complete),"\n") + write.csv(tweets_complete, "tweets_complete.csv") + # Every tweet from 2014 from user[r] is downloaded. Now next user in for-loop }