working

2014-12-07 12:06:09 +01:00
parent c296884028
commit 39571e0890
10 changed files with 310 additions and 109286 deletions
@@ -1,7 +1,18 @@
+cat("[INFO] Last tweet of temp is last tweet of full. Abort loop and begin with next user.\n")
+#break
+}
+tweets_full$id_str[nrow(tweets_full)]
+tweets_temp$id_str[nrow(tweets_temp)]
+tweets_complete$user[20674]
+a
+current
+tweets_full <- data.frame(user=character(), name=character(), created_at=character(), id_str=character(), text=character(), retweet_count=character())
+for(a in 1:nrow(acc_df)) {
 user <- as.character(acc_df$screenname[a])
 name <- as.character(acc_df$name[a])
 max_id <- "999999999999999999"
 loop <- 1
+error <- 0
 repeat {
 # Define specific search query
 query <- c(include_rts=1, exclude_replies="true", trim_user="true", include_entities="false",
@@ -10,41 +21,27 @@ count=max_count,
 max_id=max_id);
 # At first, work with an temporary tweet-DB
 current <- twitter_api_call(api_url, query, api_params)
+rm(tweets_temp)
 tweets_temp <- fromJSON(correctJSON(current))
-## STAT ERROR HANDLING ##
-# Check for empty API returns
-status <- length(tweets_temp)
-if(status == 0) {
-cat("[WARNING] Empty API result. Trying again.\n")
-rm(tweets_temp)
-Sys.sleep(3)
-next
+## START ERROR HANDLING ##
+# Empty API output
+status <- errorEmptyAPI(tweets_temp)
+if(status == 1) { Sys.sleep(3);error <- error + 1;next}
+if(status == 2) {break}
+# Contains "error" column
+status <- errorEmptyAPI(tweets_temp)
+if(status == 1) { Sys.sleep(3);error <- error + 1;next}
+if(status == 2) {break}
+# Check if error code exists
+code <- errorCheckCode(tweets_temp) # 0 if no error
+if(code == 34) {  # page does not exist
+status <- errorCode34
+if(status == 1) { Sys.sleep(3);error <- error + 1;next}
+if(status == 2) {break}
 }
-# Check if API output contains error fields
-status <- "error" %in% names(tweets_temp)
-if(status) {
-cat("[WARNING] Error in API request:", tweets_temp$error[1],"\n")
-rm(tweets_temp)
-break
-}
-# Check for other errors, mostly rate limits
-status <- "errors" %in% names(tweets_temp)
-if(status) {
-cat("[WARNING] Error in API request:", tweets_temp$errors[1,1],"\n")
-# Rate limit exceeded?
-status <- tweets_temp$errors[1,2]
-if(status == 88) {
-rate_api_url <- "https://api.twitter.com/1.1/application/rate_limit_status.json"
-rate_query <-c (resources="statuses")
-resettime <- fromJSON(twitter_api_call(rate_api_url, rate_query, api_params))
-resettime <- resettime$resources$statuses$`/statuses/user_timeline`$reset
-curtime <- as.numeric(as.POSIXct(Sys.time()))
-wait <- round(resettime - curtime + 10)
-cat("[INFO] Rate limit is exceeded. Now waiting",wait,"seconds.\n")
+if(code == 88) {  # rate limit exceeded
+wait <- errorCode88()
 Sys.sleep(wait)
-}
-rm(tweets_temp)
-Sys.sleep(3)
 next
 }
 ## END ERROR HANDLING ##
@@ -57,11 +54,11 @@ cat("User: ",user," in loop: ",loop,". \n", sep = "")
 Sys.sleep(2)
 if(tweets_full$id_str[nrow(tweets_full)] == tweets_temp$id_str[nrow(tweets_temp)] && nrow(tweets_full) > 0) {
 cat("[INFO] Last tweet of temp is last tweet of full. Abort loop and begin with next user.\n")
-break
+#break
 }
+## Last loop is reached. Now clear the data frame
 # Is the last tweet in tweets_temp from 2013?
 status <- str_detect(tweets_temp$created_at[nrow(tweets_temp)], "2014$")
-# Last loop is reached. Now clear the data frame
 if (!status) { # Starting when tweet not from 2014
 # Delete all tweets other than from 2014
 old <- 0
@@ -88,7 +85,7 @@ tweets_temp <- head(tweets_temp, -old)
 }
 rm(old)
 tweets_full <- insertRow(tweets_full, tweets_temp)
-rm(tweets_temp)
+#rm(tweets_temp)
 break  # End loop because 2013 is reached
 }
 # The last tweet is still from 2014, so we need another loop
@@ -97,24 +94,224 @@ else {
 max_id <- tweets_temp$id_str[nrow(tweets_temp)]
 loop <- loop + 1  # just for stats
 tweets_full <- insertRow(tweets_full, tweets_temp)
-rm(tweets_temp)
+#rm(tweets_temp)
 }
 } # /repeat
-stat_tweet <- nrow(tweets_full)
-cat("User:",user,"finished after",loop,"loops. Total Tweets now:",nrow(tweets_full),"\n")
-write.csv(tweets_full, "tweets_full.csv")
+tweets_complete <- insertRow(tweets_complete, tweets_full)
+tweets_full <- head(tweets_full, -nrow(tweets_full)) # Empty tweets_full
+cat("User:",user,"finished after",loop,"loops. Total Tweets now:",nrow(tweets_complete),"\n")
+write.csv(tweets_complete, "tweets_complete.csv")
+# Every tweet from 2014 from user[r] is downloaded. Now next user in for-loop
+}
+tweets_full <- data.frame(user=character(), name=character(), created_at=character(), id_str=character(), text=character(), retweet_count=character())
+for(a in 66:nrow(acc_df)) {
+user <- as.character(acc_df$screenname[a])
+name <- as.character(acc_df$name[a])
+max_id <- "999999999999999999"
+loop <- 1
+error <- 0
+repeat {
+# Define specific search query
+query <- c(include_rts=1, exclude_replies="true", trim_user="true", include_entities="false",
+screen_name=user,
+count=max_count,
+max_id=max_id);
+# At first, work with an temporary tweet-DB
+current <- twitter_api_call(api_url, query, api_params)
+rm(tweets_temp)
+tweets_temp <- fromJSON(correctJSON(current))
+## START ERROR HANDLING ##
+# Empty API output
+status <- errorEmptyAPI(tweets_temp)
+if(status == 1) { Sys.sleep(3);error <- error + 1;next}
+if(status == 2) {break}
+# Contains "error" column
+status <- errorEmptyAPI(tweets_temp)
+if(status == 1) { Sys.sleep(3);error <- error + 1;next}
+if(status == 2) {break}
+# Check if error code exists
+code <- errorCheckCode(tweets_temp) # 0 if no error
+if(code == 34) {  # page does not exist
+status <- errorCode34
+if(status == 1) { Sys.sleep(3);error <- error + 1;next}
+if(status == 2) {break}
+}
+if(code == 88) {  # rate limit exceeded
+wait <- errorCode88()
+Sys.sleep(wait)
+next
+}
+## END ERROR HANDLING ##
+# Delete unnecessary columns and add username and real name to dataframe
+tweets_temp <- tweets_temp[keep]
+tweets_temp <- cbind(user=user, name=name, tweets_temp)
+# Now sleep 3 second to dodge 300queries/15min limit
+cat("[",a,"/",nrow(acc_df),"] ", sep = "")
+cat("User: ",user," in loop: ",loop,". \n", sep = "")
+Sys.sleep(2)
+if(tweets_full$id_str[nrow(tweets_full)] == tweets_temp$id_str[nrow(tweets_temp)] && nrow(tweets_full) > 0) {
+cat("[INFO] Last tweet of temp is last tweet of full. Abort loop and begin with next user.\n")
+#break
+}
+## Last loop is reached. Now clear the data frame
+# Is the last tweet in tweets_temp from 2013?
+status <- str_detect(tweets_temp$created_at[nrow(tweets_temp)], "2014$")
+if (!status) { # Starting when tweet not from 2014
+# Delete all tweets other than from 2014
+old <- 0
+for(r in 1:nrow(tweets_temp)) {
+status <- str_detect(tweets_temp$created_at[r], "2014$")
+if(is.na(status)) {
+#status <- FALSE
+cat("[INFO] NA-Status in Tweet", r)
+}
+if(!status) { # Starting when tweet not from 2014
+old <- old + 1
+}
+}
+if(old > 0) {
+old <- old - 1
+# If even the first entry isn't from 2014, we have to set "old" manually because of a bug
+status <- str_detect(tweets_temp$created_at[1], "2014$")
+if(!status) {
+old <- nrow(tweets_temp)
+cat("[INFO] Timeline enhält keinen einzigen aus 2014\n")
+}
+# delete all lines which are older than 2014
+tweets_temp <- head(tweets_temp, -old)
+}
+rm(old)
+tweets_full <- insertRow(tweets_full, tweets_temp)
+#rm(tweets_temp)
+break  # End loop because 2013 is reached
+}
+# The last tweet is still from 2014, so we need another loop
+else {
+# Setting max_id to gather next 200 tweets
+max_id <- tweets_temp$id_str[nrow(tweets_temp)]
+loop <- loop + 1  # just for stats
+tweets_full <- insertRow(tweets_full, tweets_temp)
+#rm(tweets_temp)
+}
+} # /repeat
+tweets_complete <- insertRow(tweets_complete, tweets_full)
+tweets_full <- head(tweets_full, -nrow(tweets_full)) # Empty tweets_full
+cat("User:",user,"finished after",loop,"loops. Total Tweets now:",nrow(tweets_complete),"\n")
+write.csv(tweets_complete, "tweets_complete.csv")
+# Every tweet from 2014 from user[r] is downloaded. Now next user in for-loop
+}
+a
+tweets_complete$user[22982]
+tweets_full <- data.frame(user=character(), name=character(), created_at=character(), id_str=character(), text=character(), retweet_count=character())
+for(a in 68:nrow(acc_df)) {
+user <- as.character(acc_df$screenname[a])
+name <- as.character(acc_df$name[a])
+max_id <- "999999999999999999"
+loop <- 1
+error <- 0
+repeat {
+# Define specific search query
+query <- c(include_rts=1, exclude_replies="true", trim_user="true", include_entities="false",
+screen_name=user,
+count=max_count,
+max_id=max_id);
+# At first, work with an temporary tweet-DB
+current <- twitter_api_call(api_url, query, api_params)
+rm(tweets_temp)
+tweets_temp <- fromJSON(correctJSON(current))
+## START ERROR HANDLING ##
+# Empty API output
+status <- errorEmptyAPI(tweets_temp)
+if(status == 1) { Sys.sleep(3);error <- error + 1;next}
+if(status == 2) {break}
+# Contains "error" column
+status <- errorEmptyAPI(tweets_temp)
+if(status == 1) { Sys.sleep(3);error <- error + 1;next}
+if(status == 2) {break}
+# Check if error code exists
+code <- errorCheckCode(tweets_temp) # 0 if no error
+if(code == 34) {  # page does not exist
+status <- errorCode34
+if(status == 1) { Sys.sleep(3);error <- error + 1;next}
+if(status == 2) {break}
+}
+if(code == 88) {  # rate limit exceeded
+wait <- errorCode88()
+Sys.sleep(wait)
+next
+}
+## END ERROR HANDLING ##
+# Delete unnecessary columns and add username and real name to dataframe
+tweets_temp <- tweets_temp[keep]
+tweets_temp <- cbind(user=user, name=name, tweets_temp)
+# Now sleep 3 second to dodge 300queries/15min limit
+cat("[",a,"/",nrow(acc_df),"] ", sep = "")
+cat("User: ",user," in loop: ",loop,". \n", sep = "")
+Sys.sleep(2)
+if(tweets_full$id_str[nrow(tweets_full)] == tweets_temp$id_str[nrow(tweets_temp)] && nrow(tweets_full) > 0) {
+cat("[INFO] Last tweet of temp is last tweet of full. Abort loop and begin with next user.\n")
+break
+}
+## Last loop is reached. Now clear the data frame
+# Is the last tweet in tweets_temp from 2013?
+status <- str_detect(tweets_temp$created_at[nrow(tweets_temp)], "2014$")
+if (!status) { # Starting when tweet not from 2014
+# Delete all tweets other than from 2014
+old <- 0
+for(r in 1:nrow(tweets_temp)) {
+status <- str_detect(tweets_temp$created_at[r], "2014$")
+if(is.na(status)) {
+#status <- FALSE
+cat("[INFO] NA-Status in Tweet", r)
+}
+if(!status) { # Starting when tweet not from 2014
+old <- old + 1
+}
+}
+if(old > 0) {
+old <- old - 1
+# If even the first entry isn't from 2014, we have to set "old" manually because of a bug
+status <- str_detect(tweets_temp$created_at[1], "2014$")
+if(!status) {
+old <- nrow(tweets_temp)
+cat("[INFO] Timeline enhält keinen einzigen aus 2014\n")
+}
+# delete all lines which are older than 2014
+tweets_temp <- head(tweets_temp, -old)
+}
+rm(old)
+tweets_full <- insertRow(tweets_full, tweets_temp)
+#rm(tweets_temp)
+break  # End loop because 2013 is reached
+}
+# The last tweet is still from 2014, so we need another loop
+else {
+# Setting max_id to gather next 200 tweets
+max_id <- tweets_temp$id_str[nrow(tweets_temp)]
+loop <- loop + 1  # just for stats
+tweets_full <- insertRow(tweets_full, tweets_temp)
+#rm(tweets_temp)
+}
+} # /repeat
+tweets_complete <- insertRow(tweets_complete, tweets_full)
+tweets_full <- head(tweets_full, -nrow(tweets_full)) # Empty tweets_full
+cat("User:",user,"finished after",loop,"loops. Total Tweets now:",nrow(tweets_complete),"\n")
+write.csv(tweets_complete, "tweets_complete.csv")
 # Every tweet from 2014 from user[r] is downloaded. Now next user in for-loop
 }
 status
-content
 current
-query
-tweets_full$text[59203]
-tweets_full$text[59202]
-tweets_full$text[59203]
-tweets_full$user[59203]
-tweets_full$user[59202]
-for(a in 157:nrow(acc_df)) {
+tweets_temp
+status
+fromJSON(current)
+tweets_temp <- fromJSON(correctJSON(current))
+tweets_temp
+status <- errorErrorColumn(tweets_temp)
+a
+View(acc_df)
+tweets_complete$user[32539]
+tweets_full <- data.frame(user=character(), name=character(), created_at=character(), id_str=character(), text=character(), retweet_count=character())
+for(a in 94:nrow(acc_df)) {
 user <- as.character(acc_df$screenname[a])
 name <- as.character(acc_df$name[a])
 max_id <- "999999999999999999"
@@ -128,45 +325,27 @@ count=max_count,
 max_id=max_id);
 # At first, work with an temporary tweet-DB
 current <- twitter_api_call(api_url, query, api_params)
+rm(tweets_temp)
 tweets_temp <- fromJSON(correctJSON(current))
-## STAT ERROR HANDLING ##
-# Check for empty API returns
-status <- length(tweets_temp)
-if(status == 0) {
-if(error > 3) {
-cat("[WARNING] 3x empty API result. Aborting now.\n")
+## START ERROR HANDLING ##
+# Empty API output
+status <- errorEmptyAPI(tweets_temp)
+if(status == 1) { Sys.sleep(3);error <- error + 1;next}
+if(status == 2) {break}
+# Contains "error" column
+status <- errorErrorColumn(tweets_temp)
+if(status == 1) { Sys.sleep(3);error <- error + 1;next}
+if(status == 2) {break}
+# Check if error code exists
+code <- errorCheckCode(tweets_temp) # 0 if no error
+if(code == 34) {  # page does not exist
+status <- errorCode34
+if(status == 1) { Sys.sleep(3);error <- error + 1;next}
+if(status == 2) {break}
 }
-cat("[WARNING] Empty API result. Trying again.\n")
-rm(tweets_temp)
-error <- error + 1
-Sys.sleep(3)
-next
-}
-# Check if API output contains error fields
-status <- "error" %in% names(tweets_temp)
-if(status) {
-cat("[WARNING] Error in API request:", tweets_temp$error[1],"\n")
-rm(tweets_temp)
-break
-}
-# Check for other errors, mostly rate limits
-status <- "errors" %in% names(tweets_temp)
-if(status) {
-cat("[WARNING] Error in API request:", tweets_temp$errors[1,1],"\n")
-# Rate limit exceeded?
-status <- tweets_temp$errors[1,2]
-if(status == 88) {
-rate_api_url <- "https://api.twitter.com/1.1/application/rate_limit_status.json"
-rate_query <-c (resources="statuses")
-resettime <- fromJSON(twitter_api_call(rate_api_url, rate_query, api_params))
-resettime <- resettime$resources$statuses$`/statuses/user_timeline`$reset
-curtime <- as.numeric(as.POSIXct(Sys.time()))
-wait <- round(resettime - curtime + 10)
-cat("[INFO] Rate limit is exceeded. Now waiting",wait,"seconds.\n")
+if(code == 88) {  # rate limit exceeded
+wait <- errorCode88()
 Sys.sleep(wait)
-}
-rm(tweets_temp)
-Sys.sleep(3)
 next
 }
 ## END ERROR HANDLING ##
@@ -181,9 +360,9 @@ if(tweets_full$id_str[nrow(tweets_full)] == tweets_temp$id_str[nrow(tweets_temp)
 cat("[INFO] Last tweet of temp is last tweet of full. Abort loop and begin with next user.\n")
 break
 }
+## Last loop is reached. Now clear the data frame
 # Is the last tweet in tweets_temp from 2013?
 status <- str_detect(tweets_temp$created_at[nrow(tweets_temp)], "2014$")
-# Last loop is reached. Now clear the data frame
 if (!status) { # Starting when tweet not from 2014
 # Delete all tweets other than from 2014
 old <- 0
@@ -210,7 +389,7 @@ tweets_temp <- head(tweets_temp, -old)
 }
 rm(old)
 tweets_full <- insertRow(tweets_full, tweets_temp)
-rm(tweets_temp)
+#rm(tweets_temp)
 break  # End loop because 2013 is reached
 }
 # The last tweet is still from 2014, so we need another loop
@@ -219,15 +398,19 @@ else {
 max_id <- tweets_temp$id_str[nrow(tweets_temp)]
 loop <- loop + 1  # just for stats
 tweets_full <- insertRow(tweets_full, tweets_temp)
-rm(tweets_temp)
+#rm(tweets_temp)
 }
 } # /repeat
-stat_tweet <- nrow(tweets_full)
-cat("User:",user,"finished after",loop,"loops. Total Tweets now:",nrow(tweets_full),"\n")
-write.csv(tweets_full, "tweets_full.csv")
+tweets_complete <- insertRow(tweets_complete, tweets_full)
+tweets_full <- head(tweets_full, -nrow(tweets_full)) # Empty tweets_full
+cat("User:",user,"finished after",loop,"loops. Total Tweets now:",nrow(tweets_complete),"\n")
+write.csv(tweets_complete, "tweets_complete.csv")
 # Every tweet from 2014 from user[r] is downloaded. Now next user in for-loop
 }
-for(a in 157:nrow(acc_df)) {
+status
+tweets_full <- data.frame(user=character(), name=character(), created_at=character(), id_str=character(), text=character(), retweet_count=character())
+a
+for(a in 346:nrow(acc_df)) {
 user <- as.character(acc_df$screenname[a])
 name <- as.character(acc_df$name[a])
 max_id <- "999999999999999999"
@@ -241,46 +424,27 @@ count=max_count,
 max_id=max_id);
 # At first, work with an temporary tweet-DB
 current <- twitter_api_call(api_url, query, api_params)
+rm(tweets_temp)
 tweets_temp <- fromJSON(correctJSON(current))
-## STAT ERROR HANDLING ##
-# Check for empty API returns
-status <- length(tweets_temp)
-if(status == 0) {
-if(error > 2) {
-cat("[WARNING] 3x empty API result. Aborting now.\n")
-break
+## START ERROR HANDLING ##
+# Empty API output
+status <- errorEmptyAPI(tweets_temp)
+if(status == 1) { Sys.sleep(3);error <- error + 1;next}
+if(status == 2) {break}
+# Contains "error" column
+status <- errorErrorColumn(tweets_temp)
+if(status == 1) { Sys.sleep(3);error <- error + 1;next}
+if(status == 2) {break}
+# Check if error code exists
+code <- errorCheckCode(tweets_temp) # 0 if no error
+if(code == 34) {  # page does not exist
+status <- errorCode34()
+if(status == 1) { Sys.sleep(3);error <- error + 1;next}
+if(status == 2) {break}
 }
-cat("[WARNING] Empty API result. Trying again.\n")
-rm(tweets_temp)
-error <- error + 1
-Sys.sleep(3)
-next
-}
-# Check if API output contains error fields
-status <- "error" %in% names(tweets_temp)
-if(status) {
-cat("[WARNING] Error in API request:", tweets_temp$error[1],"\n")
-rm(tweets_temp)
-break
-}
-# Check for other errors, mostly rate limits
-status <- "errors" %in% names(tweets_temp)
-if(status) {
-cat("[WARNING] Error in API request:", tweets_temp$errors[1,1],"\n")
-# Rate limit exceeded?
-status <- tweets_temp$errors[1,2]
-if(status == 88) {
-rate_api_url <- "https://api.twitter.com/1.1/application/rate_limit_status.json"
-rate_query <-c (resources="statuses")
-resettime <- fromJSON(twitter_api_call(rate_api_url, rate_query, api_params))
-resettime <- resettime$resources$statuses$`/statuses/user_timeline`$reset
-curtime <- as.numeric(as.POSIXct(Sys.time()))
-wait <- round(resettime - curtime + 10)
-cat("[INFO] Rate limit is exceeded. Now waiting",wait,"seconds.\n")
+if(code == 88) {  # rate limit exceeded
+wait <- errorCode88()
 Sys.sleep(wait)
-}
-rm(tweets_temp)
-Sys.sleep(3)
 next
 }
 ## END ERROR HANDLING ##
@@ -295,9 +459,9 @@ if(tweets_full$id_str[nrow(tweets_full)] == tweets_temp$id_str[nrow(tweets_temp)
 cat("[INFO] Last tweet of temp is last tweet of full. Abort loop and begin with next user.\n")
 break
 }
+## Last loop is reached. Now clear the data frame
 # Is the last tweet in tweets_temp from 2013?
 status <- str_detect(tweets_temp$created_at[nrow(tweets_temp)], "2014$")
-# Last loop is reached. Now clear the data frame
 if (!status) { # Starting when tweet not from 2014
 # Delete all tweets other than from 2014
 old <- 0
@@ -324,7 +488,7 @@ tweets_temp <- head(tweets_temp, -old)
 }
 rm(old)
 tweets_full <- insertRow(tweets_full, tweets_temp)
-rm(tweets_temp)
+#rm(tweets_temp)
 break  # End loop because 2013 is reached
 }
 # The last tweet is still from 2014, so we need another loop
@@ -333,180 +497,16 @@ else {
 max_id <- tweets_temp$id_str[nrow(tweets_temp)]
 loop <- loop + 1  # just for stats
 tweets_full <- insertRow(tweets_full, tweets_temp)
-rm(tweets_temp)
+#rm(tweets_temp)
 }
 } # /repeat
-stat_tweet <- nrow(tweets_full)
-cat("User:",user,"finished after",loop,"loops. Total Tweets now:",nrow(tweets_full),"\n")
-write.csv(tweets_full, "tweets_full.csv")
-# Every tweet from 2014 from user[r] is downloaded. Now next user in for-loop
+tweets_complete <- insertRow(tweets_complete, tweets_full)
+tweets_full <- head(tweets_full, -nrow(tweets_full)) # Empty tweets_full
+cat("User:",user,"finished after",loop,"loops. Total Tweets now:",nrow(tweets_complete),"\n")
+write.csv(tweets_complete, "tweets_complete.csv")
+# Every tweet from 2014 from user[a] is downloaded. Now next user in for-loop
 }
-current
-correctJSON(current)
-correctJSON <- function(string) {
-#   string <- str_replace_all(string, pattern = "\n", replacement = " ")
-#   string <- str_replace_all(string, pattern = "\r", replacement = " ")
-#   string <- str_replace_all(string, pattern = "\\^", replacement = " ")
-string <- str_replace_all(string, pattern = "[^[:print:]]", replacement = " ")
-string <- str_replace_all(string, pattern = "&..;", replacement = " ")
-string <- str_replace_all(string, pattern = perl('\\\\(?![tn"])'), replacement = " ")
-return(string)
-}
-correctJSON(current)
-a
-tweets_temp <- fromJSON(correctJSON(current))
-current
-correctJSON <- function(string) {
-#   string <- str_replace_all(string, pattern = "\n", replacement = " ")
-#   string <- str_replace_all(string, pattern = "\r", replacement = " ")
-#   string <- str_replace_all(string, pattern = "\\^", replacement = " ")
-string <- str_replace_all(string, pattern = "[^[:print:]]", replacement = " ")
-string <- str_replace_all(string, pattern = "&..;", replacement = " ")
-string <- str_replace_all(string, pattern = perl('\\\\(?![tn"])'), replacement = " ")
-string <- str_replace_all(string, pattern = "\\\\\\\\\\", replacement = "\\\\\\")
-return(string)
-}
-tweets_temp <- fromJSON(correctJSON(current))
-correctJSON <- function(string) {
-#   string <- str_replace_all(string, pattern = "\n", replacement = " ")
-#   string <- str_replace_all(string, pattern = "\r", replacement = " ")
-#   string <- str_replace_all(string, pattern = "\\^", replacement = " ")
-string <- str_replace_all(string, pattern = "[^[:print:]]", replacement = " ")
-string <- str_replace_all(string, pattern = "&..;", replacement = " ")
-string <- str_replace_all(string, pattern = perl('\\\\(?![tn"])'), replacement = " ")
-string <- str_replace_all(string, pattern = perl('\\\\\\(?![tn"])'), replacement = " ")
-return(string)
-}
-tweets_temp <- fromJSON(correctJSON(current))
-correctJSON <- function(string) {
-#   string <- str_replace_all(string, pattern = "\n", replacement = " ")
-#   string <- str_replace_all(string, pattern = "\r", replacement = " ")
-#   string <- str_replace_all(string, pattern = "\\^", replacement = " ")
-string <- str_replace_all(string, pattern = "[^[:print:]]", replacement = " ")
-string <- str_replace_all(string, pattern = "&..;", replacement = " ")
-string <- str_replace_all(string, pattern = perl('\\\\(?![tn"])'), replacement = " ")
-string <- str_replace_all(string, pattern = perl('\\\\\\\\(?![tn"])'), replacement = " ")
-return(string)
-}
-tweets_temp <- fromJSON(correctJSON(current))
-correctJSON <- function(string) {
-#   string <- str_replace_all(string, pattern = "\n", replacement = " ")
-#   string <- str_replace_all(string, pattern = "\r", replacement = " ")
-#   string <- str_replace_all(string, pattern = "\\^", replacement = " ")
-string <- str_replace_all(string, pattern = "[^[:print:]]", replacement = " ")
-string <- str_replace_all(string, pattern = "&..;", replacement = " ")
-string <- str_replace_all(string, pattern = perl('\\\\(?![tn"])'), replacement = " ")
-string <- str_replace_all(string, pattern = perl('\\\\\\\\\\'), replacement = " ")
-return(string)
-}
-tweets_temp <- fromJSON(correctJSON(current))
-correctJSON <- function(string) {
-#   string <- str_replace_all(string, pattern = "\n", replacement = " ")
-#   string <- str_replace_all(string, pattern = "\r", replacement = " ")
-#   string <- str_replace_all(string, pattern = "\\^", replacement = " ")
-string <- str_replace_all(string, pattern = "[^[:print:]]", replacement = " ")
-string <- str_replace_all(string, pattern = "&..;", replacement = " ")
-string <- str_replace_all(string, pattern = perl('\\\\(?![tn"])'), replacement = " ")
-string <- str_replace_all(string, pattern = "[\]{5}", replacement = " ")
-return(string)
-}
-correctJSON <- function(string) {
-#   string <- str_replace_all(string, pattern = "\n", replacement = " ")
-#   string <- str_replace_all(string, pattern = "\r", replacement = " ")
-#   string <- str_replace_all(string, pattern = "\\^", replacement = " ")
-string <- str_replace_all(string, pattern = "[^[:print:]]", replacement = " ")
-string <- str_replace_all(string, pattern = "&..;", replacement = " ")
-string <- str_replace_all(string, pattern = perl('\\\\(?![tn"])'), replacement = " ")
-string <- str_replace_all(string, pattern = "[\\]{5}", replacement = " ")
-return(string)
-}
-tweets_temp <- fromJSON(correctJSON(current))
-correctJSON <- function(string) {
-#   string <- str_replace_all(string, pattern = "\n", replacement = " ")
-#   string <- str_replace_all(string, pattern = "\r", replacement = " ")
-#   string <- str_replace_all(string, pattern = "\\^", replacement = " ")
-string <- str_replace_all(string, pattern = "[^[:print:]]", replacement = " ")
-string <- str_replace_all(string, pattern = "&..;", replacement = " ")
-string <- str_replace_all(string, pattern = perl('\\\\(?![tn"])'), replacement = " ")
-string <- str_replace_all(string, pattern = "[\\]{6}", replacement = " ")
-return(string)
-}
-tweets_temp <- fromJSON(correctJSON(current))
-correctJSON <- function(string) {
-#   string <- str_replace_all(string, pattern = "\n", replacement = " ")
-#   string <- str_replace_all(string, pattern = "\r", replacement = " ")
-#   string <- str_replace_all(string, pattern = "\\^", replacement = " ")
-string <- str_replace_all(string, pattern = "[^[:print:]]", replacement = " ")
-string <- str_replace_all(string, pattern = "&..;", replacement = " ")
-string <- str_replace_all(string, pattern = perl('\\\\(?![tn"])'), replacement = " ")
-string <- str_replace_all(string, pattern = "\\{6}", replacement = " ")
-return(string)
-}
-tweets_temp <- fromJSON(correctJSON(current))
-correctJSON <- function(string) {
-#   string <- str_replace_all(string, pattern = "\n", replacement = " ")
-#   string <- str_replace_all(string, pattern = "\r", replacement = " ")
-#   string <- str_replace_all(string, pattern = "\\^", replacement = " ")
-string <- str_replace_all(string, pattern = "[^[:print:]]", replacement = " ")
-string <- str_replace_all(string, pattern = "&..;", replacement = " ")
-string <- str_replace_all(string, pattern = perl('\\\\(?![tn"])'), replacement = " ")
-string <- str_replace_all(string, pattern = "\\{5}", replacement = " ")
-return(string)
-}
-insertRow <- function(existingDF, newrow, r) {
-r <- as.numeric(nrow(existingDF)) + 1
-existingDF <- rbind(existingDF,newrow)
-existingDF <- existingDF[order(c(1:(nrow(existingDF)-1),r-0.5)),]
-row.names(existingDF) <- 1:nrow(existingDF)
-return(existingDF)
-}
-tweets_temp <- fromJSON(correctJSON(current))
-str_detect(current, pattern = "\\")
-str_detect(current, pattern = '\\')
-str_detect(current, pattern = "\\\")
-str_detect(current, pattern = "[\\]")
-str_detect(current, pattern = "[\\\\]")
-str_detect(current, pattern = "[\\\\\\]")
-str_detect(current, pattern = "[\\\\\\\\]")
-str_detect(current, pattern = "[\\\\\\\\\\\\]")
-str_detect(current, pattern = "[\\\\\\\\\\\\\\]")
-str_detect(current, pattern = "[\\]{1}")
-str_detect(current, pattern = "[\\]{7}")
-str_detect(current, pattern = "[\\]{2}")
-str_detect(current, pattern = "[\\]{3}")
-str_detect(current, pattern = "[\\]{4}")
-str_detect(current, pattern = "[\\]{5}")
-current
-str_detect(current, pattern = "[\\]{5}")
-str_detect(current, pattern = "[\]{5}")
-str_detect(current, pattern = "[\\]{2}")
-grep("[\\]", current)
-grep("[\\]{2}", current)
-grep("[\\]{3}", current)
-grep("[\\]{4}", current)
-grep("[\\]{5}", current)
-grep("[\\]{1}", current)
-str_detect(current, pattern = fixed("\\\\\"))
-str_detect(current, pattern = fixed("\\\\\\\\\\\"))
-str_detect(current, pattern = fixed("\\\\\\\\\\"))
-str_detect(current, pattern = fixed("\\\\\\\\"))
-str_detect(current, pattern = fixed("\\\\\\"))
-str_detect(current, pattern = fixed("\\\\\"))
-str_detect(current, pattern = fixed("\\\\"))
-str_detect(current, pattern = fixed("\\"))
-nchar("\\")
-nchar("\\\\")
-nchar("\\\\\\")
-nchar("\\\\\\\\")
-nchar("\\\\\\\\\\")
-print("\\\\\\\\\\")
-print("\\\\\")
-print("\\\\\\")
-print("\\\\")
-print("\\\\\")
-write(current, "current.txt")
-current2 <- readLines("current.txt")
-current
-current2
-rm(current2)
-a
+save(tweets_complete, file="tweets_complete.RData")
+tweets_complete$id_str[146982]
+class(tweets_complete$id_str[146982])
+tweets_complete$id_str[1]
@@ -0,0 +1,3 @@
+tweets_complete.csv
+current.txt
+.RData
@@ -52,7 +52,7 @@ keep <- c("created_at", "id_str", "text", "retweet_count")
 tweets_full <- data.frame(user=character(), name=character(), created_at=character(), id_str=character(), text=character(), retweet_count=character())
 tweets_complete <- tweets_full

-for(a in 94:nrow(acc_df)) {
+for(a in 346:nrow(acc_df)) {
  user <- as.character(acc_df$screenname[a])
  name <- as.character(acc_df$name[a])
  max_id <- "999999999999999999"
@@ -84,7 +84,7 @@ for(a in 94:nrow(acc_df)) {
    # Check if error code exists
    code <- errorCheckCode(tweets_temp) # 0 if no error
    if(code == 34) {  # page does not exist
-      status <- errorCode34
+      status <- errorCode34()
      if(status == 1) { Sys.sleep(3);error <- error + 1;next}
      if(status == 2) {break}
    }
@@ -164,7 +164,7 @@ for(a in 94:nrow(acc_df)) {
  write.csv(tweets_complete, "tweets_complete.csv")

  
-  # Every tweet from 2014 from user[r] is downloaded. Now next user in for-loop
+  # Every tweet from 2014 from user[a] is downloaded. Now next user in for-loop
 }


@@ -1 +0,0 @@
-[{"created_at":"Fri Feb 07 18:35:02 +0000 2014","id":431858659656990721,"id_str":"431858659656990721","text":"RT @FHubersr: @peteraltmaier //die Schwarz-Grünen werden zeigen, daß sich Ökologie und Ökonomie vertragen und kein Widerspruch sind.Mutig v…","source":"<a href=\"http://twitter.com/download/iphone\" rel=\"nofollow\">Twitter for iPhone</a>","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":378693834,"id_str":"378693834"},"geo":null,"coordinates":null,"place":null,"contributors":null,"retweeted_status":{"created_at":"Fri Feb 07 18:32:30 +0000 2014","id":431858022366064640,"id_str":"431858022366064640","text":"@peteraltmaier //die Schwarz-Grünen werden zeigen, daß sich Ökologie und Ökonomie vertragen und kein Widerspruch sind.Mutig von bd. Seiten������������","source":"<a href=\"http://twitter.com/download/iphone\" rel=\"nofollow\">Twitter for iPhone</a>","truncated":false,"in_reply_to_status_id":431845492579123201,"in_reply_to_status_id_str":"431845492579123201","in_reply_to_user_id":378693834,"in_reply_to_user_id_str":"378693834","in_reply_to_screen_name":"peteraltmaier","user":{"id":2172292811,"id_str":"2172292811"},"geo":null,"coordinates":null,"place":null,"contributors":null,"retweet_count":3,"favorite_count":4,"favorited":false,"retweeted":false,"lang":"de"},"retweet_count":3,"favorite_count":0,"favorited":false,"retweeted":false,"lang":"de"}]
				`@@ -1 +0,0 @@`
				[{"created_at":"Fri Feb 07 18:35:02 +0000 2014","id":431858659656990721,"id_str":"431858659656990721","text":"RT @FHubersr: @peteraltmaier //die Schwarz-Grünen werden zeigen, daß sich Ökologie und Ökonomie vertragen und kein Widerspruch sind.Mutig v…","source":"<a href=\"http://twitter.com/download/iphone\" rel=\"nofollow\">Twitter for iPhone</a>","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":378693834,"id_str":"378693834"},"geo":null,"coordinates":null,"place":null,"contributors":null,"retweeted_status":{"created_at":"Fri Feb 07 18:32:30 +0000 2014","id":431858022366064640,"id_str":"431858022366064640","text":"@peteraltmaier //die Schwarz-Grünen werden zeigen, daß sich Ökologie und Ökonomie vertragen und kein Widerspruch sind.Mutig von bd. Seiten��","source":"<a href=\"http://twitter.com/download/iphone\" rel=\"nofollow\">Twitter for iPhone</a>","truncated":false,"in_reply_to_status_id":431845492579123201,"in_reply_to_status_id_str":"431845492579123201","in_reply_to_user_id":378693834,"in_reply_to_user_id_str":"378693834","in_reply_to_screen_name":"peteraltmaier","user":{"id":2172292811,"id_str":"2172292811"},"geo":null,"coordinates":null,"place":null,"contributors":null,"retweet_count":3,"favorite_count":4,"favorited":false,"retweeted":false,"lang":"de"},"retweet_count":3,"favorite_count":0,"favorited":false,"retweeted":false,"lang":"de"}]