# Tail of a paging loop whose opening lies above this excerpt; its logic is
# kept as written because the rest of the loop is not visible here.
status <- str_detect(tweets_full$created_at[r], "2013$")
      if (is.na(status)) {
        status <- FALSE
      }
      if (status) {
        tweets_full <- tweets_full[-r, ]
      }
    }
    break
  } else {
    max_id <- as.character(as.numeric(tweets_full$id_str[nrow(tweets_full)]) - 1)
    loop <- loop + 1
  }
}
View(tweets_full)

# OAuth 1.0 fields passed to twitter_api_call(). The four secrets are read
# from the environment instead of being written into the source; the nonce
# and timestamp stay NA placeholders exactly as before.
api_params <- c(
  "oauth_consumer_key" = Sys.getenv("TWITTER_CONSUMER_KEY"),
  "oauth_nonce" = NA,
  "oauth_signature_method" = "HMAC-SHA1",
  "oauth_timestamp" = NA,
  "oauth_token" = Sys.getenv("TWITTER_ACCESS_TOKEN"),
  "oauth_version" = "1.0",
  "consumer_secret" = Sys.getenv("TWITTER_CONSUMER_SECRET"),
  "oauth_token_secret" = Sys.getenv("TWITTER_ACCESS_TOKEN_SECRET")
)
if (!all(nzchar(api_params[c(
  "oauth_consumer_key", "oauth_token", "consumer_secret", "oauth_token_secret"
)]))) {
  stop(
    "Set TWITTER_CONSUMER_KEY, TWITTER_CONSUMER_SECRET, TWITTER_ACCESS_TOKEN ",
    "and TWITTER_ACCESS_TOKEN_SECRET before running this script.",
    call. = FALSE
  )
}

# Subtract 1 from a non-negative integer held as a decimal string. The ids
# used here have 18 digits, beyond the 2^53 range in which doubles are exact,
# so the arithmetic is done on the digits rather than through as.numeric().
decrement_id_str <- function(id) {
  digits <- as.integer(strsplit(as.character(id), "", fixed = TRUE)[[1]])
  pos <- length(digits)
  while (pos > 1L && digits[pos] == 0L) {
    digits[pos] <- 9L # borrow from the next digit to the left
    pos <- pos - 1L
  }
  digits[pos] <- digits[pos] - 1L
  sub("^0+(?=[0-9])", "", paste(digits, collapse = ""), perl = TRUE)
}

# ---- Run: 10 tweets per page, starting below a fixed id ----
api_url <- "https://api.twitter.com/1.1/statuses/user_timeline.json"
user <- "peteraltmaier"
max_count <- "10"
max_id <- "454359322768990208"
loop <- 1
keep <- c("created_at", "id_str", "text", "retweet_count")
# Remove leftovers from an earlier run without warning when they are absent.
rm(list = intersect(c("tweets_full", "tweets_temp"), ls()))

# Page backwards through the timeline until the oldest fetched tweet has a
# created_at ending in "2013", then drop every row from 2013 and stop.
repeat {
  query <- c(
    include_rts = 1, exclude_replies = "true", trim_user = "true",
    include_entities = "false", screen_name = user,
    count = max_count, max_id = max_id
  )
  tweets_temp <- fromJSON(
    correctJSON(twitter_api_call(api_url, query, api_params))
  )
  if (NROW(tweets_temp) == 0L) {
    # An empty page would leave max_id unchanged and the loop would never end.
    break
  }
  tweets_temp <- tweets_temp[keep]
  if (exists("tweets_full")) {
    tweets_full <- insertRow(tweets_full, tweets_temp)
  } else {
    tweets_full <- tweets_temp
  }
  status <- str_detect(tweets_full$created_at[nrow(tweets_full)], "2013$")
  if (status) {
    rm(tweets_temp)
    # One vectorised filter: deleting rows while iterating over 1:nrow()
    # shifts the remaining indices and lets some 2013 rows slip through.
    tweets_full <- tweets_full[
      !(str_detect(tweets_full$created_at, "2013$") %in% TRUE),
    ]
    break
  }
  max_id <- decrement_id_str(tweets_full$id_str[nrow(tweets_full)])
  loop <- loop + 1
}
View(tweets_full)

# ---- Run: 200 tweets per page, starting from the newest tweet ----
api_url <- "https://api.twitter.com/1.1/statuses/user_timeline.json"
user <- "peteraltmaier"
max_count <- "200"
max_id <- "999999999999999999"
loop <- 1
keep <- c("created_at", "id_str", "text", "retweet_count")
rm(list = intersect(c("tweets_full", "tweets_temp"), ls()))
repeat {
  query <- c(
    include_rts = 1, exclude_replies = "true", trim_user = "true",
    include_entities = "false", screen_name = user,
    count = max_count, max_id = max_id
  )
  tweets_temp <- fromJSON(
    correctJSON(twitter_api_call(api_url, query, api_params))
  )
  if (NROW(tweets_temp) == 0L) {
    break
  }
  tweets_temp <- tweets_temp[keep]
  if (exists("tweets_full")) {
    tweets_full <- insertRow(tweets_full, tweets_temp)
  } else {
    tweets_full <- tweets_temp
  }
  status <- str_detect(tweets_full$created_at[nrow(tweets_full)], "2013$")
  if (status) {
    rm(tweets_temp)
    tweets_full <- tweets_full[
      !(str_detect(tweets_full$created_at, "2013$") %in% TRUE),
    ]
    break
  }
  max_id <- decrement_id_str(tweets_full$id_str[nrow(tweets_full)])
  loop <- loop + 1
}

# Re-issue the last query by hand to inspect the raw and cleaned response.
tweets_temp <- fromJSON(correctJSON(twitter_api_call(api_url, query, api_params)))
current <- twitter_api_call(api_url, query, api_params)
correctJSON(current)

# ---- Run: same settings, keeping the raw response in `current` ----
api_url <- "https://api.twitter.com/1.1/statuses/user_timeline.json"
user <- "peteraltmaier"
max_count <- "200"
max_id <- "999999999999999999"
loop <- 1
keep <- c("created_at", "id_str", "text", "retweet_count")
rm(list = intersect(c("tweets_full", "tweets_temp"), ls()))
# This loop continues past the end of this line, so its structure is left
# exactly as written.
# NOTE(review): it still deletes rows while iterating over 1:nrow().
repeat {
  query <- c(
    include_rts = 1, exclude_replies = "true", trim_user = "true",
    include_entities = "false", screen_name = user,
    count = max_count, max_id = max_id
  )
  if (exists("tweets_full")) {
    current <- twitter_api_call(api_url, query, api_params)
    tweets_temp <- fromJSON(correctJSON(current))
    tweets_temp <- tweets_temp[keep]
    tweets_full <- insertRow(tweets_full, tweets_temp)
  } else {
    current <- twitter_api_call(api_url, query, api_params)
    tweets_full <- fromJSON(correctJSON(current))
    tweets_full <- tweets_full[keep]
  }
  status <- str_detect(tweets_full$created_at[nrow(tweets_full)], "2013$")
  if (status) {
    rm(tweets_temp)
    for (r in 1:nrow(tweets_full)) {
      status <-
str_detect(tweets_full$created_at[r], "2013$")
      # The lines down to the closing brace of `repeat` finish a loop begun on
      # the previous line and are kept as written.
      if (is.na(status)) {
        status <- FALSE
      }
      if (status) {
        tweets_full <- tweets_full[-r, ]
      }
    }
    break
  } else {
    max_id <- as.character(as.numeric(tweets_full$id_str[nrow(tweets_full)]) - 1)
    loop <- loop + 1
  }
}

# Scratch work on the last raw response: apply the clean-up steps one at a
# time and mark where newline characters occur.
current
string <- current
string <- str_replace_all(
  string,
  pattern = perl('\\\\(?![tn"])'),
  replacement = " "
)
string <- str_replace_all(string, pattern = fixed("\n"), replacement = " ")
str_replace(string, pattern = fixed("\n"), replacement = "X-X-X-X")
str_replace_all(string, pattern = fixed("\n"), replacement = "X-X-X-X")

# Subtract 1 from a non-negative integer held as a decimal string. The ids
# used here have 18 digits, beyond the 2^53 range in which doubles are exact,
# so the arithmetic is done on the digits rather than through as.numeric().
decrement_id_str <- function(id) {
  digits <- as.integer(strsplit(as.character(id), "", fixed = TRUE)[[1]])
  pos <- length(digits)
  while (pos > 1L && digits[pos] == 0L) {
    digits[pos] <- 9L # borrow from the next digit to the left
    pos <- pos - 1L
  }
  digits[pos] <- digits[pos] - 1L
  sub("^0+(?=[0-9])", "", paste(digits, collapse = ""), perl = TRUE)
}

# ---- Run: 10 tweets per page ----
api_url <- "https://api.twitter.com/1.1/statuses/user_timeline.json"
user <- "peteraltmaier"
max_count <- "10"
max_id <- "999999999999999999"
loop <- 1
keep <- c("created_at", "id_str", "text", "retweet_count")
# Remove leftovers from an earlier run without warning when they are absent.
rm(list = intersect(c("tweets_full", "tweets_temp"), ls()))

# Page backwards through the timeline until the oldest fetched tweet has a
# created_at ending in "2013", then drop every row from 2013 and stop.
repeat {
  query <- c(
    include_rts = 1, exclude_replies = "true", trim_user = "true",
    include_entities = "false", screen_name = user,
    count = max_count, max_id = max_id
  )
  current <- twitter_api_call(api_url, query, api_params)
  tweets_temp <- fromJSON(correctJSON(current))
  if (NROW(tweets_temp) == 0L) {
    # An empty page would leave max_id unchanged and the loop would never end.
    break
  }
  tweets_temp <- tweets_temp[keep]
  if (exists("tweets_full")) {
    tweets_full <- insertRow(tweets_full, tweets_temp)
  } else {
    tweets_full <- tweets_temp
  }
  status <- str_detect(tweets_full$created_at[nrow(tweets_full)], "2013$")
  if (status) {
    rm(tweets_temp)
    # One vectorised filter: deleting rows while iterating over 1:nrow()
    # shifts the remaining indices and lets some 2013 rows slip through.
    tweets_full <- tweets_full[
      !(str_detect(tweets_full$created_at, "2013$") %in% TRUE),
    ]
    break
  }
  max_id <- decrement_id_str(tweets_full$id_str[nrow(tweets_full)])
  loop <- loop + 1
}
View(tweets_full)
# NOTE(review): tweets_temp is removed when the loop stops on a 2013 tweet,
# so this call fails in that case.
View(tweets_temp)

# ---- Settings for the next run: 20 tweets per page ----
api_url <- "https://api.twitter.com/1.1/statuses/user_timeline.json"
user <- "peteraltmaier"
max_count <- "20"
max_id <- "999999999999999999"
loop <- 1
keep <- c("created_at", "id_str", "text", "retweet_count")
# The argument list of this call is completed on the next line.
rm(tweets_full,
tweets_temp)

# Subtract 1 from a non-negative integer held as a decimal string. The ids
# used here have 18 digits, beyond the 2^53 range in which doubles are exact,
# so the arithmetic is done on the digits rather than through as.numeric().
decrement_id_str <- function(id) {
  digits <- as.integer(strsplit(as.character(id), "", fixed = TRUE)[[1]])
  pos <- length(digits)
  while (pos > 1L && digits[pos] == 0L) {
    digits[pos] <- 9L # borrow from the next digit to the left
    pos <- pos - 1L
  }
  digits[pos] <- digits[pos] - 1L
  sub("^0+(?=[0-9])", "", paste(digits, collapse = ""), perl = TRUE)
}

# Page backwards through the timeline until the oldest fetched tweet has a
# created_at ending in "2013", then drop every row from 2013 and stop.
repeat {
  query <- c(
    include_rts = 1, exclude_replies = "true", trim_user = "true",
    include_entities = "false", screen_name = user,
    count = max_count, max_id = max_id
  )
  current <- twitter_api_call(api_url, query, api_params)
  tweets_temp <- fromJSON(correctJSON(current))
  if (NROW(tweets_temp) == 0L) {
    # An empty page would leave max_id unchanged and the loop would never end.
    break
  }
  tweets_temp <- tweets_temp[keep]
  if (exists("tweets_full")) {
    tweets_full <- insertRow(tweets_full, tweets_temp)
  } else {
    tweets_full <- tweets_temp
  }
  status <- str_detect(tweets_full$created_at[nrow(tweets_full)], "2013$")
  if (status) {
    rm(tweets_temp)
    # One vectorised filter: deleting rows while iterating over 1:nrow()
    # shifts the remaining indices and lets some 2013 rows slip through.
    tweets_full <- tweets_full[
      !(str_detect(tweets_full$created_at, "2013$") %in% TRUE),
    ]
    break
  }
  max_id <- decrement_id_str(tweets_full$id_str[nrow(tweets_full)])
  loop <- loop + 1
}
View(tweets_full)

# ---- Run: 10 tweets per page, replies included ----
api_url <- "https://api.twitter.com/1.1/statuses/user_timeline.json"
user <- "peteraltmaier"
max_count <- "10"
max_id <- "999999999999999999"
loop <- 1
keep <- c("created_at", "id_str", "text", "retweet_count")
# Remove leftovers from an earlier run without warning when they are absent.
rm(list = intersect(c("tweets_full", "tweets_temp"), ls()))
# Same paging loop with exclude_replies = "false". Its final assignment is
# completed on the next line, so the if/else shape is kept.
repeat {
  query <- c(
    include_rts = 1, exclude_replies = "false", trim_user = "true",
    include_entities = "false", screen_name = user,
    count = max_count, max_id = max_id
  )
  current <- twitter_api_call(api_url, query, api_params)
  tweets_temp <- fromJSON(correctJSON(current))
  if (NROW(tweets_temp) == 0L) {
    break
  }
  tweets_temp <- tweets_temp[keep]
  if (exists("tweets_full")) {
    tweets_full <- insertRow(tweets_full, tweets_temp)
  } else {
    tweets_full <- tweets_temp
  }
  status <- str_detect(tweets_full$created_at[nrow(tweets_full)], "2013$")
  if (status) {
    rm(tweets_temp)
    tweets_full <- tweets_full[
      !(str_detect(tweets_full$created_at, "2013$") %in% TRUE),
    ]
    break
  } else {
    max_id <-
as.character(as.numeric(tweets_full$id_str[nrow(tweets_full)]) - 1)
    # The expression above and the braces below finish a loop begun on the
    # previous line and are kept as written.
    # NOTE(review): as.numeric() cannot hold an 18-digit id exactly, so the
    # "- 1" may be lost.
    loop <- loop + 1
  }
}
View(tweets_full)

# OAuth 1.0 fields passed to twitter_api_call(). The four secrets are read
# from the environment instead of being written into the source; the nonce
# and timestamp stay NA placeholders exactly as before.
api_params <- c(
  "oauth_consumer_key" = Sys.getenv("TWITTER_CONSUMER_KEY"),
  "oauth_nonce" = NA,
  "oauth_signature_method" = "HMAC-SHA1",
  "oauth_timestamp" = NA,
  "oauth_token" = Sys.getenv("TWITTER_ACCESS_TOKEN"),
  "oauth_version" = "1.0",
  "consumer_secret" = Sys.getenv("TWITTER_CONSUMER_SECRET"),
  "oauth_token_secret" = Sys.getenv("TWITTER_ACCESS_TOKEN_SECRET")
)
if (!all(nzchar(api_params[c(
  "oauth_consumer_key", "oauth_token", "consumer_secret", "oauth_token_secret"
)]))) {
  stop(
    "Set TWITTER_CONSUMER_KEY, TWITTER_CONSUMER_SECRET, TWITTER_ACCESS_TOKEN ",
    "and TWITTER_ACCESS_TOKEN_SECRET before running this script.",
    call. = FALSE
  )
}

# Subtract 1 from a non-negative integer held as a decimal string. The ids
# used here have 18 digits, beyond the 2^53 range in which doubles are exact,
# so the arithmetic is done on the digits rather than through as.numeric().
decrement_id_str <- function(id) {
  digits <- as.integer(strsplit(as.character(id), "", fixed = TRUE)[[1]])
  pos <- length(digits)
  while (pos > 1L && digits[pos] == 0L) {
    digits[pos] <- 9L # borrow from the next digit to the left
    pos <- pos - 1L
  }
  digits[pos] <- digits[pos] - 1L
  sub("^0+(?=[0-9])", "", paste(digits, collapse = ""), perl = TRUE)
}

# ---- Run: 10 tweets per page, replies included ----
api_url <- "https://api.twitter.com/1.1/statuses/user_timeline.json"
user <- "peteraltmaier"
max_count <- "10"
max_id <- "999999999999999999"
loop <- 1
keep <- c("created_at", "id_str", "text", "retweet_count")
# Remove leftovers from an earlier run without warning when they are absent.
rm(list = intersect(c("tweets_full", "tweets_temp"), ls()))

# Page backwards through the timeline until the oldest fetched tweet has a
# created_at ending in "2013", then drop every row from 2013 and stop.
repeat {
  query <- c(
    include_rts = 1, exclude_replies = "false", trim_user = "true",
    include_entities = "false", screen_name = user,
    count = max_count, max_id = max_id
  )
  current <- twitter_api_call(api_url, query, api_params)
  tweets_temp <- fromJSON(correctJSON(current))
  if (NROW(tweets_temp) == 0L) {
    # An empty page would leave max_id unchanged and the loop would never end.
    break
  }
  tweets_temp <- tweets_temp[keep]
  if (exists("tweets_full")) {
    tweets_full <- insertRow(tweets_full, tweets_temp)
  } else {
    tweets_full <- tweets_temp
  }
  status <- str_detect(tweets_full$created_at[nrow(tweets_full)], "2013$")
  if (status) {
    rm(tweets_temp)
    # One vectorised filter: deleting rows while iterating over 1:nrow()
    # shifts the remaining indices and lets some 2013 rows slip through.
    tweets_full <- tweets_full[
      !(str_detect(tweets_full$created_at, "2013$") %in% TRUE),
    ]
    break
  }
  max_id <- decrement_id_str(tweets_full$id_str[nrow(tweets_full)])
  loop <- loop + 1
}

# Scratch checks on the last raw response: validate it as-is, try replacing
# newlines, and probe for an arbitrary marker string.
current
correctJSON(current)
validate(current)
string <- current
str_replace_all(string, pattern = fixed("\n"), replacement = " ")
current
fromJSON(current)
str_replace_all(string, pattern = fixed("\n"), replacement = " ")
str_replace_all(string, pattern = fixed("ewifjweaoegj"), replacement = " ")
# The pattern argument of this call is supplied on the next line.
str_detect(string, pattern =
fixed("ewifjweaoegj"))
# The line above closes a str_detect() call begun on the previous line.

# Scratch probes: which fixed strings and regex patterns occur in `string`,
# and how a literal newline in a small test string is replaced.
str_detect(string, pattern = fixed("ewifjweaoegj"))
str_detect(string, pattern = fixed("wfwe"))
str_detect(string, pattern = fixed("peter"))
str_detect(string, pattern = "kefme")
str_detect(string, pattern = "\n")
str_detect(string, pattern = "\\n")
string <- str_replace_all(string, pattern = fixed("\n"), replacement = " ")
string <- "bla bla \nwoqdwqowd"
string
str_replace_all(string, pattern = fixed("\n"), replacement = " ")
string <- str_replace_all(string, pattern = "\n", replacement = " ")
string

# Clean a raw API response before it is handed to fromJSON().
#
# Replaces with a single space, in this order: every backslash that is not
# followed by t, n or a double quote; every newline; every carriage return;
# every literal caret.
#
# string: character vector holding the response text.
# Returns the cleaned character vector.
# NOTE(review): newer stringr releases deprecate perl() in favour of regex();
# check the installed version before switching.
correctJSON <- function(string) {
  string <- str_replace_all(
    string,
    pattern = perl('\\\\(?![tn"])'),
    replacement = " "
  )
  string <- str_replace_all(string, pattern = "\n", replacement = " ")
  string <- str_replace_all(string, pattern = "\r", replacement = " ")
  str_replace_all(string, pattern = fixed("^"), replacement = " ")
}

# ---- Run: 200 tweets per page ----
api_url <- "https://api.twitter.com/1.1/statuses/user_timeline.json"
user <- "peteraltmaier"
max_count <- "200"
max_id <- "999999999999999999"
loop <- 1
keep <- c("created_at", "id_str", "text", "retweet_count")
# Remove leftovers from an earlier run without warning when they are absent.
rm(list = intersect(c("tweets_full", "tweets_temp"), ls()))

# Page backwards through the timeline until the oldest fetched tweet has a
# created_at ending in "2013", then drop every row from 2013 and stop.
# The loop's final assignment is completed on the next line, so the if/else
# shape is kept.
repeat {
  query <- c(
    include_rts = 1, exclude_replies = "true", trim_user = "true",
    include_entities = "false", screen_name = user,
    count = max_count, max_id = max_id
  )
  current <- twitter_api_call(api_url, query, api_params)
  tweets_temp <- fromJSON(correctJSON(current))
  if (NROW(tweets_temp) == 0L) {
    # An empty page would leave max_id unchanged and the loop would never end.
    break
  }
  tweets_temp <- tweets_temp[keep]
  if (exists("tweets_full")) {
    tweets_full <- insertRow(tweets_full, tweets_temp)
  } else {
    tweets_full <- tweets_temp
  }
  status <- str_detect(tweets_full$created_at[nrow(tweets_full)], "2013$")
  if (status) {
    rm(tweets_temp)
    # One vectorised filter: deleting rows while iterating over 1:nrow()
    # shifts the remaining indices and lets some 2013 rows slip through.
    tweets_full <- tweets_full[
      !(str_detect(tweets_full$created_at, "2013$") %in% TRUE),
    ]
    break
  } else {
    max_id <-
as.character(as.numeric(tweets_full$id_str[nrow(tweets_full)]) - 1)
    # The expression above and the braces below finish a loop begun on the
    # previous line and are kept as written.
    # NOTE(review): as.numeric() cannot hold an 18-digit id exactly, so the
    # "- 1" may be lost.
    loop <- loop + 1
  }
}

# Scratch work: apply each clean-up step to the last raw response on its own,
# comparing a literal caret (fixed("^")) with the regex anchor "^".
current
tweets_temp <- fromJSON(correctJSON(current))
correctJSON(current)
string <- current
str_replace_all(string, pattern = perl('\\\\(?![tn"])'), replacement = " ")
str_replace_all(string, pattern = "\n", replacement = " ")
str_replace_all(string, pattern = "\r", replacement = " ")
str_replace_all(string, pattern = fixed("^"), replacement = " ")
str_replace_all(string, pattern = "^", replacement = " ")

# Clean a raw API response before it is handed to fromJSON().
#
# Replaces with a single space, in this order: every backslash that is not
# followed by t, n or a double quote; every newline; every carriage return;
# every match of the regex "^".
#
# string: character vector holding the response text.
# Returns the cleaned character vector.
# NOTE(review): "^" without fixed() is a start-of-string anchor, not a
# literal caret -- confirm which of the two variants tried above is intended.
# NOTE(review): newer stringr releases deprecate perl() in favour of regex();
# check the installed version before switching.
correctJSON <- function(string) {
  string <- str_replace_all(
    string,
    pattern = perl('\\\\(?![tn"])'),
    replacement = " "
  )
  string <- str_replace_all(string, pattern = "\n", replacement = " ")
  string <- str_replace_all(string, pattern = "\r", replacement = " ")
  str_replace_all(string, pattern = "^", replacement = " ")
}

# Subtract 1 from a non-negative integer held as a decimal string. The ids
# used here have 18 digits, beyond the 2^53 range in which doubles are exact,
# so the arithmetic is done on the digits rather than through as.numeric().
decrement_id_str <- function(id) {
  digits <- as.integer(strsplit(as.character(id), "", fixed = TRUE)[[1]])
  pos <- length(digits)
  while (pos > 1L && digits[pos] == 0L) {
    digits[pos] <- 9L # borrow from the next digit to the left
    pos <- pos - 1L
  }
  digits[pos] <- digits[pos] - 1L
  sub("^0+(?=[0-9])", "", paste(digits, collapse = ""), perl = TRUE)
}

# ---- Run: 200 tweets per page ----
api_url <- "https://api.twitter.com/1.1/statuses/user_timeline.json"
user <- "peteraltmaier"
max_count <- "200"
max_id <- "999999999999999999"
loop <- 1
keep <- c("created_at", "id_str", "text", "retweet_count")
# Remove leftovers from an earlier run without warning when they are absent.
rm(list = intersect(c("tweets_full", "tweets_temp"), ls()))

# Page backwards through the timeline until the oldest fetched tweet has a
# created_at ending in "2013", then drop every row from 2013 and stop.
repeat {
  query <- c(
    include_rts = 1, exclude_replies = "true", trim_user = "true",
    include_entities = "false", screen_name = user,
    count = max_count, max_id = max_id
  )
  current <- twitter_api_call(api_url, query, api_params)
  tweets_temp <- fromJSON(correctJSON(current))
  if (NROW(tweets_temp) == 0L) {
    # An empty page would leave max_id unchanged and the loop would never end.
    break
  }
  tweets_temp <- tweets_temp[keep]
  if (exists("tweets_full")) {
    tweets_full <- insertRow(tweets_full, tweets_temp)
  } else {
    tweets_full <- tweets_temp
  }
  status <- str_detect(tweets_full$created_at[nrow(tweets_full)], "2013$")
  if (status) {
    rm(tweets_temp)
    # One vectorised filter: deleting rows while iterating over 1:nrow()
    # shifts the remaining indices and lets some 2013 rows slip through.
    tweets_full <- tweets_full[
      !(str_detect(tweets_full$created_at, "2013$") %in% TRUE),
    ]
    break
  }
  max_id <- decrement_id_str(tweets_full$id_str[nrow(tweets_full)])
  loop <- loop + 1
}
current
# Scratch-pad statements from an interactive debugging session, run together
# on one physical line. In order, the line:
#   * replaces the regex "^" in `string` with a space and prints `current`;
#   * redefines correctJSON(): backslashes not followed by t, n or a double
#     quote, newlines and carriage returns are replaced by spaces, then the
#     regex "^" is replaced by a space;
#   * parses and validates `current`, with and without correctJSON();
#   * tries a series of str_extract_all() patterns against `string`, mostly
#     hex escapes such as "\xed", "\xba" and "\xa0".
# NOTE(review): "^" without fixed() is a start-of-string anchor, not a
# literal caret -- confirm that is what correctJSON() should replace.
# NOTE(review): several literals below ("\[", "\(", "\x..", "\x", "\x+",
# "\x.+", "\xhh", "\xxx", "\xHH") use escapes the R parser rejects, so this
# line cannot be parsed as written. They look like abandoned attempts; the
# intended patterns cannot be recovered from here.
str_replace_all(string, pattern = "^", replacement = " ") current correctJSON <- function(string) { string <- str_replace_all(string, pattern = perl('\\\\(?![tn"])'), replacement = " ") string <- str_replace_all(string, pattern = "\n", replacement = " ") string <- str_replace_all(string, pattern = "\r", replacement = " ") string <- str_replace_all(string, pattern = "^", replacement = " ") return(string) } tweets_temp <- fromJSON(correctJSON(current)) fromJSON(current) validate(correctJSON(current)) current str_extract_all(string, pattern = "\n") str_extract_all(string, pattern = "\xed") str_extract_all(string, pattern = "\[:alnum:]{3}") str_extract_all(string, pattern = "\[[:alnum:]]{3}") str_extract_all(string, pattern = "\\[[:alnum:]]{3}") str_extract_all(string, pattern = "\([[:alnum:]]{3})") str_extract_all(string, pattern = "\\([[:alnum:]]{3})") str_extract_all(string, pattern = "([[:alnum:]]{3})") str_extract_all(string, pattern = "\x..") str_extract_all(string, pattern = "\\x..") str_extract_all(string, pattern = "\x..") str_extract_all(string, pattern = "\xed") str_extract_all(string, pattern = "\x") str_extract_all(string, pattern = "\x+") str_extract_all(string, pattern = "\x.+") str_extract_all(string, pattern = "\x") str_extract_all(string, pattern = "\xhh") str_extract_all(string, pattern = "\xxx") str_extract_all(string, pattern = "[[:xdigit:]]") str_extract_all(string, pattern = "(\xhh)") str_extract_all(string, pattern = "[\xhh]") str_extract_all(string, pattern = "\xHH") str_extract_all(string, pattern = "\xa0") str_extract_all(string, pattern = "\xba?") str_extract_all(string, pattern = "(\xba)+?") str_extract_all(string, pattern = "(\xba)?") str_extract_all(string, pattern = "(\xba)") str_extract_all(string, pattern = "\xba") str_extract_all(string, pattern = "\xba") str_extract_all(string, pattern = "\xed") str_extract_all(string, pattern = "(\xed)") str_extract_all(string, pattern = "(\xed)?") str_extract_all(string, pattern = "(\xed)+") 
# Two more probes for the byte 0xED in the response text.
str_extract_all(string, pattern = "(\xed)+?")
str_extract_all(string, pattern = "\xed")

# Round-trip the text through a file on disk and re-validate it as JSON.
write(string, "temp.txt")
# NOTE(review): this reports the encoding mark of the literal file name, not
# of the file's contents -- confirm what was meant to be inspected.
Encoding("temp.txt")
readLines("temp.txt")
validate(string)
string <- readLines("temp.txt")
validate(string)

# Apply the four clean-up replacements in sequence, each substituting a
# single space: stray backslashes, newlines, carriage returns, regex "^".
string <- Reduce(
  function(text, pat) {
    str_replace_all(text, pattern = pat, replacement = " ")
  },
  list(perl('\\\\(?![tn"])'), "\n", "\r", "^"),
  string
)
validate(string)

# Parse from the file, then print and parse the untouched raw response.
fromJSON("temp.txt")
current
fromJSON(current)