still have to fix JSON validation errors

This commit is contained in:
2014-11-30 03:41:23 +01:00
parent c2190c7060
commit 0be709ff08
5 changed files with 566 additions and 237 deletions
BIN
View File
Binary file not shown.
+495 -221
View File
@@ -1,142 +1,18 @@
install.packages("jsonlite")
install.packages(c("BH", "dplyr", "Lahman", "magrittr", "maps", "sp", "xtable"))
setwd("/home/max/Dokumente/Uni/Aktuell/BA-Arbeit/R-Code")
setwd("/home/max/Dokumente/Uni/Aktuell/BA-Arbeit/R-Code")
setwd("~/")
setwd("~/Dokumente/Uni/Aktuell/BA-Arbeit/R-Code")
ls()
# Set curl handle for friendly scraping
handle <- getCurlHandle(httpheader = list(from = "max.mehl@uni.kn",
'user-agent' = str_c(R.version$version.string)
)
)
source("functions.R")
handle <- getCurlHandle(httpheader = list(from = "max.mehl@uni.kn",
'user-agent' = str_c(R.version$version.string)
)
)
require(stringr)
require(RCurl)
handle <- getCurlHandle(httpheader = list(from = "max.mehl@uni.kn",
'user-agent' = str_c(R.version$version.string)
)
)
acc_url <- "http://www.bundestwitter.de/api/politiker"
source("functions.R")
require(jsonlite)
fromJSON(acc)
fromJSON(acc_url)
require(httr)
install.packages("httr")
require(httr)
fromJSON(acc_url)
df <- fromJSON(acc_url)
View(df)
require(jsonlite)
require(stringr)
require(RCurl)
require(httr)
source("functions.R")
setwd("~/Dokumente/Uni/Aktuell/BA-Arbeit/R-Code")
# Set curl handle for friendly scraping
handle <- getCurlHandle(httpheader = list(from = "max.mehl@uni.kn",
'user-agent' = str_c(R.version$version.string)
)
)
acc_url <- "http://www.bundestwitter.de/api/politiker"
df <- fromJSON(acc_url)
setwd("~/Dokumente/Uni/Aktuell/BA-Arbeit/uni-ba-issuecomp")
View(df)
acc_df <- fromJSON(acc_url)
rm df
rm(df)
View(acc_df)
install.packages("twitteR")
require(twitteR)
userTimeline(user = "peteraltmaier", n = "20")
read("twitter-api-keys.txt")
read.csv(file = "twitter-api-keys.txt", header = TRUE, sep = ";")
read.csv(file = "twitter-api-keys.txt", header = TRUE, sep = ";")
read.csv(file = "twitter-api-keys.txt", header = TRUE, sep = ";")
oauth <- read.csv(file = "twitter-api-keys.txt", header = TRUE, sep = ";")
View(oauth)
getTwitterOAuth(oauth$ConsumerKey, oauth$ConsumerSecret)
readLines("twitter-api-consumerkey.txt")
oauthKey <- readLines("twitter-api-consumerkey.txt")
oauthSec <- readLines("twitter-api-consumersecret.txt")
rm(oauth)
getTwitterOAuth(oauthKey, oauthSec)
getTwitterOAuth(oauthKey, oauthSec)
reqURL <- "https://api.twitter.com/oauth/request_token"
accessURL <- "https://api.twitter.com/oauth/access_token"
authURL <- "https://api.twitter.com/oauth/authorize"
consumerKey <- readLines("twitter-api-consumerkey.txt")
consumerSecret <- readLines("twitter-api-consumersecret.txt")
twitCred <- OAuthFactory$new(consumerKey=consumerKey,
consumerSecret=consumerSecret,
requestURL=reqURL,
accessURL=accessURL,
authURL=authURL)
twitCred$handshake()
rm(list=ls())
require(jsonlite)
require(stringr)
require(RCurl)
require(httr)
require(twitteR)
source("functions.R")
setwd("~/Dokumente/Uni/Aktuell/BA-Arbeit/uni-ba-issuecomp")
# Set curl handle for friendly scraping
handle <- getCurlHandle(httpheader = list(from = "max.mehl@uni.kn",
'user-agent' = str_c(R.version$version.string)
)
)
acc_url <- "http://www.bundestwitter.de/api/politiker"
acc_df <- fromJSON(acc_url)
# --------------
twAPI_reqURL <- "https://api.twitter.com/oauth/request_token"
twAPI_accessURL <- "https://api.twitter.com/oauth/access_token"
twAPI_authURL <- "https://api.twitter.com/oauth/authorize"
twAPI_consumerKey <- readLines("twitter-api-consumerkey.txt")
twAPI_consumerSecret <- readLines("twitter-api-consumersecret.txt")
twAPI_twitCred <- OAuthFactory$new(consumerKey=consumerKey,
consumerSecret=consumerSecret,
requestURL=reqURL,
accessURL=accessURL,
authURL=authURL)
twAPI_twitCred$handshake()
registerTwitterOAuth(twAPI_twitCred)
twAPI_reqURL <- "https://api.twitter.com/oauth/request_token"
twAPI_accessURL <- "https://api.twitter.com/oauth/access_token"
twAPI_authURL <- "https://api.twitter.com/oauth/authorize"
twAPI_consumerKey <- readLines("twitter-api-consumerkey.txt")
twAPI_consumerSecret <- readLines("twitter-api-consumersecret.txt")
twAPI_twitCred <- OAuthFactory$new(consumerKey=twAPI_consumerKey,
consumerSecret=twAPI_consumerSecret,
requestURL=twAPI_reqURL,
accessURL=twAPI_accessURL,
authURL=twAPI_authURL)
twAPI_twitCred$handshake()
twAPI_twitCred$handshake()
registerTwitterOAuth(twAPI_twitCred)
registerTwitterOAuth(twAPI_twitCred)
userTimeline(user = "peteraltmaier", n = "20")
userTimeline(user = "peteraltmaier", n = "20", includeRts = TRUE)
showStatus("530033823577550848")
getUser("peteraltmaier")
getUser("mxmehl")
lookupUsers("mxmehl")
userTimeline(user = "peteraltmaier", n = "20", includeRts = TRUE)
wd
userTimeline(user = "peteraltmaier", n = "20", includeRts = TRUE)
userTimeline(user = "peteraltmaier", n = "20", includeRts = TRUE)
lookupUsers("mxmehl")
lookupUsers("mxmehl")
install.packages("devtools")
require(devtools)
devtools::install_github("joyofdata/RTwitterAPI")
require(RTwitterAPI)
arams <- c(
status <- str_detect(tweets_full$created_at[r], "2013$")
if(is.na(status)) { status <- FALSE }
if(status) {
tweets_full <- tweets_full[-r,]
}
}
break
}
else {
max_id <- as.character(as.numeric(tweets_full$id_str[nrow(tweets_full)]) - 1)
loop <- loop + 1
}
}
View(tweets_full)
api_params <- c(
"oauth_consumer_key" = "c9Ob2fWNSONMC0mA2JlNaeRke",
"oauth_nonce" = NA,
"oauth_signature_method" = "HMAC-SHA1",
@@ -145,9 +21,258 @@ arams <- c(
"oauth_version" = "1.0",
"consumer_secret" = "cZ3Il2hmbLgK0Lc57mj5kUvymjVdsmZKYwKOGHR3NhCpvWgEOI",
"oauth_token_secret" = "rvfv8MgexFKTqrPNSoGrdrZVNhV4fTJb2Bgz249nbvKNg"
);
rm(arams)
params <- c(
)
api_url <- "https://api.twitter.com/1.1/statuses/user_timeline.json";
user <- "peteraltmaier"
max_count <- "10"
max_id <- "454359322768990208"
loop <- 1
keep <- c("created_at", "id_str", "text", "retweet_count")
rm(tweets_full, tweets_temp)
repeat {
query <- c(include_rts=1, exclude_replies="true", trim_user="true", include_entities="false",
screen_name=user,
count=max_count,
max_id=max_id);
if(exists("tweets_full")) {
tweets_temp <- fromJSON(correctJSON(twitter_api_call(api_url, query, api_params)))
tweets_temp <- tweets_temp[keep]
tweets_full <- insertRow(tweets_full, tweets_temp)
}
else {
tweets_full <- fromJSON(correctJSON(twitter_api_call(api_url, query, api_params)))
tweets_full <- tweets_full[keep]
}
status <- str_detect(tweets_full$created_at[nrow(tweets_full)], "2013$")
if (status) {
rm(tweets_temp)
for(r in 1:nrow(tweets_full)) {
status <- str_detect(tweets_full$created_at[r], "2013$")
if(is.na(status)) { status <- FALSE }
if(status) {
tweets_full <- tweets_full[-r,]
}
}
break
}
else {
max_id <- as.character(as.numeric(tweets_full$id_str[nrow(tweets_full)]) - 1)
loop <- loop + 1
}
}
View(tweets_full)
api_url <- "https://api.twitter.com/1.1/statuses/user_timeline.json";
user <- "peteraltmaier"
max_count <- "200"
max_id <- "999999999999999999"
loop <- 1
keep <- c("created_at", "id_str", "text", "retweet_count")
rm(tweets_full, tweets_temp)
repeat {
query <- c(include_rts=1, exclude_replies="true", trim_user="true", include_entities="false",
screen_name=user,
count=max_count,
max_id=max_id);
if(exists("tweets_full")) {
tweets_temp <- fromJSON(correctJSON(twitter_api_call(api_url, query, api_params)))
tweets_temp <- tweets_temp[keep]
tweets_full <- insertRow(tweets_full, tweets_temp)
}
else {
tweets_full <- fromJSON(correctJSON(twitter_api_call(api_url, query, api_params)))
tweets_full <- tweets_full[keep]
}
status <- str_detect(tweets_full$created_at[nrow(tweets_full)], "2013$")
if (status) {
rm(tweets_temp)
for(r in 1:nrow(tweets_full)) {
status <- str_detect(tweets_full$created_at[r], "2013$")
if(is.na(status)) { status <- FALSE }
if(status) {
tweets_full <- tweets_full[-r,]
}
}
break
}
else {
max_id <- as.character(as.numeric(tweets_full$id_str[nrow(tweets_full)]) - 1)
loop <- loop + 1
}
}
tweets_temp <- fromJSON(correctJSON(twitter_api_call(api_url, query, api_params)))
current <- twitter_api_call(api_url, query, api_params)
correctJSON(current)
api_url <- "https://api.twitter.com/1.1/statuses/user_timeline.json";
user <- "peteraltmaier"
max_count <- "200"
max_id <- "999999999999999999"
loop <- 1
keep <- c("created_at", "id_str", "text", "retweet_count")
rm(tweets_full, tweets_temp)
repeat {
query <- c(include_rts=1, exclude_replies="true", trim_user="true", include_entities="false",
screen_name=user,
count=max_count,
max_id=max_id);
if(exists("tweets_full")) {
current <- twitter_api_call(api_url, query, api_params)
tweets_temp <- fromJSON(correctJSON(current))
tweets_temp <- tweets_temp[keep]
tweets_full <- insertRow(tweets_full, tweets_temp)
}
else {
current <- twitter_api_call(api_url, query, api_params)
tweets_full <- fromJSON(correctJSON(current))
tweets_full <- tweets_full[keep]
}
status <- str_detect(tweets_full$created_at[nrow(tweets_full)], "2013$")
if (status) {
rm(tweets_temp)
for(r in 1:nrow(tweets_full)) {
status <- str_detect(tweets_full$created_at[r], "2013$")
if(is.na(status)) { status <- FALSE }
if(status) {
tweets_full <- tweets_full[-r,]
}
}
break
}
else {
max_id <- as.character(as.numeric(tweets_full$id_str[nrow(tweets_full)]) - 1)
loop <- loop + 1
}
}
current
string <- current
string <- str_replace_all(string, pattern = perl('\\\\(?![tn"])'), replacement = " ")
string <- str_replace_all(string, pattern = fixed("\n"), replacement = " ")
str_replace(string, pattern = fixed("\n"), replacement = "X-X-X-X")
str_replace_all(string, pattern = fixed("\n"), replacement = "X-X-X-X")
api_url <- "https://api.twitter.com/1.1/statuses/user_timeline.json";
user <- "peteraltmaier"
max_count <- "10"
max_id <- "999999999999999999"
loop <- 1
keep <- c("created_at", "id_str", "text", "retweet_count")
rm(tweets_full, tweets_temp)
repeat {
query <- c(include_rts=1, exclude_replies="true", trim_user="true", include_entities="false",
screen_name=user,
count=max_count,
max_id=max_id);
if(exists("tweets_full")) {
current <- twitter_api_call(api_url, query, api_params)
tweets_temp <- fromJSON(correctJSON(current))
tweets_temp <- tweets_temp[keep]
tweets_full <- insertRow(tweets_full, tweets_temp)
}
else {
current <- twitter_api_call(api_url, query, api_params)
tweets_full <- fromJSON(correctJSON(current))
tweets_full <- tweets_full[keep]
}
status <- str_detect(tweets_full$created_at[nrow(tweets_full)], "2013$")
if (status) {
rm(tweets_temp)
for(r in 1:nrow(tweets_full)) {
status <- str_detect(tweets_full$created_at[r], "2013$")
if(is.na(status)) { status <- FALSE }
if(status) {
tweets_full <- tweets_full[-r,]
}
}
break
}
else {
max_id <- as.character(as.numeric(tweets_full$id_str[nrow(tweets_full)]) - 1)
loop <- loop + 1
}
}
View(tweets_full)
View(tweets_temp)
api_url <- "https://api.twitter.com/1.1/statuses/user_timeline.json";
user <- "peteraltmaier"
max_count <- "20"
max_id <- "999999999999999999"
loop <- 1
keep <- c("created_at", "id_str", "text", "retweet_count")
rm(tweets_full, tweets_temp)
repeat {
query <- c(include_rts=1, exclude_replies="true", trim_user="true", include_entities="false",
screen_name=user,
count=max_count,
max_id=max_id);
if(exists("tweets_full")) {
current <- twitter_api_call(api_url, query, api_params)
tweets_temp <- fromJSON(correctJSON(current))
tweets_temp <- tweets_temp[keep]
tweets_full <- insertRow(tweets_full, tweets_temp)
}
else {
current <- twitter_api_call(api_url, query, api_params)
tweets_full <- fromJSON(correctJSON(current))
tweets_full <- tweets_full[keep]
}
status <- str_detect(tweets_full$created_at[nrow(tweets_full)], "2013$")
if (status) {
rm(tweets_temp)
for(r in 1:nrow(tweets_full)) {
status <- str_detect(tweets_full$created_at[r], "2013$")
if(is.na(status)) { status <- FALSE }
if(status) {
tweets_full <- tweets_full[-r,]
}
}
break
}
else {
max_id <- as.character(as.numeric(tweets_full$id_str[nrow(tweets_full)]) - 1)
loop <- loop + 1
}
}
View(tweets_full)
api_url <- "https://api.twitter.com/1.1/statuses/user_timeline.json";
user <- "peteraltmaier"
max_count <- "10"
max_id <- "999999999999999999"
loop <- 1
keep <- c("created_at", "id_str", "text", "retweet_count")
rm(tweets_full, tweets_temp)
repeat {
query <- c(include_rts=1, exclude_replies="false", trim_user="true", include_entities="false",
screen_name=user,
count=max_count,
max_id=max_id);
if(exists("tweets_full")) {
current <- twitter_api_call(api_url, query, api_params)
tweets_temp <- fromJSON(correctJSON(current))
tweets_temp <- tweets_temp[keep]
tweets_full <- insertRow(tweets_full, tweets_temp)
}
else {
current <- twitter_api_call(api_url, query, api_params)
tweets_full <- fromJSON(correctJSON(current))
tweets_full <- tweets_full[keep]
}
status <- str_detect(tweets_full$created_at[nrow(tweets_full)], "2013$")
if (status) {
rm(tweets_temp)
for(r in 1:nrow(tweets_full)) {
status <- str_detect(tweets_full$created_at[r], "2013$")
if(is.na(status)) { status <- FALSE }
if(status) {
tweets_full <- tweets_full[-r,]
}
}
break
}
else {
max_id <- as.character(as.numeric(tweets_full$id_str[nrow(tweets_full)]) - 1)
loop <- loop + 1
}
}
View(tweets_full)
api_params <- c(
"oauth_consumer_key" = "c9Ob2fWNSONMC0mA2JlNaeRke",
"oauth_nonce" = NA,
"oauth_signature_method" = "HMAC-SHA1",
@@ -156,83 +281,232 @@ params <- c(
"oauth_version" = "1.0",
"consumer_secret" = "cZ3Il2hmbLgK0Lc57mj5kUvymjVdsmZKYwKOGHR3NhCpvWgEOI",
"oauth_token_secret" = "rvfv8MgexFKTqrPNSoGrdrZVNhV4fTJb2Bgz249nbvKNg"
);
rm(twAPI_*)
rm(twAPI_)
rm(twAPI_accessURL)
params <- c(
"oauth_consumer_key" = "c9Ob2fWNSONMC0mA2JlNaeRke",
"oauth_nonce" = NA,
"oauth_signature_method" = "HMAC-SHA1",
"oauth_timestamp" = NA,
"oauth_token" = "1007025684-RFxCDFc4OPkt02bASmdci00TB4jgaPjfqxLRT58",
"oauth_version" = "1.0",
"consumer_secret" = "cZ3Il2hmbLgK0Lc57mj5kUvymjVdsmZKYwKOGHR3NhCpvWgEOI",
"oauth_token_secret" = "rvfv8MgexFKTqrPNSoGrdrZVNhV4fTJb2Bgz249nbvKNg"
);
url <- "https://api.twitter.com/1.1/friends/ids.json";
query <- c(cursor=-1, screen_name="hrw", count=10);
result <- RTwitterAPI::twitter_api_call(url, query, params)
result
url <- "https://api.twitter.com/1.1/statuses/user_timeline.json";
query <- c(screen_name="peteraltmaier", exclude_replies="true", include_rts="true");
result <- RTwitterAPI::twitter_api_call(url, query, params)
result
query <- c(screen_name="peteraltmaier", exclude_replies="true", include_rts="true", count="5");
result <- fromJSON(RTwitterAPI::twitter_api_call(url, query, params))
result
query <- c(screen_name="peteraltmaier", exclude_replies="true", include_rts=1, count="5");
result <- fromJSON(RTwitterAPI::twitter_api_call(url, query, params))
query <- c(screen_name="peteraltmaier", exclude_replies="true", include_rts=1, count="1");
result <- fromJSON(RTwitterAPI::twitter_api_call(url, query, params))
query <- c(screen_name="peteraltmaier", exclude_replies="true", include_rts=1, count="20");
result <- fromJSON(RTwitterAPI::twitter_api_call(url, query, params))
query <- c(screen_name="peteraltmaier", exclude_replies="true", include_rts=1, count="1");
result <- fromJSON(RTwitterAPI::twitter_api_call(url, query, params))
result
result[[1]]
result[[1]]$created_at
result[[1]]$id_str
result[[1]]$text
require(jsonlite)
require(stringr)
require(RCurl)
require(httr)
require(twitteR)
require(devtools)
require(RTwitterAPI)
source("functions.R")
setwd("~/Dokumente/Uni/Aktuell/BA-Arbeit/uni-ba-issuecomp")
# Set curl handle for friendly scraping
handle <- getCurlHandle(httpheader = list(from = "max.mehl@uni.kn",
'user-agent' = str_c(R.version$version.string)
)
)
acc_url <- "http://www.bundestwitter.de/api/politiker"
acc_df <- fromJSON(acc_url)
require(jsonlite)
require(stringr)
require(RCurl)
require(httr)
require(twitteR)
require(devtools)
require(RTwitterAPI)
source("functions.R")
setwd("~/Dokumente/Uni/Aktuell/BA-Arbeit/uni-ba-issuecomp")
# Set curl handle for friendly scraping
handle <- getCurlHandle(httpheader = list(from = "max.mehl@uni.kn",
'user-agent' = str_c(R.version$version.string)
)
)
acc_url <- "http://www.bundestwitter.de/api/politiker"
acc_df <- fromJSON(acc_url)
acc_df <- fromJSON(acc_url)
acc_url <- "http://www.bundestwitter.de/api/politiker"
acc_df <- fromJSON(acc_url)
acc_url <- "http://www.bundestwitter.de/api/politiker"
acc_df <- fromJSON(acc_url)
acc_jsonreadLines("politiker.txt")
acc_json <- readLines("politiker.txt")
acc_df <- fromJSON(acc_json)
acc_df$name
acc_df[[1]]$name
api_url <- "https://api.twitter.com/1.1/statuses/user_timeline.json";
user <- "peteraltmaier"
max_count <- "10"
max_id <- "999999999999999999"
loop <- 1
keep <- c("created_at", "id_str", "text", "retweet_count")
rm(tweets_full, tweets_temp)
repeat {
query <- c(include_rts=1, exclude_replies="false", trim_user="true", include_entities="false",
screen_name=user,
count=max_count,
max_id=max_id);
if(exists("tweets_full")) {
current <- twitter_api_call(api_url, query, api_params)
tweets_temp <- fromJSON(correctJSON(current))
tweets_temp <- tweets_temp[keep]
tweets_full <- insertRow(tweets_full, tweets_temp)
}
else {
current <- twitter_api_call(api_url, query, api_params)
tweets_full <- fromJSON(correctJSON(current))
tweets_full <- tweets_full[keep]
}
status <- str_detect(tweets_full$created_at[nrow(tweets_full)], "2013$")
if (status) {
rm(tweets_temp)
for(r in 1:nrow(tweets_full)) {
status <- str_detect(tweets_full$created_at[r], "2013$")
if(is.na(status)) { status <- FALSE }
if(status) {
tweets_full <- tweets_full[-r,]
}
}
break
}
else {
max_id <- as.character(as.numeric(tweets_full$id_str[nrow(tweets_full)]) - 1)
loop <- loop + 1
}
}
current
correctJSON(current)
validate(current)
string <- current
str_replace_all(string, pattern = fixed("\n"), replacement = " ")
current
fromJSON(current)
str_replace_all(string, pattern = fixed("\n"), replacement = " ")
str_replace_all(string, pattern = fixed("ewifjweaoegj"), replacement = " ")
str_detect(string, pattern = fixed("ewifjweaoegj"))
str_detect(string, pattern = fixed("ewifjweaoegj"))
str_detect(string, pattern = fixed("wfwe"))
str_detect(string, pattern = fixed("peter"))
str_detect(string, pattern = "kefme")
str_detect(string, pattern = "\n")
str_detect(string, pattern = "\\n")
string <- str_replace_all(string, pattern = fixed("\n"), replacement = " ")
string <- "bla bla \nwoqdwqowd"
string
str_replace_all(string, pattern = fixed("\n"), replacement = " ")
string <- str_replace_all(string, pattern = "\n", replacement = " ")
string
# Clean a raw Twitter API response so jsonlite can validate it:
# blank out invalid backslash escapes (any backslash not followed by
# t, n or "), literal newlines and carriage returns inside tweet texts,
# and literal caret characters.
correctJSON <- function(string) {
  # Backslash not starting a valid \t, \n or \" escape -> replace with a space.
  string <- gsub('\\\\(?![tn"])', " ", string, perl = TRUE)
  # Remaining troublemakers are plain characters; match them literally.
  for (bad in c("\n", "\r", "^")) {
    string <- gsub(bad, " ", string, fixed = TRUE)
  }
  string
}
api_url <- "https://api.twitter.com/1.1/statuses/user_timeline.json";
user <- "peteraltmaier"
max_count <- "200"
max_id <- "999999999999999999"
loop <- 1
keep <- c("created_at", "id_str", "text", "retweet_count")
rm(tweets_full, tweets_temp)
repeat {
query <- c(include_rts=1, exclude_replies="true", trim_user="true", include_entities="false",
screen_name=user,
count=max_count,
max_id=max_id);
if(exists("tweets_full")) {
current <- twitter_api_call(api_url, query, api_params)
tweets_temp <- fromJSON(correctJSON(current))
tweets_temp <- tweets_temp[keep]
tweets_full <- insertRow(tweets_full, tweets_temp)
}
else {
current <- twitter_api_call(api_url, query, api_params)
tweets_full <- fromJSON(correctJSON(current))
tweets_full <- tweets_full[keep]
}
status <- str_detect(tweets_full$created_at[nrow(tweets_full)], "2013$")
if (status) {
rm(tweets_temp)
for(r in 1:nrow(tweets_full)) {
status <- str_detect(tweets_full$created_at[r], "2013$")
if(is.na(status)) { status <- FALSE }
if(status) {
tweets_full <- tweets_full[-r,]
}
}
break
}
else {
max_id <- as.character(as.numeric(tweets_full$id_str[nrow(tweets_full)]) - 1)
loop <- loop + 1
}
}
current
tweets_temp <- fromJSON(correctJSON(current))
correctJSON(current)
string <- current
str_replace_all(string, pattern = perl('\\\\(?![tn"])'), replacement = " ")
str_replace_all(string, pattern = "\n", replacement = " ")
str_replace_all(string, pattern = "\r", replacement = " ")
str_replace_all(string, pattern = fixed("^"), replacement = " ")
str_replace_all(string, pattern = "^", replacement = " ")
# Strip characters that break jsonlite's JSON validation in raw Twitter API
# responses: stray backslashes (anything but the \t, \n and \" escapes),
# literal newlines / carriage returns inside tweet texts, and caret characters.
#
# @param string A character vector holding the raw JSON response.
# @return The cleaned character vector, same length as `string`.
correctJSON <- function(string) {
  # A backslash not followed by t, n or " is not a valid JSON escape -> blank it.
  string <- gsub('\\\\(?![tn"])', " ", string, perl = TRUE)
  string <- gsub("\n", " ", string, fixed = TRUE)
  string <- gsub("\r", " ", string, fixed = TRUE)
  # BUG FIX: pattern = "^" was the regex start-of-string anchor, which
  # prepended a space to every input instead of removing caret characters
  # (the stated intent). Match the literal "^" instead.
  string <- gsub("^", " ", string, fixed = TRUE)
  return(string)
}
# Page backwards through a user's timeline via the Twitter v1.1 REST API,
# accumulating tweets into tweets_full until a tweet from 2013 is reached.
api_url <- "https://api.twitter.com/1.1/statuses/user_timeline.json";
user <- "peteraltmaier"
max_count <- "200"
# Start above any real tweet id so the first request returns the newest tweets.
max_id <- "999999999999999999"
loop <- 1
# Columns kept from each API response.
keep <- c("created_at", "id_str", "text", "retweet_count")
# Clear leftovers from a previous run; exists("tweets_full") below relies on this.
rm(tweets_full, tweets_temp)
repeat {
query <- c(include_rts=1, exclude_replies="true", trim_user="true", include_entities="false",
screen_name=user,
count=max_count,
max_id=max_id);
# First iteration seeds tweets_full; later iterations append via insertRow.
if(exists("tweets_full")) {
current <- twitter_api_call(api_url, query, api_params)
# correctJSON scrubs characters that make fromJSON's validation fail.
tweets_temp <- fromJSON(correctJSON(current))
tweets_temp <- tweets_temp[keep]
tweets_full <- insertRow(tweets_full, tweets_temp)
}
else {
current <- twitter_api_call(api_url, query, api_params)
tweets_full <- fromJSON(correctJSON(current))
tweets_full <- tweets_full[keep]
}
# created_at ends with the year; a "2013$" hit on the oldest row means we
# have paged back far enough.
status <- str_detect(tweets_full$created_at[nrow(tweets_full)], "2013$")
if (status) {
rm(tweets_temp)
# NOTE(review): rows are removed from tweets_full while iterating over its
# original row count, so indices shift and later r may exceed nrow — the
# is.na() guard below papers over that; some 2013 rows may survive. Verify.
for(r in 1:nrow(tweets_full)) {
status <- str_detect(tweets_full$created_at[r], "2013$")
if(is.na(status)) { status <- FALSE }
if(status) {
tweets_full <- tweets_full[-r,]
}
}
break
}
else {
# Continue paging: next request returns tweets strictly older than the
# oldest one seen so far.
max_id <- as.character(as.numeric(tweets_full$id_str[nrow(tweets_full)]) - 1)
loop <- loop + 1
}
}
current
str_replace_all(string, pattern = "^", replacement = " ")
current
# Strip characters that break jsonlite's JSON validation in raw Twitter API
# responses: stray backslashes (anything but the \t, \n and \" escapes),
# literal newlines / carriage returns inside tweet texts, and caret characters.
#
# @param string A character vector holding the raw JSON response.
# @return The cleaned character vector, same length as `string`.
correctJSON <- function(string) {
  # A backslash not followed by t, n or " is not a valid JSON escape -> blank it.
  string <- gsub('\\\\(?![tn"])', " ", string, perl = TRUE)
  string <- gsub("\n", " ", string, fixed = TRUE)
  string <- gsub("\r", " ", string, fixed = TRUE)
  # BUG FIX: pattern = "^" was the regex start-of-string anchor, which
  # prepended a space to every input instead of removing caret characters
  # (the stated intent). Match the literal "^" instead.
  string <- gsub("^", " ", string, fixed = TRUE)
  return(string)
}
tweets_temp <- fromJSON(correctJSON(current))
fromJSON(current)
validate(correctJSON(current))
current
str_extract_all(string, pattern = "\n")
str_extract_all(string, pattern = "\xed")
str_extract_all(string, pattern = "\[:alnum:]{3}")
str_extract_all(string, pattern = "\[[:alnum:]]{3}")
str_extract_all(string, pattern = "\\[[:alnum:]]{3}")
str_extract_all(string, pattern = "\([[:alnum:]]{3})")
str_extract_all(string, pattern = "\\([[:alnum:]]{3})")
str_extract_all(string, pattern = "([[:alnum:]]{3})")
str_extract_all(string, pattern = "\x..")
str_extract_all(string, pattern = "\\x..")
str_extract_all(string, pattern = "\x..")
str_extract_all(string, pattern = "\xed")
str_extract_all(string, pattern = "\x")
str_extract_all(string, pattern = "\x+")
str_extract_all(string, pattern = "\x.+")
str_extract_all(string, pattern = "\x")
str_extract_all(string, pattern = "\xhh")
str_extract_all(string, pattern = "\xxx")
str_extract_all(string, pattern = "[[:xdigit:]]")
str_extract_all(string, pattern = "(\xhh)")
str_extract_all(string, pattern = "[\xhh]")
str_extract_all(string, pattern = "\xHH")
str_extract_all(string, pattern = "\xa0")
str_extract_all(string, pattern = "\xba?")
str_extract_all(string, pattern = "(\xba)+?")
str_extract_all(string, pattern = "(\xba)?")
str_extract_all(string, pattern = "(\xba)")
str_extract_all(string, pattern = "\xba")
str_extract_all(string, pattern = "\xba")
str_extract_all(string, pattern = "\xed")
str_extract_all(string, pattern = "(\xed)")
str_extract_all(string, pattern = "(\xed)?")
str_extract_all(string, pattern = "(\xed)+")
str_extract_all(string, pattern = "(\xed)+?")
str_extract_all(string, pattern = "\xed")
write(string, "temp.txt")
Encoding("temp.txt")
readLines("temp.txt")
validate(string)
string <- readLines("temp.txt")
validate(string)
string <- str_replace_all(string, pattern = perl('\\\\(?![tn"])'), replacement = " ")
string <- str_replace_all(string, pattern = "\n", replacement = " ")
string <- str_replace_all(string, pattern = "\r", replacement = " ")
string <- str_replace_all(string, pattern = "^", replacement = " ")
validate(string)
fromJSON("temp.txt")
current
fromJSON(current)
+56 -14
View File
@@ -15,16 +15,27 @@ handle <- getCurlHandle(httpheader = list(from = "max.mehl@uni.kn",
)
acc_url <- "http://www.bundestwitter.de/api/politiker"
acc_json <- readLines("politiker.txt")
acc_df <- fromJSON(acc_json)
#acc_json <- readLines("politiker.txt")
acc_df <- fromJSON(acc_url)
# --------------
url <- "https://api.twitter.com/1.1/statuses/user_timeline.json";
query <- c(screen_name="mxmehl", include_rts=1, count="200", exclude_replies="true", trim_user="true", include_entities="false");
as.character(as.numeric(result$id_str[nrow(result)]) - 1)
str_detect(result$created_at[nrow(result)], "2013$")
result <- fromJSON(correctJSON(twitter_api_call(url, query, params)))
# ---------------
# http://www.joyofdata.de/blog/twitters-rest-api-v1-1-with-r-for-linux-and-windows/
# devtools::install_github("joyofdata/RTwitterAPI")
params <- c(
api_params <- c(
"oauth_consumer_key" = "c9Ob2fWNSONMC0mA2JlNaeRke",
"oauth_nonce" = NA,
"oauth_signature_method" = "HMAC-SHA1",
@@ -33,18 +44,49 @@ params <- c(
"oauth_version" = "1.0",
"consumer_secret" = "cZ3Il2hmbLgK0Lc57mj5kUvymjVdsmZKYwKOGHR3NhCpvWgEOI",
"oauth_token_secret" = "rvfv8MgexFKTqrPNSoGrdrZVNhV4fTJb2Bgz249nbvKNg"
);
)
url <- "https://api.twitter.com/1.1/statuses/user_timeline.json";
query <- c(screen_name="mxmehl", include_rts=1, count="200", exclude_replies="true", trim_user="true", include_entities="false");
as.character(as.numeric(result$id_str[nrow(result)]) - 1)
str_detect(result$created_at[nrow(result)], "2013$")
result <- twitter_api_call(url, query, params)
result <- fromJSON(correctJSON(result))
api_url <- "https://api.twitter.com/1.1/statuses/user_timeline.json";
user <- "peteraltmaier"
max_count <- "200"
max_id <- "999999999999999999"
loop <- 1
keep <- c("created_at", "id_str", "text", "retweet_count")
rm(tweets_full, tweets_temp)
repeat {
query <- c(include_rts=1, exclude_replies="true", trim_user="true", include_entities="false",
screen_name=user,
count=max_count,
max_id=max_id);
if(exists("tweets_full")) {
current <- twitter_api_call(api_url, query, api_params)
tweets_temp <- fromJSON(correctJSON(current))
tweets_temp <- tweets_temp[keep]
tweets_full <- insertRow(tweets_full, tweets_temp)
}
else {
current <- twitter_api_call(api_url, query, api_params)
tweets_full <- fromJSON(correctJSON(current))
tweets_full <- tweets_full[keep]
}
status <- str_detect(tweets_full$created_at[nrow(tweets_full)], "2013$")
if (status) {
rm(tweets_temp)
for(r in 1:nrow(tweets_full)) {
status <- str_detect(tweets_full$created_at[r], "2013$")
if(is.na(status)) { status <- FALSE }
if(status) {
tweets_full <- tweets_full[-r,]
}
}
break
}
else {
max_id <- as.character(as.numeric(tweets_full$id_str[nrow(tweets_full)]) - 1)
loop <- loop + 1
}
}
# ---------------
+12 -2
View File
@@ -3,7 +3,17 @@ require(stringr)
# Replace characters messing up JSON validation (\,\n,^)
correctJSON <- function(string) {
string <- str_replace_all(string, pattern = perl('\\\\(?![tn"])'), replacement = " ")
string <- str_replace_all(string, pattern = fixed("\n"), replacement = " ")
string <- str_replace_all(string, pattern = fixed("^"), replacement = " ")
string <- str_replace_all(string, pattern = "\n", replacement = " ")
string <- str_replace_all(string, pattern = "\r", replacement = " ")
string <- str_replace_all(string, pattern = "^", replacement = " ")
# NOTE(review): raw surrogate-pair emoji bytes (e.g. "\xed\xa0\xbd\xed\xb1\x8d")
# pasted here from a failing response still break JSON validation — not yet handled.
return(string)
}
# Insert `newrow` into `existingDF` at row position `r`.
#
# BUG FIX: the original unconditionally overwrote `r` with
# nrow(existingDF) + 1, so a caller-supplied position was silently ignored
# and the row was always appended. `r` now defaults to appending (identical
# to the old behavior for two-argument calls, as used by the paging loops)
# but is honoured when supplied.
#
# @param existingDF A data frame.
# @param newrow A one-row data frame with the same columns as `existingDF`.
# @param r Target row index for the inserted row; defaults to appending.
# @return `existingDF` with `newrow` inserted and row names renumbered 1..n.
insertRow <- function(existingDF, newrow, r = nrow(existingDF) + 1) {
  r <- as.numeric(r)
  existingDF <- rbind(existingDF, newrow)
  # Give the appended row the fractional rank r - 0.5 so ordering slots it
  # just before the current row r; drop = FALSE keeps 1-column data frames
  # from collapsing to a vector.
  existingDF <- existingDF[order(c(seq_len(nrow(existingDF) - 1), r - 0.5)), , drop = FALSE]
  row.names(existingDF) <- seq_len(nrow(existingDF))
  return(existingDF)
}
+3
View File
File diff suppressed because one or more lines are too long