# THIS FILE CONTAINS ALL FUNCTIONS USED BY imprintanalyse.R urlCheck <- function(url, i) { status <- url.exists(url, ssl.verifypeer = FALSE, timeout = 5, followlocation = TRUE, .header = FALSE, curl = handle) return(status) } downloadFile <- function(url, dest, i) { if (df$ison[i]) { if (!file.exists(dest)) { file <- getURLContent(url, ssl.verifypeer = FALSE, timeout = 10, followlocation = TRUE, .encoding="UTF-8", curl = handle) Sys.sleep(0.2) write(file, dest) } else { cat(i, "The file", dest, "already exists locally.\n") } } } downloadContact <- function(url, dest, i) { if (!file.exists(dest)) { file <- getURLContent(url, ssl.verifypeer = FALSE, timeout = 10, followlocation = TRUE, .encoding="UTF-8", curl = handle) Sys.sleep(0.2) write(file, dest) } else { cat(i, "The file", dest, "already exists locally.\n") } } urlToPath <- function(url, parentfolder, subfolder) { url <- as.character(url) parentfolder <- as.character(parentfolder) subfolder <- as.character(subfolder) folder <- str_c(parentfolder, "/", subfolder) dir.create(folder, recursive=TRUE, showWarnings=FALSE) # use the full domain as a filename. basename() is not suitable filename <- str_replace(url, "^https?://", "") filename <- str_replace_all(filename, "/$", "") filename <- str_replace_all(filename, "/", "_") filename <- str_c(filename, ".html") # now write this into the dataframe. We need it later return(str_c(folder, "/", filename)) } relToAbsUrl <- function(abslink, rellink) { abslink <- as.character(abslink) rellink <- as.character(rellink) status <- str_detect(rellink, "https?://") if (!status) { abslink <- str_c(abslink, "/", rellink) } else { abslink <- rellink } return(abslink) }