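# Helper functions used by imprintanalyse.R.
#
# The functions below call url.exists() and getURLContent() (RCurl) and the
# str_*() helpers (stringr), and they read two objects that are not passed as
# arguments: `handle` (handed to curl) and `df` (its `ison` column is read).
# The calling script must make all of these available before using them.
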
#' Probe a URL and report what `url.exists()` says about it.
#'
#' The request follows redirects, gives up after 5 seconds and is made with
#' certificate verification switched off (`ssl.verifypeer = FALSE`). The curl
#' handle is not an argument: `handle` is looked up in the enclosing
#' environment and must exist when the function is called.
#'
#' @param url URL to probe.
#' @param i Not used in the body; presumably the caller's row index, kept so
#'   the signature matches the other helpers -- confirm against the caller.
#' @return The value of `url.exists()` called with `.header = FALSE`.
urlCheck <- function(url, i) {
  url.exists(
    url,
    ssl.verifypeer = FALSE,
    timeout = 5,
    followlocation = TRUE,
    .header = FALSE,
    curl = handle
  )
}
#' Download a page to `dest`, but only for rows flagged as online.
#'
#' The online flag is read from `df$ison[i]`. Neither `df` nor the curl
#' `handle` is an argument: both are looked up in the enclosing environment
#' and must exist when the function is called.
#'
#' Nothing is fetched when the flag is `FALSE` or `NA`, or when `dest` already
#' exists (in the latter case a note is printed).
#'
#' @param url URL to fetch.
#' @param dest Path of the file the content is written to.
#' @param i Row index into `df`; also printed in the "already exists" note.
#' @return `NULL`, invisibly. Called for its side effects (file on disk,
#'   console message).
downloadFile <- function(url, dest, i) {
  online <- as.logical(df$ison[i])

  # A bare `if (df$ison[i])` stops with "missing value where TRUE/FALSE
  # needed" when the flag is NA (row never checked, or `i` past the last row).
  # Treat "unknown" like "offline" and skip the row instead of aborting.
  if (is.na(online) || !online) {
    return(invisible(NULL))
  }

  if (file.exists(dest)) {
    cat(i, "The file", dest, "already exists locally.\n")
    return(invisible(NULL))
  }

  page <- getURLContent(
    url,
    ssl.verifypeer = FALSE,
    timeout = 10,
    followlocation = TRUE,
    .encoding = "UTF-8",
    curl = handle
  )
  # Short pause after every request; presumably to throttle the crawl.
  Sys.sleep(0.2)
  write(page, dest)
  invisible(NULL)
}
#' Download a page to `dest` unless that file is already on disk.
#'
#' When `dest` exists, a note is printed and nothing is fetched. Otherwise the
#' content of `url` is requested (redirects followed, 10 second timeout,
#' certificate verification off) and written to `dest`. The curl `handle` is
#' not an argument: it is looked up in the enclosing environment.
#'
#' @param url URL to fetch.
#' @param dest Path of the file the content is written to.
#' @param i Only used as a prefix of the "already exists" note; presumably the
#'   caller's row index.
#' @return `NULL`. Called for its side effects.
downloadContact <- function(url, dest, i) {
  if (file.exists(dest)) {
    cat(i, "The file", dest, "already exists locally.\n")
  } else {
    page <- getURLContent(
      url,
      ssl.verifypeer = FALSE,
      timeout = 10,
      followlocation = TRUE,
      .encoding = "UTF-8",
      curl = handle
    )
    # Short pause after every request; presumably to throttle the crawl.
    Sys.sleep(0.2)
    write(page, dest)
  }
}
#' Build the local file path under which a URL's page is stored.
#'
#' The target directory `parentfolder/subfolder` is created as a side effect
#' (recursively, without warnings if it already exists). The file name is
#' derived from the whole URL rather than its basename: the leading
#' `http://` / `https://` is removed, a trailing slash is dropped, every
#' remaining slash becomes `_`, and `.html` is appended.
#'
#' @param url Page URL; coerced with `as.character()`.
#' @param parentfolder Top-level output directory; coerced to character.
#' @param subfolder Directory below `parentfolder`; coerced to character.
#' @return The path `parentfolder/subfolder/<file name>.html` as a character
#'   value. The function only returns the path; storing it is up to the caller.
urlToPath <- function(url, parentfolder, subfolder) {
  page_url <- as.character(url)
  target_dir <- str_c(as.character(parentfolder), "/", as.character(subfolder))
  dir.create(target_dir, recursive = TRUE, showWarnings = FALSE)

  # Use the full address (host plus path) as the file stem; basename() would
  # keep only the last path component.
  stem <- str_replace(page_url, "^https?://", "")
  stem <- str_replace_all(stem, "/$", "")
  stem <- str_replace_all(stem, "/", "_")

  str_c(target_dir, "/", stem, ".html")
}
#' Turn a link found on a page into an absolute URL.
#'
#' A link that already starts with `http://` or `https://` is returned
#' unchanged. Any other link is appended to `abslink` with exactly one `/`
#' between the two parts, regardless of whether `abslink` ends with a slash
#' or `rellink` starts with one.
#'
#' This is plain concatenation, not full relative-reference resolution:
#' `..` segments and protocol-relative links (`//host/path`) get no special
#' treatment, and `abslink` is used as given (presumably the site root --
#' confirm against the caller).
#'
#' @param abslink Base URL; coerced with `as.character()`.
#' @param rellink Link to resolve, relative or absolute; coerced to character.
#'   Expected to be a single non-missing value.
#' @return The absolute URL as a character value.
relToAbsUrl <- function(abslink, rellink) {
  abslink <- as.character(abslink)
  rellink <- as.character(rellink)

  # Anchored on purpose: only a link that *starts* with a scheme is absolute.
  # Unanchored, a relative link such as "go?to=http://other.org" would be
  # mistaken for an absolute one and returned without its base.
  if (str_detect(rellink, "^https?://")) {
    return(rellink)
  }

  # Trim the slashes at the seam so the join never yields "//" or "///".
  base <- str_replace(abslink, "/+$", "")
  path <- str_replace(rellink, "^/+", "")
  str_c(base, "/", path)
}