Files
uni-imprint-analysis/R-Code/imprintanalyse-functions.R
2014-11-28 18:14:09 +01:00

59 lines
1.8 KiB
R

# THIS FILE CONTAINS ALL FUNCTIONS USED BY imprintanalyse.R
# Ask whether `url` can be reached.
#
# Args:
#   url: address to probe; handed to url.exists() unchanged.
#   i:   accepted for call compatibility, not used inside this function.
#
# Returns:
#   Whatever url.exists() yields when called with `.header = FALSE`.
#
# Notes:
#   * `handle` is a free variable: it is not created here and has to exist in
#     an enclosing scope before this function is called.
#   * url.exists() is presumably RCurl's -- TODO confirm against the script
#     that loads the packages.
#   * Peer certificate verification is switched off and the timeout is 5.
urlCheck <- function(url, i) {
  url.exists(
    url,
    ssl.verifypeer = FALSE,
    timeout = 5,
    followlocation = TRUE,
    .header = FALSE,
    curl = handle
  )
}
# Download `url` to `dest`, but only for rows flagged as online.
#
# Args:
#   url:  address to fetch.
#   dest: local file the content is written to.
#   i:    index into `df$ison`; also printed in the "already exists" message.
#
# Returns:
#   NULL, invisibly.
#
# Notes:
#   * `df` is a free variable (not a parameter); it has to exist in an
#     enclosing scope and carry an `ison` column. Presumably this is the main
#     data frame of the calling script -- verify against the caller.
#   * A flag that is NA (e.g. a row that was never checked, or an index past
#     the end of the column) counts as "not online" and the row is skipped.
#     Previously such a flag stopped the whole run with
#     "missing value where TRUE/FALSE needed".
#   * The fetch-unless-cached step is identical to downloadContact(), defined
#     in this file, so it is delegated instead of being duplicated.
downloadFile <- function(url, dest, i) {
  online <- as.logical(df$ison[i])
  if (!isTRUE(online)) {
    return(invisible(NULL))
  }
  downloadContact(url, dest, i)
}
# Fetch `url` into `dest` unless `dest` is already present on disk.
#
# Args:
#   url:  address to fetch.
#   dest: local file the content is written to.
#   i:    only used as a prefix of the "already exists" message.
#
# Returns:
#   NULL, invisibly.
#
# Notes:
#   * `handle` is a free variable: it is not created here and has to exist in
#     an enclosing scope before this function is called.
#   * getURLContent() is presumably RCurl's -- TODO confirm against the script
#     that loads the packages.
downloadContact <- function(url, dest, i) {
  # Cached copy found: report it and leave the file untouched.
  if (file.exists(dest)) {
    cat(i, "The file", dest, "already exists locally.\n")
    return(invisible(NULL))
  }

  page <- getURLContent(
    url,
    ssl.verifypeer = FALSE,
    timeout = 10,
    followlocation = TRUE,
    .encoding = "UTF-8",
    curl = handle
  )
  # Short pause after every request that actually hit the network.
  Sys.sleep(0.2)
  write(page, dest)
}
# Derive the local .html path for a URL and make sure its folder exists.
#
# Args:
#   url:          URL the file name is derived from (coerced to character).
#   parentfolder: top-level output folder (coerced to character).
#   subfolder:    folder below `parentfolder` (coerced to character).
#
# Returns:
#   "<parentfolder>/<subfolder>/<name>.html", where <name> is the URL with a
#   leading "http://" or "https://" removed, a trailing "/" dropped and every
#   remaining "/" replaced by "_".
#
# Side effect:
#   Creates <parentfolder>/<subfolder> (recursively) on every call; warnings
#   from dir.create() are suppressed.
urlToPath <- function(url, parentfolder, subfolder) {
  link <- as.character(url)
  target_dir <- str_c(as.character(parentfolder), "/", as.character(subfolder))
  dir.create(target_dir, recursive = TRUE, showWarnings = FALSE)

  # The whole URL (host plus path) becomes the file name; basename() would
  # keep only the last path segment.
  stem <- str_replace(link, "^https?://", "")
  stem <- str_replace_all(str_replace_all(stem, "/$", ""), "/", "_")

  # The path is only returned here; storing it is left to the caller.
  str_c(target_dir, "/", stem, ".html")
}
# Turn a link found on a page into an absolute URL.
#
# Args:
#   abslink: base URL the link was found on (coerced to character).
#   rellink: the link itself, relative or already absolute (coerced to
#            character; surrounding whitespace is removed).
#
# Returns:
#   Character vector. A `rellink` that starts with "http://" or "https://"
#   (any letter case) is returned as is; anything else is appended to
#   `abslink` with exactly one "/" at the junction. NA in `rellink` gives NA,
#   as does NA in `abslink` when the link is relative. Inputs are recycled to
#   a common length; a zero-length input gives character(0).
#
# Notes:
#   * The scheme test is anchored at the start of the link, so a relative
#     link that merely contains a URL (e.g. "go?to=http://x") is no longer
#     mistaken for an absolute one.
#   * Slashes at the end of `abslink` and at the start of `rellink` are
#     collapsed so the result has no "//" or "///" at the junction.
#   * This is plain concatenation, not full RFC 3986 reference resolution:
#     "../", root-relative and protocol-relative ("//host/...") links are
#     simply appended to `abslink`.
relToAbsUrl <- function(abslink, rellink) {
  abslink <- as.character(abslink)
  rellink <- trimws(as.character(rellink))
  if (length(abslink) == 0L || length(rellink) == 0L) {
    return(character(0))
  }

  n <- max(length(abslink), length(rellink))
  abslink <- rep_len(abslink, n)
  rellink <- rep_len(rellink, n)

  is_absolute <- grepl("^https?://", rellink, ignore.case = TRUE)

  resolved <- paste0(sub("/+$", "", abslink), "/", sub("^/+", "", rellink))
  resolved[is_absolute] <- rellink[is_absolute]

  # paste0() would turn NA into the text "NA"; restore real missing values.
  resolved[is.na(rellink) | (!is_absolute & is.na(abslink))] <- NA_character_
  resolved
}