initial commit v1.0
This commit is contained in:
59
R-Code/imprintanalyse-functions.R
Normal file
59
R-Code/imprintanalyse-functions.R
Normal file
@@ -0,0 +1,59 @@
|
||||
# THIS FILE CONTAINS ALL FUNCTIONS USED BY imprintanalyse.R
|
||||
|
||||
urlCheck <- function(url, i) {
|
||||
status <- url.exists(url, ssl.verifypeer = FALSE, timeout = 5, followlocation = TRUE, .header = FALSE, curl = handle)
|
||||
return(status)
|
||||
}
|
||||
|
||||
downloadFile <- function(url, dest, i) {
|
||||
if (df$ison[i]) {
|
||||
if (!file.exists(dest)) {
|
||||
file <- getURLContent(url, ssl.verifypeer = FALSE, timeout = 10, followlocation = TRUE, .encoding="UTF-8", curl = handle)
|
||||
Sys.sleep(0.2)
|
||||
write(file, dest)
|
||||
}
|
||||
else {
|
||||
cat(i, "The file", dest, "already exists locally.\n")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
downloadContact <- function(url, dest, i) {
|
||||
if (!file.exists(dest)) {
|
||||
file <- getURLContent(url, ssl.verifypeer = FALSE, timeout = 10, followlocation = TRUE, .encoding="UTF-8", curl = handle)
|
||||
Sys.sleep(0.2)
|
||||
write(file, dest)
|
||||
}
|
||||
else {
|
||||
cat(i, "The file", dest, "already exists locally.\n")
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
urlToPath <- function(url, parentfolder, subfolder) {
|
||||
url <- as.character(url)
|
||||
parentfolder <- as.character(parentfolder)
|
||||
subfolder <- as.character(subfolder)
|
||||
folder <- str_c(parentfolder, "/", subfolder)
|
||||
dir.create(folder, recursive=TRUE, showWarnings=FALSE)
|
||||
# use the full domain as a filename. basename() is not suitable
|
||||
filename <- str_replace(url, "^https?://", "")
|
||||
filename <- str_replace_all(filename, "/$", "")
|
||||
filename <- str_replace_all(filename, "/", "_")
|
||||
filename <- str_c(filename, ".html")
|
||||
# now write this into the dataframe. We need it later
|
||||
return(str_c(folder, "/", filename))
|
||||
}
|
||||
|
||||
relToAbsUrl <- function(abslink, rellink) {
|
||||
abslink <- as.character(abslink)
|
||||
rellink <- as.character(rellink)
|
||||
status <- str_detect(rellink, "https?://")
|
||||
if (!status) {
|
||||
abslink <- str_c(abslink, "/", rellink)
|
||||
}
|
||||
else {
|
||||
abslink <- rellink
|
||||
}
|
||||
return(abslink)
|
||||
}
|
||||
Reference in New Issue
Block a user