Enables splitted downloading of huge files with MD5 comparison and other features
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

459 lines
12 KiB

#!/bin/bash
########################################################################
# Copyright (C) 2015 Max Mehl <mail@mehl.mx>
########################################################################
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
########################################################################
#
# This script enables downloading huge files over unstable and slow
# internet connections by splitting files and always checking their
# hashsums. This way, file integrity can be ensured without big data loss.
#
# Please read my blog post about this application:
# http://blog.mehl.mx/2015/splitdl-downloading-huge-files-from-slow-and-unstable-internet-connections/
#
########################################################################
# Default settings. Each value can be replaced by the command-line option
# named in parentheses.
SPLITSIZE='10M'               # Desired size of the split files (-s)
INFO='info.cfg'               # Desired name of the info document (-i)
CHECKSUM='md5sum'             # Application used to create hashsums (-c)
WGETOPT='-nv --show-progress' # Options for wget (-w); the default suits wget 1.16.x
FILE=''                       # File to split or link to download (-f)
MODE=''                       # "server" or "client" (-m)
# Print the usage text to stdout.
# Globals read: SPLITSIZE, INFO, CHECKSUM, WGETOPT (shown as current defaults).
help() {
  # Unquoted here-document: the variables below are expanded, everything
  # else is emitted literally.
  cat <<EOF
This program is used to split large files into several small
files to avoid data loss and the need to re-download them from
slow or unstable internet connections.

Written by Max Mehl <mail@mehl.mx> in 2015.

Necessary arguments to start this application:
-m Desired MODE. Possible values are "server" or "client"
 "server" splits the file on the server and needs a file for -f.
 "client" downloads the files and needs a download link for -f.
-f File in the same directory to be splitted or link to be downloaded.

Optional arguments:
-s Define the desired size of splitted files. Default is $SPLITSIZE
-i Define the desired name of the info document.
Default is $INFO
-c Define the program which should be used to calculate the hashsum.
Default is $CHECKSUM
-w Define the options for wget.
Default is $WGETOPT

Examples:
Split Hugefile.iso in parts of 50MB and use sha1sum as hashing program.
 split-dl -m server -f Hugefile.iso -s 50M -c sha1sum

Download the splitted files of Hugefile.iso.
The hashing program has to be the same as on the server side.
 split-dl -m client -f http://example.com/dl-Hugefile.iso -c sha1sum
EOF
}
# Parse the command-line options into the global settings.
# Arguments: the script's arguments ("$@")
# Globals written: SPLITSIZE, INFO, CHECKSUM, WGETOPT, FILE, MODE
# Exits: 0 after printing the help for -h; 1 on an unknown option or a
#        missing option argument.
parse_args() {
  # OPTIND is local so the function can be called more than once.
  local opt OPTIND=1
  # "h" takes no argument (it used to be declared as "h:", so a plain -h was
  # rejected). The leading ":" lets us report option errors ourselves.
  while getopts ':hs:i:c:w:f:m:' opt; do
    case "$opt" in
      h)
        help
        exit 0
        ;;
      s)
        SPLITSIZE=$OPTARG
        ;;
      i)
        INFO=$OPTARG
        ;;
      c)
        CHECKSUM=$OPTARG
        ;;
      w)
        WGETOPT=$OPTARG
        ;;
      f)
        FILE=$OPTARG
        ;;
      m)
        MODE=$OPTARG
        ;;
      :)
        echo "Option -$OPTARG requires an argument." >&2
        exit 1
        ;;
      \?)
        echo "Unknown option -$OPTARG." >&2
        exit 1
        ;;
    esac
  done
}
parse_args "$@"
# DO NOT EDIT BELOW HERE
# ----------------------
# # # # # # # # # # # # # # # # # # # # # # # # #
# FUNCTIONS
# # # #
# Pause until the user confirms with Enter. The typed line is stored in the
# global END; pressing Ctrl+C aborts the whole script instead.
checkwait() {
  local prompt="Continue? Press Ctrl+C to cancel."
  read -p "$prompt" END
}
# Print the hashsum of a file.
# Arguments: $1 - path of the file
# Globals read: CHECKSUM - hashing program to run (falls back to md5sum when
#               unset or empty). It used to be ignored in favour of a
#               hard-coded md5sum.
# Outputs: the bare hashsum on stdout; nothing if the program fails
# Returns: the exit status of the hashing program
gethash() {
  local sum
  sum=$("${CHECKSUM:-md5sum}" -- "$1") || return
  # The tools print "<hash>  <file>"; keep only the first field.
  printf '%s\n' "${sum%% *}"
}
# Compare the hashsum of a file with an expected hashsum.
# Arguments: $1 - path of the file
#            $2 - expected hashsum
# Outputs: "true" if both match, otherwise "false". An empty hashsum (the
#          file could not be hashed) never counts as a match.
comphash() {
  local actual expected=$2
  actual=$(gethash "$1")
  if [ -n "$actual" ] && [ "$actual" == "$expected" ]; then
    echo "true"
  else
    echo "false"
  fi
}
# Print the size in bytes of a file or directory as reported by `du -sb`.
# Arguments: $1 - path of the file or directory
# Globals read: BLOCK - if non-empty, it is subtracted from the size of a
#               directory (the script sets it to the size of an empty
#               directory in its one-time section).
# Outputs: the size on stdout (an empty line if du fails)
getdu() {
  local size
  # -s yields exactly one summary line even if a directory has
  # subdirectories; awk keeps the size column only.
  size=$(du -sb -- "$1" | awk '{ print $1; exit }')
  if [ -d "$1" ] && [ -n "$BLOCK" ]; then
    # Reduce the size of a directory by the block size
    echo $((size - BLOCK))
  else
    echo "$size"
  fi
}
# Compare the size of a file or directory with an expected size.
# Arguments: $1 - path of the file or directory
#            $2 - expected size in bytes
# Outputs: "true" if the size reported by getdu equals $2, otherwise
#          "false". An empty size (getdu failed) never counts as a match.
compdu() {
  local actual expected=$2
  actual=$(getdu "$1")
  if [ -n "$actual" ] && [ "$actual" == "$expected" ]; then
    echo "true"
  else
    echo "false"
  fi
}
# Check both the size and the hashsum of a file.
# Arguments: $1 - path of the file
#            $2 - expected size in bytes
#            $3 - expected hashsum
# Outputs: "true" if compdu and comphash both print "true", else "false".
#          The hashsum is only calculated when the size already matches.
compall() {
  # Local names on purpose: assigning to FILE here would overwrite the
  # script's -f argument whenever the function runs outside a subshell.
  local path=$1 size=$2 hash=$3
  if [ "$(compdu "$path" "$size")" == "true" ] \
    && [ "$(comphash "$path" "$hash")" == "true" ]; then
    echo "true"
  else
    echo "false"
  fi
}
# Make sure the destination folder exists, creating it if necessary.
# Arguments: $1 - path of the folder
# Globals read: MODE - in "server" mode an existing folder must be emptied
# Behaviour:
#   - folder exists: ask whether to empty it; on "y" it is removed and
#     recreated, otherwise it is kept (client) or the script exits 1 (server)
#   - path exists but is not a folder: exit 1
#   - path does not exist: create the folder
checkfolder() {
  local dir=$1 answer
  if [ -d "$dir" ]; then
    read -r -p "Destination folder \"$dir\" already exists. Should it be emptied? [y/N]: " answer
    if [ "$answer" == "y" ]; then
      # ${dir:?} aborts instead of running "rm -rf" on an empty path.
      rm -rf -- "${dir:?}"
      mkdir -- "$dir" || exit 1
    elif [ "$MODE" == "server" ]; then
      echo "In server-mode, this isn't recommended. Aborting."
      exit 1
    fi
  elif [ -e "$dir" ]; then
    echo "Destination \"$dir\" already exists but is not a folder. Please check."
    exit 1
  else
    mkdir -- "$dir" || exit 1
  fi
}
# # # # # # # # # # # # # # # # # # # # # # # # #
# ONE-TIME-ACTIONS
# # # #
# Check if -m and -f are given
if [ -z "$FILE" ] || [ -z "$MODE" ]; then
  echo "Missing arguments! Please define the mode with -m and the file/link with -f."
  echo
  help
  exit 1
fi
# Reject unknown modes; otherwise neither the server nor the client section
# would run and the script would end silently.
if [ "$MODE" != "server" ] && [ "$MODE" != "client" ]; then
  echo "Unknown mode \"$MODE\"! Possible values are \"server\" or \"client\"."
  echo
  help
  exit 1
fi
# Check size in bytes of an empty directory. getdu subtracts this value
# (BLOCK) from the size it reports for directories.
if ! TMP=$(mktemp -d); then
  echo "Could not create a temporary directory. Aborting." >&2
  exit 1
fi
BLOCK=$(getdu "$TMP")
rm -r -- "$TMP"
# # # # # # # # # # # # # # # # # # # # # # # # #
# SERVER MODE
# # # #
# Server mode: split $FILE into chunks inside the folder "dl-$FILE" and write
# an info document there that lists the name, size and hashsum of the
# original file and of every chunk.
if [ "$MODE" == "server" ]; then
  # Check if file has spaces
  if [[ "$FILE" == *" "* ]]; then
    read -r -p "Filename has spaces. Should it be renamed? [y/n]: " YN
    if [ "$YN" == "y" ]; then
      # replace spaces by underscores
      TMPFILENAME=${FILE// /_}
      mv -- "$FILE" "$TMPFILENAME"
      FILE=$TMPFILENAME
    else
      echo "Aborting."
      exit 1
    fi
  fi
  # Check if command is executed in directory of file or not
  if [ "$(basename -- "$FILE")" != "$FILE" ]; then
    echo "Please execute command in the same directory than the file itself."
    exit 1
  fi
  # Check if file exists
  if [ ! -e "$FILE" ]; then
    echo "File does not exist. Aborting."
    exit 1
  fi
  # Check if it's a file or a directory
  if [ ! -f "$FILE" ]; then
    echo "File is not a file. This script doesn't work with directories."
    exit 1
  fi
  FOLDER="dl-$FILE"
  checkfolder "$FOLDER"
  INFONAME=$INFO
  INFO="$FOLDER/$INFONAME"
  echo "[INFO] Calculating size and hashsum..."
  BIGNAME=$(basename -- "$FILE")
  BIGSIZE=$(getdu "$FILE")
  BIGHASH=$(gethash "$FILE")
  # The info document is a list of shell assignments. Free-text values are
  # written with %q so that special characters stay inert when the document
  # is read back as shell code; for plain values the output is unchanged.
  {
    printf 'BIGNAME=%q\n' "$BIGNAME"
    echo "BIGSIZE=$BIGSIZE"
    echo "BIGHASH=$BIGHASH"
    echo
    # Variables info
    printf 'SRV_CHECKSUM=%q\n' "$CHECKSUM"
    printf 'SRV_SPLITSIZE=%q\n' "$SPLITSIZE"
    printf 'SRV_INFO=%q\n' "$INFO"
    echo
  } >> "$INFO"
  echo "[INFO] Splitting big file into smaller parts..."
  if ! split --verbose -a 4 -b "$SPLITSIZE" -- "$FILE" "$FOLDER/dl-"; then
    echo "[ERROR] Splitting failed. Aborting." >&2
    exit 1
  fi
  # List all splitted files, measure size and hashsum
  echo "[INFO] Creating info document with necessary specs..."
  # Collect the chunks with a glob instead of parsing "ls | grep -v": the
  # info document name was used as a regex there and could hide chunks.
  PARTS=()
  for PART in "$FOLDER"/dl-*; do
    PARTBASE=${PART##*/}
    if [ -f "$PART" ] && [ "$PARTBASE" != "$INFONAME" ]; then
      PARTS+=("$PARTBASE")
    fi
  done
  SMALLNO=${#PARTS[@]}
  echo "SMALLNO=$SMALLNO" >> "$INFO"
  for ((NO = 0; NO < SMALLNO; NO++)); do
    SMALLNAME=${PARTS[$NO]}
    SMALLSIZE=$(getdu "$FOLDER/$SMALLNAME")
    SMALLHASH=$(gethash "$FOLDER/$SMALLNAME")
    {
      printf 'SMALLNAME[%d]=%q\n' "$NO" "$SMALLNAME"
      echo "SMALLSIZE[$NO]=$SMALLSIZE"
      echo "SMALLHASH[$NO]=$SMALLHASH"
    } >> "$INFO"
  done
  echo "[SUCCESS] You can now download the splitted files with the client function of this program!"
  echo "[INFO] Please move the folder \"$FOLDER\" to a web-accessible directory and use this folder as the first argument for this script."
fi # /SERVER MODE
# # # # # # # # # # # # # # # # # # # # # # # # #
# CLIENT MODE
# # # #
# Client mode: download the info document from $FILE (a URL), fetch every
# chunk listed in it into "dl-$BIGNAME", verify size and hashsum of each
# chunk, and optionally rebuild and verify the original file.
if [ "$MODE" == "client" ]; then
  URL=$FILE
  # Check if remote directory is valid and has an info.cfg file
  if ! wget -q --spider "$URL/$INFO"; then
    echo "Remote directory does not exists or has invalid info.cfg file. Aborting."
    exit 1
  fi
  if [ -e "$INFO" ]; then rm -- "$INFO"; fi
  if ! wget -q "$URL/$INFO" || [ ! -f "$INFO" ]; then
    echo "[ERROR] Could not download \"$INFO\". Aborting."
    exit 1
  fi
  # SECURITY NOTE: the info document comes from the remote side and is
  # executed as shell code by "source". Only use download links you trust.
  # shellcheck disable=SC1090
  source "$INFO"
  # The info document defines BIGNAME/BIGSIZE/BIGHASH, SMALLNO and the
  # arrays SMALLNAME/SMALLSIZE/SMALLHASH. Without any chunk the download
  # loop below could never finish.
  PARTCOUNT=${#SMALLNAME[@]}
  if [ -z "$BIGNAME" ] || [ "$PARTCOUNT" -eq 0 ]; then
    echo "[ERROR] \"$INFO\" does not list any files. Aborting."
    exit 1
  fi
  # Rename info.cfg to avoid collisions
  mv -- "$INFO" "dl-$BIGNAME-$INFO"
  INFO="dl-$BIGNAME-$INFO"
  FOLDER="dl-$BIGNAME"
  # Print basic status
  echo
  echo "Total filesize: $BIGSIZE"
  echo "HASH: $BIGHASH"
  echo "Number of splitted files: $SMALLNO"
  echo "Download folder: $(readlink -f "$FOLDER")"
  echo
  checkwait
  echo
  checkfolder "$FOLDER" # Checks and creates folder
  # Downloading every single small file. STATUS is "C" while chunks are
  # fine, "E" after a corrupt download (the whole pass is restarted) and
  # "F" once the last chunk has been verified.
  STATUS=""
  until [ "$STATUS" == "F" ]; do
    for ((i = 0; i < PARTCOUNT; i++)); do
      # Separate scalar names: assigning to SMALLNAME/SMALLSIZE/SMALLHASH
      # would overwrite element 0 of the arrays and break a restarted pass.
      PARTNAME=${SMALLNAME[$i]}
      PARTSIZE=${SMALLSIZE[$i]}
      PARTHASH=${SMALLHASH[$i]}
      PARTURL=$URL/$PARTNAME
      PARTPATH=$FOLDER/$PARTNAME
      NO=$((i + 1))
      echo
      echo "------------------"
      echo "Starting file: $PARTNAME ($PARTSIZE bytes)"
      echo "This is file $NO of total $SMALLNO files."
      echo "Already downloaded $(getdu "$FOLDER") of $BIGSIZE bytes."
      echo
      # Define starttime
      TSTART=$(date +"%s")
      FETCHED="yes"
      # $WGETOPT is left unquoted on purpose: it holds several options.
      # shellcheck disable=SC2086
      if [ ! -e "$PARTPATH" ]; then
        # File doesn't exist yet, so start fresh download
        echo "[INFO] File doesn't exist yet. Starting new download."
        wget -O "$PARTPATH" $WGETOPT "$PARTURL"
      elif [ "$(compdu "$PARTPATH" "$PARTSIZE")" != "true" ]; then
        if [ "$(getdu "$PARTPATH")" -gt "$PARTSIZE" ] 2>/dev/null; then
          # Larger than expected: resuming could never repair this and the
          # script would retry forever, so start over.
          echo "[ERROR] File is larger than expected."
          echo "[INFO] Deleting file and starting new download."
          rm -- "$PARTPATH"
          wget -O "$PARTPATH" $WGETOPT "$PARTURL"
        else
          # File already exists but not finished yet
          echo "[INFO] Continuing download."
          wget -c -O "$PARTPATH" $WGETOPT "$PARTURL"
        fi
      elif [ "$(comphash "$PARTPATH" "$PARTHASH")" != "true" ]; then
        # File already exists, has correct size, but has wrong hashsum
        echo "[ERROR] Hashsum is different but file has same size."
        echo "[INFO] Deleting file and starting new download."
        rm -- "$PARTPATH"
        wget -O "$PARTPATH" $WGETOPT "$PARTURL"
      else
        # File already exists, has correct size, and has correct hashsum
        echo "[SUCCESS] File already exists and is valid."
        FETCHED="no"
      fi
      if [ "$FETCHED" == "yes" ] \
        && [ "$(compall "$PARTPATH" "$PARTSIZE" "$PARTHASH")" != "true" ]; then
        # Downloaded file is not valid. Restart for-loop
        echo "[ERROR] Downloaded file is corrupt. Restarting."
        STATUS="E"
        break
      fi
      STATUS="C"
      # Define time when the download finished
      TEND=$(date +"%s")
      TDIFF=$((TEND - TSTART))
      if [ "$TDIFF" -ge 3 ]; then
        # Calc bytes per second
        BPS=$((PARTSIZE / TDIFF))
        # A rate of 0 would make the ETA a division by zero; skip it then.
        if [ "$BPS" -gt 0 ]; then
          DUDONE=$(getdu "$FOLDER")
          DUDIFF=$((BIGSIZE - ${DUDONE:-0}))
          if [ "$DUDIFF" -lt 0 ]; then DUDIFF=0; fi
          TREST=$((DUDIFF / BPS))
          echo
          printf '[INFO] The last file was downloaded at ~%d KB/s. ETA %d:%02d minutes.\n' \
            "$((BPS / 1024))" "$((TREST / 60))" "$((TREST % 60))"
        fi
      fi
      # Check if all small downloads are finished
      if [ "$NO" -lt "$PARTCOUNT" ]; then
        echo "[INFO] Starting next download."
      else
        echo "[SUCCESS] Downloading finished."
        STATUS="F"
      fi
    done # /for smallnames
  done
  read -r -p "Download seems to be finished. Should the splitted files be rebuilt to the original big file again? [y/N]: " YN
  if [ "$YN" == "y" ]; then
    # Destination file already exists
    if [ -e "$BIGNAME" ]; then
      read -r -p "Destination file already exists. Should it be overwritten? [y/N]: " YN
      if [ "$YN" == "y" ]; then
        cat "$FOLDER"/dl-* > "$BIGNAME"
      else
        echo "[INFO] Skipping rebuilding."
      fi
    # Destination file doesn't exist yet
    else
      cat "$FOLDER"/dl-* > "$BIGNAME"
    fi
  fi
  # Check big file for hashsum and size (only if there is a file to check)
  if [ -f "$BIGNAME" ]; then
    sleep 2 # In rare cases, this can prevent a wrong du size
    echo "[INFO] Checking correct size and hashsum for rebuilt big file..."
    # Compare sizes
    if [ "$(compdu "$BIGNAME" "$BIGSIZE")" == "true" ]; then
      echo "[SUCCESS] The size of the completed file is corrent."
    else
      echo "[ERROR] The size of the completed file is incorrect."
    fi
    # Compare hashsum
    if [ "$(comphash "$BIGNAME" "$BIGHASH")" == "true" ]; then
      echo "[SUCCESS] The checksum of the completed file is corrent."
    else
      echo "[ERROR] The sum of the completed file is incorrect."
    fi
  else
    echo "[INFO] No rebuilt file found. Skipping size and hashsum check."
  fi
  # Clean directories
  read -r -p "Clean the download directory and info.cfg file? [y/N]: " YN
  if [ "$YN" == "y" ]; then
    rm -- "$INFO"
    # ${FOLDER:?} aborts instead of running "rm -r" on an empty path.
    rm -r -- "${FOLDER:?}"
  fi
fi # /CLIENT MODE