Browse Source

Heavy overhaul: better communication, option to clean only a single file, backups, and directory independence

master
mxmehl 7 years ago
parent
commit
1947de6992
  1. 56
      pdf-clearmeta.sh

56
pdf-clearmeta.sh

@ -5,33 +5,55 @@
# This happens by the usage of pdftk, a powerful command line tool on GNU/Linux
# NOTE(review): the lines below come from a diff hunk whose +/- markers are not
# visible here, so statements from the old and the new revision appear to be
# interleaved; the listing is not runnable exactly as shown.
# change to directory where the script resides if tool is called by cron
cd "$(dirname "$(readlink -f "$0")")"
# Probe for PDF files in the current directory; the listing itself is
# discarded, only the exit status is inspected on the next line.
ls *.pdf > /dev/null 2>&1
if [ "$?" = "0" ]; then
mkdir ./temp # create temporary directory
cp ./*.pdf ./temp
# Dispatch on the number of command line arguments:
#   0         -> work on every *.pdf in the current directory
#   1         -> work on the single file given as $1
#   otherwise -> print an error message and exit with status 1
# NOTE(review): `==` inside `[ ]` is a bash extension; the shebang is not
# visible in this excerpt — confirm the script runs under bash.
if [ "$#" == "0" ]; then
echo "Searching for PDF files in this directory..."
# Same probe as above: a non-zero status from ls is treated as "no PDFs here".
ls *.pdf > /dev/null 2>&1
if [ "$?" != "0" ]; then
echo "No files found. Maybe change directory or give one file as parameter."
exit 1
fi
echo "Cleaning all files in the current directory..."
# Blocks until the user presses Enter. The input is stored in END, which is
# not read anywhere in the visible lines.
read -p "Should I proceed? Press Enter to continue." END
# NOTE(review): TMPDIR is also the conventional environment variable for the
# temporary-file location; reusing the name may affect child processes if it
# is exported — confirm this is intended.
TMPDIR=$(mktemp -d) # create temporary directory
# NOTE(review): $TMPDIR is unquoted here and would be word-split if the path
# contained whitespace.
cp ./*.pdf $TMPDIR
elif [ "$#" == "1" ]; then
# Accept $1 only if it exists and is a regular file.
if [ -e "$1" -a -f "$1" ]; then
echo "Cleaning $1..."
TMPDIR=$(mktemp -d) # create temporary directory
cp "$1" $TMPDIR
else
echo "$1 does not exist or is no file."
exit 1
fi
else
# NOTE(review): two messages are printed in this branch; the first one
# presumably belongs to the old revision of the diff — confirm.
echo "No files found"
echo "Too many parameters. Please give no file (cleans all files in current directory) or one file as parameter."
exit 1
fi
# Replace spaces with underscores
# NOTE(review): this loop iterates with FILE but hands "$DATEI" to rename;
# DATEI is not assigned anywhere in the visible lines, and all rename output
# (including errors) is discarded.
for FILE in ./temp/*.pdf; do
rename "s/ /_/g" "$DATEI" > /dev/null 2>&1;
done
# NOTE(review): two loop headers follow each other; presumably the find-based
# one is the old revision and the glob-based one its replacement — confirm.
# The unquoted $( find ... ) form splits its results on whitespace.
for i in $( find ./temp -type f -name "*.pdf" ); # filename with path in $i
# Iterate over the PDF copies inside the temporary directory.
for FILE in $TMPDIR/*.pdf; # filename with path in $FILE
do
FILENAME=`basename $i` # filename without path
FILENAME=$(basename "$FILE") # filename without path, but possible spaces
# NOTE(review): $FILENAME is unquoted in the echo calls below, so runs of
# whitespace are collapsed by the shell before sed sees the name.
TMPFILENAME=$(echo $FILENAME | sed "s/ /_/g") # replace spaces by underscores
TMPFILE="$TMPDIR/$TMPFILENAME"
# Strip a trailing ".pdf" to build the backup name "<name>.pdf.bak".
EXTLESS=$(echo $FILENAME | sed "s/\.pdf$//")
BAKFILENAME="$EXTLESS.pdf.bak"
# BAKFILENAME carries no directory part, so the backup is written relative to
# the current working directory; it is taken from the temp copy, before any
# processing.
cp "$FILE" "$BAKFILENAME" # Generating backup
mv "$FILE" "$TMPFILE" # rename it as long as we work with it
# Dump data in temporary file
pdftk $i dump_data output $i.meta
pdftk "$TMPFILE" dump_data output "$TMPFILE".meta
# Blank everything after "InfoValue:" on each matching line of the dump and
# store the result next to it with a ".clean" suffix.
sed 's/InfoValue:.*/InfoValue:/g' $i.meta > $i.meta.clean
sed 's/InfoValue:.*/InfoValue:/g' "$TMPFILE".meta > "$TMPFILE".meta.clean
pdftk $i update_info $i.meta.clean output $FILENAME # Update Metadata from cleaned metafile and write to original file
# NOTE(review): FILENAME is a basename, so the cleaned PDF (like the backup)
# lands in the current working directory, even if $1 pointed to a file in
# another directory.
pdftk "$TMPFILE" update_info "$TMPFILE".meta.clean output "$FILENAME" # Update Metadata from cleaned metafile and write to original file
echo "Cleaned $FILENAME from metadata. Saved backup to $BAKFILENAME."
done
# Remove the temporary working directory.
# NOTE(review): $TMPDIR is unquoted in the second rm.
rm -r ./temp
rm -r $TMPDIR
Loading…
Cancel
Save