Removes all metadata from one or more PDF files
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

59 lines
1.8 KiB

#!/bin/bash
# A very simple script by Max Mehl: mail@mehl.mx
# Removes all metadata from the PDF files in the current working directory,
# or from a single PDF file given as the only argument.
# This is done with pdftk, a powerful command line tool on GNU/Linux.
# NOTE: the script does not change directory itself; when it is called by cron,
# change to the directory containing the PDF files first.
# Usage:
#   <script>             clean every *.pdf in the current directory (asks first)
#   <script> FILE        clean the single PDF file FILE
#
# Each cleaned file is rewritten in place; a copy of the untouched original is
# saved next to it as <name>.pdf.bak (an existing backup is overwritten).

# Scratch directory shared by all files of one run; removed again on exit.
# Deliberately NOT called TMPDIR: that name is the standard environment
# variable consulted by mktemp and by child processes.
work_dir=''

# Remove the scratch directory, if one was created.
cleanup() {
  if [[ -n "$work_dir" ]]; then
    rm -rf -- "$work_dir"
  fi
}
trap cleanup EXIT

# Print a message to stderr.
warn() {
  printf '%s\n' "$*" >&2
}

# Print a message to stderr and abort the whole run with status 1.
die() {
  warn "$@"
  exit 1
}

#######################################
# Blank every InfoValue entry of one PDF and write the result back in place.
# Globals:   work_dir (read) - existing scratch directory
# Arguments: $1 - path of the PDF to clean
# Outputs:   progress message on stdout, diagnostics on stderr
# Returns:   0 on success; 1 if any step failed (the original file is then
#            left untouched, apart from the backup copy possibly existing)
#######################################
clean_pdf() {
  local src=$1
  local backup="${src%.pdf}.pdf.bak"
  # pdftk only ever sees these fixed scratch names, so spaces or other odd
  # characters in the real file name can neither confuse it nor make two
  # different files collide on one temporary name.
  local input="$work_dir/input.pdf"
  local meta="$work_dir/input.meta"
  local meta_clean="$work_dir/input.meta.clean"
  local output="$work_dir/output.pdf"

  # Start from a clean slate so nothing from a previous file can leak in.
  rm -f -- "$input" "$meta" "$meta_clean" "$output"

  cp -- "$src" "$backup" \
    || { warn "Could not create backup $backup."; return 1; }
  cp -- "$src" "$input" \
    || { warn "Could not copy $src to the temporary directory."; return 1; }

  # Dump the metadata report into a scratch file.
  pdftk "$input" dump_data output "$meta" \
    || { warn "pdftk could not read the metadata of $src."; return 1; }

  # Empty every value; anchored so that only real InfoValue lines are
  # touched, not other fields that merely contain that text.
  sed 's/^InfoValue:.*/InfoValue:/' "$meta" > "$meta_clean" \
    || { warn "Could not clean the metadata of $src."; return 1; }

  # Write the cleaned PDF to the scratch directory first, so that a failing
  # pdftk run never damages the original file.
  pdftk "$input" update_info "$meta_clean" output "$output" \
    || { warn "pdftk could not write a cleaned version of $src."; return 1; }

  # Copy (rather than move) onto the original so that the existing file is
  # overwritten in place.
  cp -- "$output" "$src" \
    || { warn "Could not write the cleaned file back to $src."; return 1; }

  echo "Cleaned $src from metadata. Saved backup to $backup."
}

#######################################
# Entry point: decide which files to clean, then clean them one by one.
# Arguments: none (all *.pdf in the current directory) or exactly one file
# Returns:   0 if every file was cleaned, 1 if at least one failed
#######################################
main() {
  local -a targets=()
  local target
  local failed=0

  if (( $# == 0 )); then
    echo "Searching for PDF files in this directory..."
    # nullglob: an unmatched pattern expands to nothing instead of itself.
    shopt -s nullglob
    for target in *.pdf; do
      if [[ -f "$target" ]]; then
        targets+=("$target")
      fi
    done
    shopt -u nullglob
    if (( ${#targets[@]} == 0 )); then
      die "No files found. Maybe change directory or give one file as parameter."
    fi
    echo "Cleaning all files in the current directory..."
    # Any input line continues; abort with Ctrl-C.
    read -r -p "Should I proceed? Press Enter to continue." _
  elif (( $# == 1 )); then
    if [[ ! -f "$1" ]]; then
      die "$1 does not exist or is no file."
    fi
    echo "Cleaning $1..."
    targets=("$1")
  else
    die "Too many parameters. Please give no file (cleans all files in current directory) or one file as parameter."
  fi

  command -v pdftk > /dev/null \
    || die "pdftk is required but was not found in PATH."

  work_dir=$(mktemp -d) || die "Could not create a temporary directory."

  for target in "${targets[@]}"; do
    clean_pdf "$target" || failed=1
  done

  return "$failed"
}

main "$@"