plog-browse

#!/bin/sh

# Base URL that is prepended to every image path found below.
plogurl='http://www.example.com/plogger'
# Local filesystem directory of the gallery; its images/ subdirectory is scanned.
plogdir='/var/www/example.com/htdocs/plogger'
# Log of URLs that were already downloaded, one per line.
ploglog=~/plog-browse.log

# Argument handed to sleep after each download.
delay=0
# Parent directory in which the plog-browse scratch directory is created.
tempdir='/tmp'

# Abort early when the gallery's images directory is missing, because the
# rest of the script scans that directory.
if [ ! -d "$plogdir/images" ]
then
	# Diagnostics go to stderr so they do not mix with normal output.
	echo "Error: Could not find directory $plogdir/images" >&2
	echo "Aborted." >&2
	exit 1
fi

# Remember the starting directory; the script changes directory below.
oldpwd=$PWD

# Create the temporary file that will hold one image URL per line.
if ! tempfile=$(mktemp)
then
	echo "Error: Could not create temporary file" >&2
	exit 1
fi

# Enter the images directory so that find prints paths relative to it.
# Without this check a failed cd would make find scan the wrong directory.
if ! cd "$plogdir/images"
then
	echo "Error: Could not enter directory $plogdir/images" >&2
	rm -f -- "$tempfile"
	exit 1
fi

# Find all files in the images directory
find . -type f |
# filter out .htaccess file(s)
grep -v htaccess |
# remove leading dots ./dir/file -> /dir/file
sed 's/^\.//' |
# prepend url and save result to the temporary file
awk -v plogurl="$plogurl" '{ print plogurl $0 }' > "$tempfile"

# Give a summary: the URL list holds one line per picture.
total=$(wc -l < "$tempfile")
# %d prints the bare number even if wc surrounds it with blanks.
printf 'Found %d pictures.\n' "$total"

# Create the scratch download directory (if needed) and cd into it.
# wget saves into the current directory, so a failed mkdir or cd must stop
# the script instead of letting downloads land somewhere else.
if ! mkdir -p "$tempdir/plog-browse"
then
	echo "Error: Could not create directory $tempdir/plog-browse" >&2
	rm -f -- "$tempfile"
	exit 1
fi
if ! cd "$tempdir/plog-browse"
then
	echo "Error: Could not enter directory $tempdir/plog-browse" >&2
	rm -f -- "$tempfile"
	exit 1
fi

# Download every URL that is in the URL list but not yet in the log.

# The log is handed to sort below, which fails on a missing file; create an
# empty log on the first run so that every URL counts as pending.
if [ ! -e "$ploglog" ] && ! touch "$ploglog"
then
	echo "Error: Could not create log file $ploglog" >&2
	exit 1
fi

# Compute the pending URLs once and keep them in a second temporary file.
if ! pending=$(mktemp)
then
	echo "Error: Could not create temporary file" >&2
	exit 1
fi

# The log is listed twice, so each of its lines occurs at least twice in the
# sorted stream; uniq -u keeps only lines that occur exactly once, i.e. the
# URLs that are in tempfile but not in the ploglog.
sort "$tempfile" "$ploglog" "$ploglog" | uniq -u > "$pending"

todo=$(wc -l < "$pending")
num=1

while IFS= read -r url
do
	printf 'Downloading %d of %d...' "$num" "$todo"
	# stdin is redirected so wget cannot consume the pending list.
	if wget -q "$url" < /dev/null
	then
		echo " Ok"
		# Only successful downloads are logged, so failures are retried
		# on the next run.
		printf '%s\n' "$url" >> "$ploglog"
	else
		echo " Failed"
	fi
	sleep "$delay"
	# POSIX arithmetic expansion; "let" is not available in every /bin/sh.
	num=$((num + 1))
done < "$pending"

rm -f -- "$pending"

# Remove all temporary files: the scratch download directory and the URL list.
# ${tempdir:?} stops the script rather than expanding to "/plog-browse"
# should tempdir ever be empty or unset.
rm -rf -- "${tempdir:?}/plog-browse" "$tempfile"

echo Done!

# Go back to the directory the script was started from.
cd "$oldpwd"

exit 0

Leave a Reply