Google Book downloader gbd

•October 21, 2008 • 1 Comment

You can find many useful books at books.google.com. It is a very valuable project which provides a huge amount of material. Instead of browsing the Internet every time I need to read my favourite book, I would like to download the content and keep it for future use as a PDF file, so I wrote the following automation script. Copy the script to /usr/bin/gbd and make it executable with chmod +x /usr/bin/gbd. Then create a directory to download your book into, cd to that directory and run gbd once; it will create a configuration file called gbd.conf in the current directory. Now search for your favourite book on books.google.com and switch the view of the page to 'Basic HTML mode', then click the next button until you find a URL which follows the format of the BOOK="" URL given in the script. Note that there is a 'pg=PA[number]' name-value pair in the URL. Copy that URL and paste it into gbd.conf inside the double quotes of BOOK="". Include the starting page number and the end page number of the range you want to download. If you also want to create the book, set CREATE_BOOK=true. Also edit FIREFOX_DIR or OPERA_DIR in the script to match your directory structure. Now run gbd and check whether it is downloading the pages.


#!/bin/bash
# Copyright (C) 2008 Nayanajit Mahendra Laxaman mail: nmlaxaman@gmail.com

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

# ---------------------------------------------------------------------------
# Default settings. Every value here can be overridden by gbd.conf, which is
# sourced from the current directory further down.
# ---------------------------------------------------------------------------

# Page URL of the book. It must be quoted with plain ASCII double quotes:
# the URL contains '&', which the shell would otherwise treat as the
# "run in background" operator.
BOOK="http://books.google.com/books?id=m-5aANQCcp0C&pg=PA13&lpg=PA12&vq=5&dq=food+flavours&output=html"
# First and last page number to download.
STARTING_PAGE=0
END_PAGE=170
# Browser whose stored Google cookie is reused: "firefox", anything else
# selects Opera.
COOKEY_FROM=firefox
# true: only fetch pages whose <n>.png is not present yet.
GET_MISSED=false
# true: combine the downloaded *.png files into book.pdf at the end.
CREATE_BOOK=false

# Browser profile directories the cookie files are read from.
FIREFOX_DIR=~/.mozilla/firefox/tyfemkj0.default
OPERA_DIR=~/.opera

# Parts of BOOK before / after the page number; filled in once BOOK is split.
URL_F=
URL_B=

# Load the per-book settings from gbd.conf in the current directory. When the
# file does not exist yet, write a template for the user to fill in and stop.
if [[ -f gbd.conf ]]; then
  # "./" matters: a bare `source gbd.conf` looks the name up in PATH before
  # trying the current directory.
  source ./gbd.conf
else
  echo "gbd.conf file does not exists!"
  echo "Creating a template gbd.conf please include the values"
  # Quoted delimiter: the template is written literally, without expansion.
  cat > gbd.conf <<'EOF'
BOOK="[URL]"
STARTING_PAGE=[starting page number]
END_PAGE=[last page number]
COOKEY_FROM=[firefox/opera]
CREATE_BOOK=[true/false]
GET_MISSED=[true/false]
EOF
  exit 0
fi

# Split BOOK around the page number of its "pg=PA<number>" parameter:
#   URL_F = everything up to and including "pg=PA"
#   URL_B = everything after the page number
# so that "${URL_F}<page>${URL_B}" addresses an arbitrary page.
#
# The page digits themselves are dropped on purpose; keeping them in URL_B
# would glue the requested page number onto the original one. Anchoring on
# "?pg=PA" / "&pg=PA" also keeps a "PA" inside the book id or in "lpg=PA.."
# from being mistaken for the page parameter.
book_url_re='^(.*[?&]pg=PA)[0-9]*(.*)$'
if [[ $BOOK =~ $book_url_re ]]; then
  URL_F=${BASH_REMATCH[1]}
  URL_B=${BASH_REMATCH[2]}
else
  # A URL without a page parameter cannot be used; report it as a failure.
  echo 'Invalid Book url !' >&2
  exit 1
fi

# Last page number to download, taken from the (possibly overridden) END_PAGE.
PAGES=$END_PAGE
# Cookie jar file used by every curl call; created in the current directory.
COOKEY=cookie.txt
# Example of the two URL halves for the default BOOK:
#URL_F="http://books.google.com/books?id=m-5aANQCcp0C&pg=PA"
#URL_B="&lpg=PA12&vq=5&dq=food+flavours&output=html"
# User-agent strings sent to match the browser the cookie was taken from.
# Plain ASCII quotes only: typographic quotes would become part of the value.
AGENT_OPERA="Opera/9.63"
AGENT_FIREFOX="Mozilla/5.0"
# Cookie value extracted from the browser profile; set by the *_cookey_merge
# functions.
FOUND_COOKEY=

#######################################
# Request the book page once so curl stores Google's cookies in the jar, then
# append a PREF entry carrying the cookie value found in the browser profile.
# Globals:
#   AGENT, BOOK, COOKEY, FOUND_COOKEY (read)
# Outputs:
#   Progress messages on stdout; writes $COOKEY and /tmp/log.html.
#######################################
cookey_merge() {
  local jar_prefix

  echo "Cookey merging…"
  # The page counter is not set yet when the cookies are primed, so request
  # the configured page URL itself. The indented HTML is kept in
  # /tmp/log.html; nothing in this script reads it back.
  curl --user-agent "$AGENT" --cookie-jar "$COOKEY" "$BOOK" 2> /dev/null \
    | hindent -s > /tmp/log.html

  # Everything in front of "PREF" on the jar's google lines is reused as the
  # leading fields of the appended entry.
  # NOTE(review): the appended entry separates its fields with spaces, while
  # curl writes the jar tab-separated -- confirm curl accepts this line.
  jar_prefix=$(grep google "$COOKEY" | awk -F 'PREF' '{print $1}')
  echo "$jar_prefix PREF $FOUND_COOKEY" >> "$COOKEY"
  echo 'Cookey merged!'
}

#######################################
# Take the Google cookie value from Opera's cookie store and merge it into
# the curl cookie jar. Terminates the script when no value is found.
# Globals:
#   OPERA_DIR, AGENT, BOOK, COOKEY (read); FOUND_COOKEY (written)
# Outputs:
#   Progress messages on stdout, the failure message on stderr;
#   writes $COOKEY and /tmp/log.html.
#######################################
opera_cookey_merge() {
  local opera_cookey jar_prefix

  echo "Trying to get cookey form opera…"
  # cookies4.dat is a binary file: take its printable strings, keep the last
  # of the lines following a google.com match, and cut out what follows "ID=".
  opera_cookey=$(strings "$OPERA_DIR/cookies4.dat" \
    | grep -A2 google.com | tail -1 | awk -F 'ID=' '{print $2}')
  if [[ -z $opera_cookey ]]; then
    echo 'No cookeys form opera!' >&2
    exit 1   # nothing can be downloaded without the cookie: fail loudly
  fi
  echo "opera cookey: $opera_cookey"
  FOUND_COOKEY=$opera_cookey

  # The page counter is not set yet at this point, so prime the jar with the
  # configured page URL itself.
  curl --user-agent "$AGENT" --cookie-jar "$COOKEY" "$BOOK" 2> /dev/null \
    | hindent -s > /tmp/log.html
  # NOTE(review): the appended entry separates its fields with spaces, while
  # curl writes the jar tab-separated -- confirm curl accepts this line.
  jar_prefix=$(grep google "$COOKEY" | awk -F 'PREF' '{print $1}')
  echo "$jar_prefix PREF $FOUND_COOKEY" >> "$COOKEY"
  echo 'Cookey merged!'
}

#######################################
# Take the Google PREF cookie value from Firefox's cookie database and merge
# it into the curl cookie jar. Terminates the script when no value is found.
# Globals:
#   FIREFOX_DIR, AGENT, BOOK, COOKEY (read); FOUND_COOKEY (written)
# Outputs:
#   Progress messages on stdout, the failure message on stderr;
#   writes $COOKEY and /tmp/log.html.
#######################################
moz_cookey_merge() {
  local moz_cookey jar_prefix

  echo "Trying to get cookey form firefox…"
  # cookies.sqlite is a binary file: take its printable strings, keep the last
  # google.com line mentioning PREF, and cut out the text between "PREF" and
  # the following ".google".
  moz_cookey=$(strings "$FIREFOX_DIR/cookies.sqlite" \
    | grep google.com | grep PREF | tail -1 \
    | awk -F 'PREF' '{print $2}' | awk -F '.google' '{print $1}')
  if [[ -z $moz_cookey ]]; then
    echo 'No cookeys form firefox!' >&2
    exit 1   # nothing can be downloaded without the cookie: fail loudly
  fi
  echo "firefox cookey: $moz_cookey"
  FOUND_COOKEY=$moz_cookey

  # The page counter is not set yet at this point, so prime the jar with the
  # configured page URL itself.
  curl --user-agent "$AGENT" --cookie-jar "$COOKEY" "$BOOK" 2> /dev/null \
    | hindent -s > /tmp/log.html
  # NOTE(review): the appended entry separates its fields with spaces, while
  # curl writes the jar tab-separated -- confirm curl accepts this line.
  jar_prefix=$(grep google "$COOKEY" | awk -F 'PREF' '{print $1}')
  echo "$jar_prefix PREF $FOUND_COOKEY" >> "$COOKEY"
  echo 'Cookey merged!'
}

#######################################
# Download every page from STARTING_PAGE through PAGES (inclusive) and save
# each one as <page>.png in the current directory.
# Globals:
#   STARTING_PAGE, PAGES, AGENT, COOKEY, URL_F, URL_B (read)
# Outputs:
#   One progress line per page on stdout.
#######################################
get_pages() {
  local i image

  # Start at STARTING_PAGE: the counter must be initialised explicitly,
  # otherwise the first request carries no page number at all.
  for (( i = STARTING_PAGE; i <= PAGES; i++ )); do
    # The page image is the value of the first double-quoted attribute on the
    # line of the indented HTML that mentions "background".
    image=$(curl --user-agent "$AGENT" --cookie "$COOKEY" \
        "${URL_F}${i}${URL_B}" 2> /dev/null \
      | hindent -s | grep background | awk -F '"' '{print $2}')

    if [[ -z $image ]]; then
      echo "Restricted Page:$i !"
    else
      echo "Retrieving page:$i…"
      # Do not leave an empty or partial file behind when the transfer fails.
      curl --user-agent "$AGENT" --cookie "$COOKEY" "$image" \
        2> /dev/null > "$i.png" || rm -f -- "$i.png"
    fi
  done
}

#######################################
# Retry the pages that are still missing: every page from STARTING_PAGE
# through PAGES (inclusive) without a non-empty <page>.png in the current
# directory is downloaded again.
# Globals:
#   STARTING_PAGE, PAGES, AGENT, COOKEY, URL_F, URL_B (read)
# Outputs:
#   The list of missing pages and one progress line per page on stdout.
#######################################
get_missed_pages() {
  local p i image
  local -a missing=()

  # Bounded loop: includes the last page and also terminates when
  # STARTING_PAGE is already past PAGES.
  for (( p = STARTING_PAGE; p <= PAGES; p++ )); do
    # -s rather than -f: a zero-byte file is what a failed download leaves
    # behind, so it counts as missing.
    [[ -s $p.png ]] || missing+=("$p")
  done
  echo Misse pages: "${missing[@]}"

  for i in "${missing[@]}"; do
    # The page image is the value of the first double-quoted attribute on the
    # line of the indented HTML that mentions "background".
    image=$(curl --user-agent "$AGENT" --cookie "$COOKEY" \
        "${URL_F}${i}${URL_B}" 2> /dev/null \
      | hindent -s | grep background | awk -F '"' '{print $2}')

    if [[ -z $image ]]; then
      echo "Restricted Page:$i !"
    else
      echo "Retrieving page:$i…"
      # Do not leave an empty or partial file behind when the transfer fails.
      curl --user-agent "$AGENT" --cookie "$COOKEY" "$image" \
        > "$i.png" 2> /dev/null || rm -f -- "$i.png"
    fi
  done
}

# --- Main flow ---------------------------------------------------------------

# 1. Choose the user agent and import the matching browser's Google cookie.
if [[ $COOKEY_FROM == "firefox" ]]; then
  AGENT=$AGENT_FIREFOX
  moz_cookey_merge
else
  AGENT=$AGENT_OPERA
  opera_cookey_merge
fi

# 2. Download. The flags are compared as strings instead of being executed as
#    commands, so an empty or mistyped value simply counts as "false".
if [[ $GET_MISSED == true ]]; then
  get_missed_pages
else
  get_pages
fi

# 3. Optionally combine the downloaded pages into a single PDF.
if [[ $CREATE_BOOK == true ]]; then
  echo creating book book.pdf
  if ! compgen -G '*.png' > /dev/null; then
    echo 'No downloaded pages (*.png) found; cannot create book.pdf' >&2
    exit 1
  fi
  # Numeric sort on the part before ".png" puts 10.png after 9.png; reading
  # the names line by line keeps each file name a single argument.
  png_pages=()
  while IFS= read -r png_page; do
    png_pages+=("$png_page")
  done < <(printf '%s\n' *.png | sort -t '.' -n)
  convert -sharpen 10 -coalesce "${png_pages[@]}" book.pdf
fi

 
Follow

Get every new post delivered to your Inbox.