|
我的blog,
http://eagleisyou.spaces.live.com/default.aspx
三个bash脚本,现在总共可以下载到五千张图片,全是来自nationalgeographic网站。
使用方法:把三个脚本放在同一个目录下,运行pod.sh ;运行callphotos.sh ;不要直接运行photos.sh
解释:
1 pod.sh就是原来的ngpod.sh修改版,专门下载nationalgeographic.com/photography/photo-of-the-day这个栏目的图片,大约有2500张图片。
2 每张photo-of-the-day都有它的related pics;callphotos.sh和photos.sh就负责下载这些。
3 callphotos.sh是为了加快速度,同时调用多个photos.sh脚本;photos.sh是下载ng网站其它栏目的图片,大约也有2500张图片。
4 现在可“断点续传”,当脚本运行到一半时你可以用ctrl+C中断,注意不要手动删除隐藏的临时文件,下次再运行脚本会自动接到上次中断的地方。
5 pod.sh可自动下载最新的图片,不再需要修改脚本来改变开始/结束时间。
6 运行时间,在我的网络状态,晚上睡觉前运行,早上起来脚本就成功结束了。
7 生成的介绍html有些还是有错误,still needs to be done.
- #!/bin/bash
- #####################################
- #pod.sh
- #update 2008/4/5,
- # to get last version,visit http://eagleisyou.spaces.live.com
- # the script will download photos of NGPOD
- #NGPOD: "National Geographic Photo Of the Day"
- #http://photography.nationalgeographic.com/photography/photo-of-the-day
- ######################################
# The earliest NGPOD is April 21, 2001. Years are stored as offsets from 2000.
# The start date defaults to that day but may be overridden from the
# environment (BYEAR / BMONTH / BDAY).
declare BYEAR=${BYEAR:-1} BMONTH=${BMONTH:-4} BDAY=${BDAY:-21}
# End date. Declared without a value so getendday can fill in today's date;
# to pin it manually assign all three, e.g. EYEAR=8 EMONTH=3 EDAY=29.
declare EYEAR EMONTH EDAY
###############################################################
# Zero-padded year / month / day of the photo being downloaded (set by getday).
declare DYEAR DMONTH DDAY
# Remote path of the picture found by sg_jpgname.
declare JPGNAME
# Hidden scratch files in the current directory.
TMP1=.tmpngpod1
TMP2=.tmpngpod2
TMP3=.tmpngpod3
TMP4=.tmpngpod4
TMP5=.tmpngpod5
TMPJPG=.tmpngpodjpg
TMPINTRO=.tmpngpodintro
# Host prefix prepended to site-relative links.
EXLINK="http://photography.nationalgeographic.com"
# Answer to the start-up confirmation prompt.
declare JUDGE
- ###############################################
- #function difinition area
# Fill EYEAR/EMONTH/EDAY with today's date unless EYEAR is already set.
# Globals written: EYEAR (year minus 2000), EMONTH (1-12), EDAY (1-31),
# all without leading zeros.
# The date is requested with an explicit format in a single call, so the
# result does not depend on the locale's default `date` layout and cannot
# straddle midnight.
getendday() {
  local today y m d

  # A manually configured end date wins.
  if [ -n "$EYEAR" ]; then
    return
  fi

  today=$(date '+%Y %m %d') || return
  read -r y m d <<<"$today"
  # 10# forces base 10 so that "08" / "09" are not read as octal.
  EYEAR=$(( 10#$y - 2000 ))
  EMONTH=$(( 10#$m ))
  EDAY=$(( 10#$d ))
}
# Normalise the running date BYEAR/BMONTH/BDAY and publish it zero-padded.
# Globals read:    EYEAR EMONTH EDAY (end date)
# Globals written: BYEAR BMONTH BDAY (rolled over to the next month/year when
#                  BDAY ran past the end of the month), DYEAR DMONTH DDAY
#                  (two-digit strings).
# When the running date is later than the end date BYEAR is set to 0, which
# the main loop uses as its stop marker; D* are left untouched in that case.
# Leap years are taken as "year divisible by 4", which is correct for every
# year this script can represent (2000-2099).
getday() {
  local -i last

  if [ "$BYEAR" -gt "$EYEAR" ]; then
    BYEAR=0
    return
  fi
  if [ "$BYEAR" -eq "$EYEAR" ]; then
    if [ "$BMONTH" -gt "$EMONTH" ]; then
      BYEAR=0
      return
    fi
    if [ "$BMONTH" -eq "$EMONTH" ] && [ "$BDAY" -gt "$EDAY" ]; then
      BYEAR=0
      return
    fi
  fi

  # Force base 10 so values such as "08" are not treated as octal below.
  BYEAR=$(( 10#$BYEAR ))
  BMONTH=$(( 10#$BMONTH ))
  BDAY=$(( 10#$BDAY ))

  case $BMONTH in
    2)
      if (( BYEAR % 4 == 0 )); then last=29; else last=28; fi
      ;;
    4 | 6 | 9 | 11) last=30 ;;
    *) last=31 ;;
  esac

  if (( BDAY > last )); then
    BDAY=1
    BMONTH=$(( BMONTH + 1 ))
  fi
  if (( BMONTH == 13 )); then
    BMONTH=1
    BYEAR=$(( BYEAR + 1 ))
  fi

  printf -v DYEAR '%02d' "$BYEAR"
  printf -v DMONTH '%02d' "$BMONTH"
  printf -v DDAY '%02d' "$BDAY"
}
# Convert file $1 in place from ISO-8859-1 to UTF-8.
# The original is only replaced when the conversion succeeded; on failure the
# partial output is discarded, the input is left as it was and 1 is returned.
call_iconv() {
  local src=$1

  if iconv -f iso8859-1 -t utf8 <"$src" >"${src}.utf8"; then
    mv -f -- "${src}.utf8" "$src"
  else
    rm -f -- "${src}.utf8"
    return 1
  fi
}
# Download URL $2 into file $1.
# Anything whose URL does not contain ".jpg" is treated as an ISO-8859-1
# page and converted to UTF-8 with call_iconv.
# wget's messages are discarded on purpose; callers detect failure by
# checking the downloaded file. A 30 second timeout keeps one dead
# connection from stalling the whole run.
getfile() {
  local dest=$1 url=$2

  wget -T 30 -O "$dest" "$url" 2>/dev/null

  case "$url" in
    *.jpg*) ;;
    *) call_iconv "$dest" ;;
  esac
}
# Delete file $1 when it cannot be a usable picture.
# A missing file is ignored. A file smaller than 6000 bytes, or one that
# `file` does not identify as JPEG, is removed.
# `file -b` omits the file name from the output, so a path that merely
# contains the text "JPEG" is not mistaken for a JPEG image.
check_jpg() {
  local target=$1 kind
  local -i size

  if [ ! -f "$target" ]; then
    return 0
  fi

  size=$(wc -c <"$target")
  if (( size < 6000 )); then
    rm -f -- "$target"
    return
  fi

  kind=$(file -b -- "$target")
  case "$kind" in
    *JPEG*) ;;
    *) rm -f -- "$target" ;;
  esac
}
# Print the first character of each input line that contains "jpg"
# (used to pick the image/<letter> directory for a picture name).
# Reads file $1, or standard input when no file is given.
s_alpha() {
  sed -n -e 's/\(^.\).*jpg/\1/p' ${1:+"$1"}
}
# Print the two alphabetic path components that precede the page name of an
# .html link, e.g. ".../photography/photos/a.html" -> "photography/photos".
# Reads file $1, or standard input when no file is given.
s_relateddir() {
  sed -n -e 's/.*\/\([a-zA-Z]*\/[a-zA-Z]*\)\/.*html/\1/p' ${1:+"$1"}
}
# For every line containing <h3>, join it with the next two lines and print
# "<link> <text>" of the .html anchor found in that span.
# Reads file $1, or standard input when no file is given.
s_relatedlink() {
  sed -n -e '/<h3>/{N;N;s/.*<a href="\(.*html\).*>\(.*\)<\/a>.*/\1 \2/p}' ${1:+"$1"}
}
# Print the .html target of the anchor carrying class="wallpaper".
# Reads file $1, or standard input when no file is given.
s_walllink() {
  sed -n -e 's/.*class="wallpaper" href="\(.*html\)">.*/\1/p' ${1:+"$1"}
}
# Print the .html target of the anchor carrying class="enlarge"
# (fallback for pages without a wallpaper link).
# Reads file $1, or standard input when no file is given.
s_walllink2() {
  sed -n -e 's/.*class="enlarge" href="\(.*html\)">.*/\1/p' ${1:+"$1"}
}
# Find the best picture link in page $1 and store it in the global JPGNAME.
# Candidates are tried in order of preference: the 1280x1024 link, the
# 1024x768 link, an image whose name ends in "sw.jpg", then an image whose
# name contains "800". JPGNAME is empty when nothing matched.
# Only the first match of a pattern is kept, so a page with several matching
# lines cannot produce a multi-line name.
sg_jpgname() {
  local page=$1 script

  JPGNAME=
  for script in \
    's/.*<a href="\(.*jpg\)">.*1280 x 1024 pixels.*/\1/p' \
    's/.*<a href="\(.*jpg\)">.*1024 x 768 pixels.*/\1/p' \
    's/.*src="\(.*sw\.jpg\)".*/\1/p' \
    's/.*src="\(.*800.*jpg\)".*/\1/p'; do
    JPGNAME=$(sed -n -e "$script" ${page:+"$page"} | head -n 1)
    if [ -n "$JPGNAME" ]; then
      break
    fi
  done
  return 0
}
# Print the <h3> title found within the three lines that follow
# <div class="summary".
# Reads file $1, or standard input when no file is given.
s_title() {
  sed -n -e '/<div class="summary"/{N;N;N;s/.*<h3>\(.*\)<\/h3>.*/\1/p}' ${1:+"$1"}
}
# Print the content of <div class="date">...</div>.
# Reads file $1, or standard input when no file is given.
s_date() {
  sed -n -e 's/.*<div class="date">\(.*\)<\/div>.*/\1/p' ${1:+"$1"}
}
# Print the photo credit, i.e. the content of <p class="credit">...</p>.
# Reads file $1, or standard input when no file is given.
s_who() {
  sed -n -e 's/.*<p class="credit">\(.*\)<\/p>.*/\1/p' ${1:+"$1"}
}
# Print the text of <div class="description">...</div> with anchors removed.
# The div line is joined with the eight lines that follow it before the
# content is extracted.
# The anchor-stripping sed runs without -n, so it must not use the `p` flag:
# with `p` every line that contained a link would be printed twice.
# Reads file $1, or standard input when no file is given.
s_intro() {
  sed -n -e '/<div class="description"/{N;N;N;N;N;N;N;N;s/.*<div class="description">\(.*\)<\/div>.*/\1/p}' ${1:+"$1"} \
    | sed -e 's/<a.*\/a>//'
}
# Create the monthly overview page and write its head plus the day index.
# Arguments: $1 - page to create
#            $2 - two-digit year (offset from 2000)
#            $3 - two-digit month
# Nothing happens when the argument count is wrong, the page already exists
# or the month is not 01..12.
# The markup is written with a here-document so that attribute quotes (and
# the ';' inside the meta tag) reach the file instead of being consumed by
# the shell. Leap years are "year divisible by 4" (valid for 2000-2099).
create_introhead() {
  local intro_file month_name
  local -i days day pad
  local -i cols=5

  if [ $# -ne 3 ]; then
    return
  fi
  intro_file=$1
  if [ -f "$intro_file" ]; then
    return
  fi

  case "$3" in
    01) month_name=January; days=31 ;;
    02)
      month_name=February
      if (( 10#$2 % 4 == 0 )); then days=29; else days=28; fi
      ;;
    03) month_name=March; days=31 ;;
    04) month_name=April; days=30 ;;
    05) month_name=May; days=31 ;;
    06) month_name=June; days=30 ;;
    07) month_name=July; days=31 ;;
    08) month_name=August; days=31 ;;
    09) month_name=September; days=30 ;;
    10) month_name=October; days=31 ;;
    11) month_name=November; days=30 ;;
    12) month_name=December; days=31 ;;
    *) return ;;
  esac

  {
    cat <<EOF
<html><head>
<meta http-equiv="Content-Type" content="text/html; charset=utf8">
<title>National Geographic Photo of the Day</title></head>
<body bgcolor="gray">
<table border="0" width="90%">
 <tr><td align="center">
 <h1><a target="_blank" href="http://photography.nationalgeographic.com/photography/photo-of-the-day"> National Geographic Photo of the Day</a></h1>
 </td></tr>
 <tr><td align="center">
 <h2>20${2} , ${month_name}</h2>
 </td></tr>
</table>
<table width="90%" border="1">
EOF
    # Day index, five links per row.
    for (( day = 1; day <= days; day++ )); do
      if (( day % cols == 1 )); then
        echo "<tr>"
      fi
      printf '<td width="20%%"><a href="#day%02d">day %02d</a></td>\n' "$day" "$day"
      if (( day % cols == 0 )); then
        echo "</tr>"
      fi
    done
    # Fill the last row with empty cells when the month does not end on a
    # multiple of five.
    pad=$(( (cols - days % cols) % cols ))
    if (( pad > 0 )); then
      for (( day = 0; day < pad; day++ )); do
        echo '<td width="20%"> </td>'
      done
      echo "</tr>"
    fi
    echo "</table><br><br>"
  } >>"$intro_file"
}
# Append the closing </body></html> to page $1 when $4 is the last day of
# the month.
# Arguments: $1 - page to append to
#            $2 - two-digit year (offset from 2000)
#            $3 - two-digit month
#            $4 - two-digit day
# Leap years are "year divisible by 4" (valid for 2000-2099), so February is
# closed on the 29th in every leap year rather than in a fixed list of years.
create_introtail() {
  local intro_file year month day last

  if [ $# -ne 4 ]; then
    return
  fi
  intro_file=$1
  year=$2
  month=$3
  day=$4

  case "$month" in
    02)
      if (( 10#$year % 4 == 0 )); then last=29; else last=28; fi
      ;;
    04 | 06 | 09 | 11) last=30 ;;
    *) last=31 ;;
  esac

  if [ "$day" = "$last" ]; then
    echo "</body></html>" >>"$intro_file"
  fi
}
# Append the entry for one day to an overview page.
# Arguments: $1 - file to append to
#            $2 - downloaded photo-of-the-day page
#            $3 - two-digit day (becomes the anchor name "dayNN")
#            $4 - local path of the picture
#            $5 - URL of the original page
# Globals read: TMP4 (receives the related links of the page).
# The markup is written with a here-document so attribute values keep their
# quotes; in particular the anchor is written as <a name="dayNN">, the form
# the resume logic in downloadngpod searches for.
update_intro() {
  local intro_file=$1 page=$2 day=$3 jpg_local=$4 page_link=$5
  local title shot_date who intro

  title=$(s_title "$page")
  # Some pages have no title.
  title=${title:-"National Geographic Photo of the Day:"}
  shot_date=$(s_date "$page")
  who=$(s_who "$page")
  intro=$(s_intro "$page")

  # NOTE(review): nothing visible in pod.sh reads TMP4 afterwards; the write
  # is kept so the function's side effects stay unchanged.
  s_relatedlink "$page" >"$TMP4"

  cat >>"$intro_file" <<EOF
<br><a name="day${day}"></a><br>
<table border="0" width="90%">
 <table border="0" width="100%" bgcolor="#666666"><tr>
 <td width="45%" align="left" >${title}</td>
 <td align="center"><a href="${page_link}">${shot_date}</a></td>
 </tr></table>
 <table border="0" width="100%">
 <tr><td align="right" width="50%">
 <table border="0" width="100%">
 <tr><td><br>${intro}<br></td></tr>
 <tr><td>${who}<br><hr></td></tr>
 </table></td>
 <td align="center" width="500"><a target="_blank" href="../../${jpg_local}"><img src="../../${jpg_local}" width="480" align="right"></a></td>
 </tr>
 </table>
</table>
EOF
}
- #download ngpod
# Download the photo of the day for DYEAR/DMONTH/DDAY and add it to the
# monthly overview page photography/pod/pod20YYMM.htm.
# Globals read:    DYEAR DMONTH DDAY EXLINK TMP1 TMP2 TMPJPG TMPINTRO
# Globals written: JPGNAME (via sg_jpgname); BMONTH/BDAY when days or a whole
#                  month are skipped because an earlier run already did them.
# Resume rules:
#   * a page that already contains </html> is complete: advance BMONTH and
#     set BDAY=0 so the caller's increment lands on day 1 of the next month;
#   * otherwise, if the last recorded day anchor is not before BDAY, jump
#     BDAY to it so the caller continues with the following day.
# The anchor is recognised both as name="dayNN" and as name=dayNN, because
# pages written by earlier runs may carry the unquoted form.
downloadngpod() {
  local YEAR=$DYEAR MONTH=$DMONTH DAY=$DDAY
  local DIR JPGLOCAL INTROFILE
  local HTMLINK1 HTMLLINK2
  local JPGALPHA JPGBASENAME JPGREMOTE
  local LASTDAY

  DIR="photography/pod"
  mkdir -p "$DIR"
  INTROFILE="${DIR}/pod20${YEAR}${MONTH}.htm"

  if [ ! -f "$INTROFILE" ]; then
    create_introhead "$INTROFILE" "$YEAR" "$MONTH"
  elif grep -q '</html>' "$INTROFILE"; then
    # The page is finished: skip to the next month.
    echo "file end,skipped 20${YEAR}/${MONTH} month "
    BMONTH=$(( 10#$BMONTH + 1 ))
    BDAY=0
    return
  else
    LASTDAY=$(sed -n -e 's/.*<a name="\{0,1\}day\([0-9][0-9]\)"\{0,1\}>.*/\1/p' "$INTROFILE" | tail -n 1)
    if [ -n "$LASTDAY" ]; then
      LASTDAY=$(( 10#$LASTDAY ))
      if [ "$LASTDAY" -ge "$BDAY" ]; then
        # This day was written by an earlier run.
        BDAY=$LASTDAY
        echo "exist,last is 20${YEAR}/${MONTH}/${BDAY} ngpod "
        return
      fi
    fi
  fi

  HTMLINK1="${EXLINK}/cgi-bin/pod/PhotoOfTheDay.cgi?month=${MONTH}&day=${DAY}&year=${YEAR}"
  getfile "$TMP1" "$HTMLINK1"

  if [ ! -f "$TMP1" ]; then
    echo "get htm fail get 20${YEAR}/${MONTH}/${DAY} ngpod "
    return
  elif [ ! -s "$TMP1" ]; then
    echo "zero file;fail get 20${YEAR}/${MONTH}/${DAY} ngpod"
    return
  fi

  # Follow the wallpaper link and look for the picture there.
  HTMLLINK2=$(s_walllink "$TMP1" | head -n 1)
  getfile "$TMP2" "${EXLINK}${HTMLLINK2}"
  sg_jpgname "$TMP2"
  # Some days have no wallpaper, for example
  # http://photography.nationalgeographic.com/cgi-bin/pod/PhotoOfTheDay.cgi?month=07&day=11&year=01
  if [ -z "$JPGNAME" ]; then
    echo " find 800x600 instead"
    rm -f "$TMP2"
    # Fall back to the "enlarge" page.
    HTMLLINK2=$(s_walllink2 "$TMP1" | head -n 1)
    getfile "$TMP2" "${EXLINK}${HTMLLINK2}"
    sg_jpgname "$TMP2"
  fi
  # Keep a single name even if several lines matched.
  JPGNAME=${JPGNAME%%$'\n'*}
  # Some photos are simply missing on the site.
  if [ -z "$JPGNAME" ]; then
    echo "could not find jpg; fail get 20${YEAR}/${MONTH}/${DAY}"
    return
  fi

  JPGBASENAME=$(basename "$JPGNAME")
  JPGALPHA=$(printf '%s\n' "$JPGBASENAME" | s_alpha)
  DIR="image/${JPGALPHA}"
  mkdir -p "$DIR"
  JPGLOCAL="${DIR}/${JPGBASENAME}"
  JPGREMOTE="${EXLINK}${JPGNAME}"

  # Drop a broken local copy, then download only when none is left.
  check_jpg "$JPGLOCAL"
  if [ ! -f "$JPGLOCAL" ]; then
    rm -f "$TMPJPG"
    getfile "$TMPJPG" "$JPGREMOTE"
    check_jpg "$TMPJPG"
    if [ -f "$TMPJPG" ]; then
      mv -f "$TMPJPG" "$JPGLOCAL"
    fi
  fi

  # Build the entry in a scratch file first, then append it in one step.
  rm -f "$TMPINTRO"
  update_intro "$TMPINTRO" "$TMP1" "$DAY" "$JPGLOCAL" "$HTMLINK1"
  create_introtail "$TMPINTRO" "$YEAR" "$MONTH" "$DAY"

  cat "$TMPINTRO" >>"$INTROFILE"
  echo "OK get 20${YEAR}/${MONTH}/${DAY} ngpod"

  return
}
- ########################################
- # all begin
- # if $EYEAR not set,auto set variables
# If the end date was not set manually, use today.
getendday
echo "This shell need wget,sed,iconv in your \$PATH variable"
echo "ngpod is national geographic photo of the day"
echo "This shell will download ngpod pictures and related "
echo " from ${BYEAR}/${BMONTH}/${BDAY} to ${EYEAR}/${EMONTH}/${EDAY}"
echo "To change begin/end day, edit this shell"
read -r -p "Now begin download(Y/N):" JUDGE
# Only an explicit y/Y starts the download; an empty or multi-word answer
# (or end of input) exits.
case "$JUDGE" in
  y | Y) ;;
  *)
    echo "This shell will exit now"
    exit
    ;;
esac
echo -n "run begin at:"
date
##########################################
# Walk from the start date to the end date, one day per iteration.
# getday sets BYEAR to 0 once the end date has been passed.
getday
while [ 0 -lt "$BYEAR" ]; do
  downloadngpod "$DYEAR" "$DMONTH" "$DDAY"
  # to next day
  BDAY=$(( BDAY + 1 ))
  getday
done
echo -n "end at:"
date
rm -f -- "$TMP1" "$TMP2" "$TMP3" "$TMP4" "$TMP5" "$TMPJPG" "$TMPINTRO"
- # end of pod.sh
- ####################
复制代码
- #!/bin/bash
- #callphotos.sh
- #update 2008/4/5,
- # to get last version,visit http://eagleisyou.spaces.live.com
# TOPLINK holds the start pages, TOPTOTAL their count (both set by init_data).
declare TOPTOTAL
declare -a TOPLINK
# Indices of finished start pages are appended here by photos.sh.
declare LOGFILE=.calllog
# Highest worker slot number: up to MAXPROID + 1 copies of photos.sh run at
# a time. Defaults to 9; may be overridden from the environment.
declare MAXPROID=${MAXPROID:-9}
- #########################
- # function definition area begin
# Fill TOPLINK with the start pages to crawl and TOPTOTAL with their count.
# The position of a link in the list is the index handed to photos.sh and
# recorded in the log file.
init_data() {
  local geo="http://photography.nationalgeographic.com/photography"

  TOPLINK=(
    "http://www3.nationalgeographic.com/places/directory.html"
    "http://science.nationalgeographic.com/science/atoz.html"
    "http://animals.nationalgeographic.com/animals/a-to-z"
    "${geo}/photographers"
    "${geo}/photogalleries/adventure-exploration"
    "${geo}/photogalleries/animals"
    "${geo}/photogalleries/nature-weather"
    "${geo}/photogalleries/people-culture"
    "${geo}/photogalleries/science-space"
    "${geo}/photogalleries/travel"
    "${geo}/photogalleries/underwater"
    "${geo}/article/patterns-nature-galleries.html"
    "${geo}/wallpapers/people-culture"
    "${geo}/wallpapers/travel"
    "${geo}/wallpapers/underwater"
    "${geo}/article/patterns-nature-wallpapers.html"
    "${geo}/wallpapers/science-space"
    "${geo}/wallpapers/nature-weather"
    "${geo}/wallpapers/animals"
    "${geo}/wallpapers/adventure-exploration"
  )

  TOPTOTAL=${#TOPLINK[@]}
}
# Pick up where an interrupted run stopped.
# Globals read:    TOPTOTAL LOGFILE
# Globals written: TOPLINK (entries already handled are blanked),
#                  MAXPROID (raised when a leftover slot number is higher).
# Each leftover state file .process<slot> starts with the index of the start
# page its worker was handling; that worker is restarted in the background
# on the same slot. State files that are empty, or whose index is not a
# valid start page, are removed. Every index listed in LOGFILE was finished
# by the previous run and is blanked as well - also when no state file is
# left over.
send_last() {
  local state_file link_idx progress slot done_idx

  for state_file in .process*; do
    # Unmatched glob (or something that is not a regular file).
    [ -f "$state_file" ] || continue

    link_idx=
    read -r link_idx progress <"$state_file"
    case "$link_idx" in
      '' | *[!0-9]*)
        rm -f -- "$state_file"
        continue
        ;;
    esac
    if [ "$link_idx" -ge "$TOPTOTAL" ]; then
      rm -f -- "$state_file"
      continue
    fi

    # Slot number the interrupted worker was using.
    slot=${state_file#.process}
    case "$slot" in
      '' | *[!0-9]*) continue ;;
    esac

    if [ "$MAXPROID" -lt "$slot" ]; then
      MAXPROID=$slot
    fi
    echo "re-run last interrput link, use process $slot "
    ./photos.sh ".process${slot}" "$link_idx" "${TOPLINK[$link_idx]}" "$LOGFILE" &

    # This start page is taken care of.
    TOPLINK[$link_idx]=""
  done

  # Start pages the previous run finished completely.
  if [ -f "$LOGFILE" ]; then
    while read -r done_idx || [ -n "$done_idx" ]; do
      case "$done_idx" in
        '' | *[!0-9]*) continue ;;
      esac
      if [ "$done_idx" -lt "$TOPTOTAL" ]; then
        TOPLINK[$done_idx]=""
      fi
    done <"$LOGFILE"
  fi
}
# Hand every remaining start page to a photos.sh worker.
# Globals read: TOPLINK TOPTOTAL MAXPROID LOGFILE
# A worker slot N counts as busy while the file .processN exists. Slots
# 0..MAXPROID are scanned in turn; a free slot gets the next non-empty
# start page. After each full scan the function sleeps 60 seconds and prints
# a progress counter. It returns as soon as the last start page has been
# handed out or skipped.
send_parallel() {
  local slot=0 link_idx=0 naps=0

  # Restart interrupted workers and blank the links that are already done.
  send_last

  while true; do
    if [ ! -f ".process${slot}" ]; then
      if [ -z "${TOPLINK[$link_idx]}" ]; then
        # Already handled: move on to the next link, same slot.
        link_idx=$(( link_idx + 1 ))
        if [ "$link_idx" -ge "$TOPTOTAL" ]; then
          return
        fi
        continue
      fi
      echo "$link_idx link, use process $slot "
      ./photos.sh ".process${slot}" "$link_idx" "${TOPLINK[$link_idx]}" "$LOGFILE" &
      link_idx=$(( link_idx + 1 ))
    fi

    if [ "$link_idx" -ge "$TOPTOTAL" ]; then
      return
    fi

    if [ "$slot" -eq "$MAXPROID" ]; then
      # Every slot has been looked at: wait before scanning again.
      slot=0
      if [ "$naps" -eq 0 ]; then
        echo "no more process free,sleep:"
      else
        echo -n "$naps:"
      fi
      sleep 60
      naps=$(( naps + 1 ))
      if [ "$naps" -eq 20 ]; then
        echo "$naps:"
        naps=0
      fi
      continue
    fi
    slot=$(( slot + 1 ))
  done
}
# Block until no worker state file .process0 .. .process<MAXPROID> is left.
# Globals read: MAXPROID
# The range includes slot MAXPROID itself because send_parallel starts
# workers on slots 0..MAXPROID. While a slot is busy the function sleeps
# 60 seconds at a time and prints a progress counter.
wait_parallel() {
  local slot=0 naps=0

  while [ "$slot" -le "$MAXPROID" ]; do
    if [ -f ".process${slot}" ]; then
      if [ "$naps" -eq 0 ]; then
        echo -n "process $slot still run,sleep:"
      else
        echo -n "$naps:"
      fi
      sleep 60
      naps=$(( naps + 1 ))
      if [ "$naps" -eq 20 ]; then
        echo "$naps:"
        naps=0
      fi
      continue
    fi
    slot=$(( slot + 1 ))
  done
}
# The worker script must sit next to this one and be executable.
if [ ! -x ./photos.sh ]; then
  echo "callphotos.sh: ./photos.sh is missing or not executable" >&2
  exit 1
fi
# the script run begin
init_data
# show begin time
echo -n "Begin at:"
date
# Restart interrupted workers, then hand out the remaining start pages.
send_parallel
sleep 10
# Wait for the workers to finish.
wait_parallel
# All done: the log of finished start pages is no longer needed.
rm -f -- "$LOGFILE"
# show end time
echo -n "end at:"
date
- # end of callphotos.sh
- ####################
复制代码
- #!/bin/bash
- #photos.sh
- #update 2008/4/5,
- # to get last version,visit http://eagleisyou.spaces.live.com
# Base names of the scratch files; the start-up code at the end of the
# script appends the per-process number to each of them.
declare TMP1=.tmp1pro TMP2=.tmp2pro TMP3=.tmp3pro
declare TMP4=.tmp4pro TMP5=.tmp5pro TMP6=.tmp6pro
declare TMPJPG=.tmp7pro
# Results handed back by the srel_* / judge_type / get_dirlevel helpers.
declare INTRO1
declare INTRO2
declare FLINK1
declare JPGNAME
declare bLINK
declare DIRLEVEL
# Run-time parameters taken from the command line.
declare PROCESSNUM
declare INTIMFILE
declare CURRENTLINK
- ###############################################
- #function difinition area
# Print "<link> <text>" for every line holding an anchor to an .html page
# whose text contains no further markup.
# Reads file $1, or standard input when no file is given.
s_toplink() {
  sed -n -e 's/.*<a href="\(.*html\)">\([^<]*\)<\/a>.*/\1 \2/p' ${1:+"$1"}
}
# Print the "http...com" host prefix of an absolute link; prints nothing for
# a site-relative link.
# Reads file $1, or standard input when no file is given.
s_linkpre() {
  sed -n -e 's/\(http.*com\)\/.*/\1/p' ${1:+"$1"}
}
# Store in the global DIRLEVEL the number of path components of $1, i.e.
# the number of '/' characters plus one.
# The slashes are counted with tr, which never adds a trailing newline, so
# the count cannot be off by one.
get_dirlevel() {
  local -i slashes

  slashes=$(printf '%s' "$1" | tr -cd '/' | wc -c)
  DIRLEVEL=$(( slashes + 1 ))
}
# Download URL $2 into file $1 (30 second timeout).
# Anything whose URL does not contain ".jpg" is treated as an ISO-8859-1
# page and converted to UTF-8 in place. The download is only replaced when
# the conversion succeeded; otherwise it is kept as fetched.
# wget's messages are discarded on purpose; callers inspect the file.
getfile() {
  local dest=$1 url=$2

  wget -T 30 -O "$dest" "$url" 2>/dev/null

  case "$url" in
    *.jpg*) return ;;
  esac

  if iconv -f "iso8859-1" -t "utf8" <"$dest" >"${dest}.utf8"; then
    mv -f "${dest}.utf8" "$dest"
  else
    rm -f "${dest}.utf8"
  fi
}
- #related link for the related
# Print the .html target of the anchor whose text is exactly NEXT.
# Reads file $1, or standard input when no file is given.
sr_next() {
  sed -n -e 's/.*<a href="\(.*html\)">NEXT<\/a>.*/\1/p' ${1:+"$1"}
}
# Find the link to the large-picture page in page $1 and store it in the
# global FLINK1 (empty when there is none).
# Tried in order: a link containing /wallpaper/, a link containing
# /enlarge/, then the link wrapped around the enlarge icon.
# Only the first match of a pattern is kept, so FLINK1 is always a single
# link.
srel_picfile() {
  local page=$1 script

  FLINK1=
  for script in \
    's/.*<a href="\(.*\/wallpaper\/.*html\)">.*/\1/p' \
    's/.*<a href="\(.*\/enlarge\/.*html\)">.*/\1/p' \
    's/.*<a href="\(.*html\)"><img src=".*enlarge_icon.*/\1/p'; do
    FLINK1=$(sed -n -e "$script" ${page:+"$page"} | head -n 1)
    if [ -n "$FLINK1" ]; then
      break
    fi
  done
  return 0
}
# Store the photo credit of page $1 in the global INTRO2: the content of
# <div class="photocredit">, or of <div class="credit"> when the former is
# absent. INTRO2 is empty when neither exists.
srel_who() {
  local page=$1 css

  INTRO2=
  for css in photocredit credit; do
    INTRO2=$(sed -n -e 's/<div class="'"$css"'">\(.*\)<\/div>/\1/p' ${page:+"$page"})
    if [ -n "$INTRO2" ]; then
      break
    fi
  done
  return 0
}
# Store the caption text of page $1 in the global INTRO1 (empty when none).
# Three multi-line containers are tried in turn - div class "caption",
# "photogallerytext", "photocaption" - each joined with the twelve lines
# that follow it; the text up to the last </div> that is still followed by
# another <div is taken. The last resort is a single-line
# <div class="wmDarkGreyBodyText">.
srel_intro() {
  local page=$1 css

  INTRO1=
  for css in caption photogallerytext photocaption; do
    INTRO1=$(sed -n -e '/<div class="'"$css"'"/{N;N;N;N;N;N;N;N;N;N;N;N; s/.*<div class="'"$css"'"[^<]*>\(.*\)<\/div>.*<div.*/\1/p}' ${page:+"$page"})
    if [ -n "$INTRO1" ]; then
      return 0
    fi
  done
  INTRO1=$(sed -n -e 's/.*<div class="wmDarkGreyBodyText"[^<]*>\(.*\)<\/div>/\1/p' ${page:+"$page"})
  return 0
}
# Find the best picture link in page $1 and store it in the global JPGNAME
# (empty when nothing matched).
# Tried in order: the 1280x1024 link, the 1024x768 link, then images whose
# name ends in "sw.jpg", contains "800", contains "photo_lg", or ends in
# "ga.jpg".
# Only the first match of a pattern is kept, so JPGNAME is always a single
# name.
srel_jpgname() {
  local page=$1 script

  JPGNAME=
  for script in \
    's/.*<a href="\(.*jpg\)">.*1280 x 1024 pixels.*/\1/p' \
    's/.*<a href="\(.*jpg\)">.*1024 x 768 pixels.*/\1/p' \
    's/.*src="\(.*sw\.jpg\)".*/\1/p' \
    's/.*src="\(.*800.*jpg\)".*/\1/p' \
    's/.*src="\(.*photo_lg.*jpg\)".*/\1/p' \
    's/.*src="\(.*ga\.jpg\)".*/\1/p'; do
    JPGNAME=$(sed -n -e "$script" ${page:+"$page"} | head -n 1)
    if [ -n "$JPGNAME" ]; then
      break
    fi
  done
  return 0
}
# Print the first character of each input line that contains "jpg"
# (used to pick the image/<letter> directory for a picture name).
# Reads file $1, or standard input when no file is given.
s_alpha() {
  sed -n -e 's/\(^.\).*jpg/\1/p' ${1:+"$1"}
}
# For an absolute link, print the directory part between the "...com/" host
# and the page name, e.g. "http://a.com/animals/photos/x.html" ->
# "animals/photos".
# Reads file $1, or standard input when no file is given.
s_relateddir1() {
  sed -n -e 's/.*com\/\(.*\)\/.*\.html/\1/p' ${1:+"$1"}
}
# For a site-relative link, print the directory part between the leading
# '/' and the page name, e.g. "/animals/photos/x.html" -> "animals/photos".
# Reads file $1, or standard input when no file is given.
s_relateddir2() {
  sed -n -e 's/\/\(.*\)\/.*\.html/\1/p' ${1:+"$1"}
}
# For every line containing <h3>, join it with the next six lines and print
# "<link> <text>" of the .html anchor found in that span.
# Reads file $1, or standard input when no file is given.
s_relatedlink() {
  sed -n -e '/<h3>/{N;N;N;N;N;N;s/.*<a href="\(.*html\).*>\(.*\)<\/a>.*/\1 \2/p}' ${1:+"$1"}
}
# Print "<link> <text>" of the .html anchor on every line that contains
# <div class="WhatsHotTitle.
# Reads file $1, or standard input when no file is given.
s_relatedlink2() {
  sed -n -e '/<div class="WhatsHotTitle/{s/.*<a href="\(.*html\).*>\(.*\)<\/a>.*/\1 \2/p}' ${1:+"$1"}
}
# Print the first path component of a directory such as "animals/photos"
# (-> "animals"). Prints nothing when the input contains no '/'.
# Reads file $1, or standard input when no file is given.
s_reltype() {
  sed -n -e 's/\([a-zA-Z]*\)\/.*/\1/p' ${1:+"$1"}
}
# Delete file $1 when it cannot be a usable picture.
# A missing file is ignored. A file smaller than 6000 bytes, or one that
# `file` does not identify as JPEG, is removed.
# `file -b` omits the file name from the output, so a path that merely
# contains the text "JPEG" is not mistaken for a JPEG image.
check_jpg() {
  local target=$1 kind
  local -i size

  if [ ! -f "$target" ]; then
    return 0
  fi

  size=$(wc -c <"$target")
  if (( size < 6000 )); then
    rm -f -- "$target"
    return
  fi

  kind=$(file -b -- "$target")
  case "$kind" in
    *JPEG*) ;;
    *) rm -f -- "$target" ;;
  esac
}
# Map a site section name ($1) to a number in the global bLINK:
# animals=1, photography=2, science=3, places=4, history=5, anything else
# (including no argument) = 0.
judge_type() {
  case "$1" in
    animals) bLINK=1 ;;
    photography) bLINK=2 ;;
    science) bLINK=3 ;;
    places) bLINK=4 ;;
    history) bLINK=5 ;;
    *) bLINK=0 ;;
  esac
}
# Append the HTML head and the page heading to file $1.
# Arguments: $1 - file to append to
#            $2 - title (shown in <title> and as the heading text)
#            $3 - URL the heading links to
# The markup is written with a here-document so attribute quotes and the
# ';' inside the meta tag reach the file.
create_head() {
  local fname=$1 title=$2 link=$3

  cat >>"$fname" <<EOF
<html><head>
<meta http-equiv="Content-Type" content="text/html; charset=utf8">
<title>${title}</title></head>
<body bgcolor="gray">
<table border="0" width="90%">
<tr><td align="center">
<h1><a target="_blank" href="${link}">${title}</a></h1>
</td></tr>
</table>
EOF
}
# Append the entry for one picture page to an overview file and download
# the picture.
# Arguments: $1 - file to append to
#            $2 - host prefix for site-relative links
#            $3, $4 - accepted for call compatibility, not used here
#            $5 - URL of the picture page (target of the entry's title link)
#            $6 - text of the entry's title link
#            $7 - optional; when non-empty the title bar is red (FF0000)
#                 instead of grey (888888)
# Globals read:    TMP1 (the already downloaded page; overwritten when a
#                  wallpaper/enlarge page is followed), TMPJPG, DIRLEVEL
# Globals written: INTRO1 INTRO2 FLINK1 JPGNAME (through the srel_* helpers)
# When no picture can be found only a rule and, if present, the caption are
# written.
create_single() {
  local FNAME=$1 LINKPRE=$2 HTMLINK=$5 HTMNAME=$6
  local JPGLOCAL JPGALPHA JPGBASENAME JPGREMOTE
  local COLOR DIR UP

  if [ -z "$7" ]; then
    COLOR=888888
  else
    COLOR=FF0000
  fi

  # Caption and credit go to INTRO1 / INTRO2.
  srel_intro "$TMP1"
  srel_who "$TMP1"
  # Link to the large-picture page goes to FLINK1; it may be empty, which
  # means the page only offers the small picture.
  srel_picfile "$TMP1"
  FLINK1=${FLINK1%%$'\n'*}
  if [ -n "$FLINK1" ]; then
    rm -f "$TMP1"
    getfile "$TMP1" "${LINKPRE}${FLINK1}"
    if [ -z "$INTRO1" ]; then
      srel_intro "$TMP1"
      srel_who "$TMP1"
    fi

    # The page just fetched was not a wallpaper page: see whether it links
    # to one and, if so, follow that link too.
    case "$FLINK1" in
      *wallpaper*) ;;
      *)
        FLINK1=$(sed -n -e 's/.*<a href="\(.*\/wallpaper\/.*html\)">.*/\1/p' "$TMP1" | head -n 1)
        if [ -n "$FLINK1" ]; then
          rm -f "$TMP1"
          getfile "$TMP1" "${LINKPRE}${FLINK1}"
        fi
        ;;
    esac
  fi

  # Picture link goes to JPGNAME; keep a single name.
  srel_jpgname "$TMP1"
  JPGNAME=${JPGNAME%%$'\n'*}

  # There may be no picture at all, only reference links, for example
  # http://photography.nationalgeographic.com/photography/photographers/photography-timeline.html
  if [ -z "$JPGNAME" ]; then
    echo "<hr><br>" >>"$FNAME"
    if [ -n "$INTRO1" ]; then
      cat >>"$FNAME" <<EOF
<table border="0" width="100%">
 <tr><td>${INTRO1}<br></td></tr>
 <tr><td>${INTRO2}</td></tr>
</table>
EOF
    fi
    return
  fi

  JPGBASENAME=$(basename "$JPGNAME")
  JPGALPHA=$(printf '%s\n' "$JPGBASENAME" | s_alpha)
  DIR="image/${JPGALPHA}"
  mkdir -p "$DIR"
  JPGLOCAL="${DIR}/${JPGBASENAME}"
  JPGREMOTE="${LINKPRE}${JPGNAME}"

  # Drop a broken local copy, then download only when none is left.
  check_jpg "$JPGLOCAL"
  if [ ! -f "$JPGLOCAL" ]; then
    rm -f "$TMPJPG"
    getfile "$TMPJPG" "$JPGREMOTE"
    check_jpg "$TMPJPG"
    if [ -f "$TMPJPG" ]; then
      mv -f "$TMPJPG" "$JPGLOCAL"
    fi
  fi

  # The overview file lives two or three directories below the image tree.
  if [ "${DIRLEVEL:-0}" -eq 2 ]; then
    UP=../..
  else
    UP=../../..
  fi

  cat >>"$FNAME" <<EOF
 <table border="0" width="100%" bgcolor="#${COLOR}"><tr>
 <td align="center"><a href="${HTMLINK}">${HTMNAME}</a></td>
 </tr></table>
<table border="0" width="100%">
 <tr><td align="right" width="50%">
 <table border="0" width="100%">
 <tr><td><br>${INTRO1}<br></td></tr>
EOF
  if [ -n "$INTRO2" ]; then
    echo " <tr><td><hr>${INTRO2}</td></tr>" >>"$FNAME"
  fi
  cat >>"$FNAME" <<EOF
 </table></td>
 <td align="center" width="500"><a target="_blank" href="${UP}/${JPGLOCAL}"><img src="${UP}/${JPGLOCAL}" width="480" align="right"></a></td>
 </tr>
</table>
EOF
}
# Finish an overview page: append its list of related links, publish it,
# then crawl the related pages.
# Arguments: $1 - scratch file holding the page built so far
#            $2 - remaining crawl depth
#            $3 - final location of the overview page
# Globals read: TMP3 (copy of the page being processed), TMP5 (scratch),
#               DIRLEVEL
# Related links are taken from the <h3> blocks of TMP3 or, failing that,
# from its "WhatsHot" titles. Links to index pages, screensavers and
# photo-of-the-day pages, and links outside the known site sections, are
# skipped. Site-relative links get the host that belongs to their section.
# If $3 already contains </html> an earlier run finished it and nothing is
# published or crawled. Related pages are only visited while $2 > 0.
get_relrel() {
  local FNAME=$1 NUM=$2
  local LPRE TYPE UP HOST
  local RELNAME RELLINK RELDIR RELLOCAL
  local -a ARRAYLINK=() ARRAYDIR=() ARRAYNAME=()
  local TOTAL=0 COUNT

  s_relatedlink "$TMP3" >"$TMP5"
  if [ ! -s "$TMP5" ]; then
    s_relatedlink2 "$TMP3" >"$TMP5"
  fi
  echo "<br><hr>" >>"$FNAME"

  # The overview file lives two or three directories deep.
  if [ "${DIRLEVEL:-0}" -eq 2 ]; then
    UP=../..
  else
    UP=../../..
  fi

  while read -r RELLINK RELNAME; do
    case "$RELLINK" in
      *index.html* | *screensaver* | *photo-of-the-day*) continue ;;
    esac

    RELDIR=$(printf '%s\n' "$RELLINK" | s_relateddir1)
    if [ -z "$RELDIR" ]; then
      RELDIR=$(printf '%s\n' "$RELLINK" | s_relateddir2)
    fi
    TYPE=$(printf '%s\n' "$RELDIR" | s_reltype)
    judge_type "$TYPE"
    if [ "$bLINK" -eq 0 ]; then
      continue
    fi

    RELLOCAL=$(basename "$RELLINK")

    # A link without a host prefix is site-relative: prepend the host of
    # its section.
    LPRE=$(printf '%s\n' "$RELLINK" | s_linkpre)
    if [ -z "$LPRE" ]; then
      case "$bLINK" in
        1) HOST=animals ;;
        2) HOST=photography ;;
        3) HOST=science ;;
        *) HOST=www3 ;;
      esac
      RELLINK="http://${HOST}.nationalgeographic.com${RELLINK}"
    fi

    echo "<br><a href=\"${UP}/${RELDIR}/${RELLOCAL}\">${RELNAME}</a>" >>"$FNAME"

    ARRAYLINK[TOTAL]=$RELLINK
    ARRAYDIR[TOTAL]=$RELDIR
    ARRAYNAME[TOTAL]=$RELNAME # may contain spaces
    TOTAL=$(( TOTAL + 1 ))
  done <"$TMP5"

  rm -f "$TMP5"
  # The page is complete now.
  echo "<br></body></html>" >>"$FNAME"

  # Already published by an earlier run: leave it alone.
  if grep -q '</html>' "$3"; then
    return
  fi
  cat "$FNAME" >"$3"

  if [ 0 -lt "$NUM" ]; then
    for (( COUNT = 0; COUNT < TOTAL; COUNT++ )); do
      update_rel "${ARRAYLINK[COUNT]}" "${ARRAYDIR[COUNT]}" "${ARRAYNAME[COUNT]}" "$NUM"
    done
  fi
}
# Build the local overview page for one remote page and crawl on from it.
# Arguments: $1 - link to the page (absolute or site-relative)
#            $2 - local directory for the overview page (also the remote
#                 directory of the page)
#            $3 - link text (not used for output)
#            $4 - remaining crawl depth; capped at 9, nothing happens below 1
# Globals read:    TMP1 TMP3 TMP6 (scratch files)
# Globals written: DIRLEVEL (via get_dirlevel), bLINK (via judge_type)
# The overview page is <$2>/<basename of $1>. If it already contains
# </html> an earlier run finished it and the function returns. Otherwise
# the page is downloaded and every image link on it is examined:
#   * links inside a directory named like the page itself are its pictures;
#     the first one stands for the page's own picture, each later one is
#     fetched and added;
#   * links inside the page's own directory are sibling pages; they are
#     added afterwards with a highlighted title bar.
# Finally get_relrel appends the related links, publishes the page and
# recurses with the depth reduced by one.
update_rel() {
  local FNAME NAME REALNAME
  local DIR LINK LINKPRE LPRE
  local TEST T T1 DIR2
  local NUM TITLE
  local CLUSTER HTMNAME
  local CTYPE C1 C2
  local -a CUR=() TOKENS=()

  if [ $# -ne 4 ]; then
    return 0
  fi
  NUM=$4
  if [ "$NUM" -lt 1 ]; then
    return 0
  fi
  if [ -z "$2" ]; then
    return 0
  fi
  if [ 9 -lt "$NUM" ]; then
    NUM=9
  fi

  NAME=$(basename "$1")
  DIR=$2
  TITLE=$3
  get_dirlevel "$DIR"
  mkdir -p "$DIR"
  FNAME="${DIR}/${NAME}"

  NUM=$(( NUM - 1 ))
  LINKPRE=$(printf '%s\n' "$1" | s_linkpre)
  if [ -z "$LINKPRE" ]; then
    # Site-relative link: derive the host from the section directory.
    judge_type "$(printf '%s\n' "$DIR" | s_reltype)"
    case "$bLINK" in
      1) LINKPRE="http://animals.nationalgeographic.com" ;;
      2) LINKPRE="http://photography.nationalgeographic.com" ;;
      3) LINKPRE="http://science.nationalgeographic.com" ;;
      *) LINKPRE="http://www3.nationalgeographic.com" ;;
    esac
    LINK=${LINKPRE}${1}
  else
    LINK=$1
  fi

  if [ -f "$FNAME" ]; then
    # A finished page ends with </html>; an unfinished (empty) one means
    # the last run was interrupted and the page is processed again.
    if grep -q '</html>' "$FNAME"; then
      return 0
    fi
  else
    touch "$FNAME"
  fi

  # Page name without ".html", and the last component of the directory.
  CLUSTER=$(printf '%s\n' "$LINK" | sed -n -e 's/.*\/\([^.]*\)\.html/\1/p')
  CTYPE=$(printf '%s\n' "$DIR" | sed -n -e 's/.*\/\([^/]*\)/\1/p')
  C1=0

  REALNAME=$FNAME
  # Build the page in a scratch file first.
  FNAME=$TMP6
  rm -f "$TMP6"
  getfile "$TMP1" "$LINK"

  # Keep a copy of the page for get_relrel.
  cp -f "$TMP1" "$TMP3"

  HTMNAME=$CLUSTER
  create_head "$FNAME" "$HTMNAME" "$LINK"

  # Replace every image link by its target followed by a space, then split
  # the result into words. read -a splits without glob expansion, and the
  # for loop below also sees the last word.
  TEST=$(sed -n -e 's/<a href="\([^>]*\.html\)"[^>]*><img[^>]*><\/a>/\1 /gp' "$TMP1")
  read -r -d '' -a TOKENS <<<"$TEST"

  T=0
  for T1 in "${TOKENS[@]}"; do
    DIR2=$(printf '%s\n' "$T1" | sed -n -e 's/.*\/\([^/]*\)\/[^.]*\.html/\1/p')
    HTMNAME=$(printf '%s\n' "$T1" | sed -n -e 's/.*\/\([^.]*\)\.html/\1/p')

    # A sibling page in the same directory: remember it for later.
    if [ "$DIR2" == "$CTYPE" ]; then
      T1=$(basename -- "$T1")
      CUR[C1]="${LINKPRE}/${DIR}/${T1}"
      C1=$(( C1 + 1 ))
    fi

    # A picture page belonging to the current page.
    if [ "$DIR2" == "$CLUSTER" ]; then
      T=$(( T + 1 ))
      if [ "$T" -eq 1 ]; then
        # The first picture link shows the same picture as the page itself
        # (for example .../photography/photos/digital-photography-tips.html),
        # and may not even exist, so take the picture from the page.
        create_single "$FNAME" "$LINKPRE" "$NUM" "$REALNAME" "$LINK" "$HTMNAME"
        continue
      fi
      # A picture link may be listed although the site cannot deliver it
      # (for example .../animals/photos/monkeys.html); create_single copes
      # with that.
      LPRE=$(printf '%s\n' "$T1" | s_linkpre)
      if [ -z "$LPRE" ]; then
        getfile "$TMP1" "${LINKPRE}${T1}"
        create_single "$FNAME" "$LINKPRE" "$NUM" "$REALNAME" "${LINKPRE}${T1}" "$HTMNAME"
      else
        getfile "$TMP1" "$T1"
        create_single "$FNAME" "$LINKPRE" "$NUM" "$REALNAME" "$T1" "$HTMNAME"
      fi
    fi
  done

  if [ "$T" -eq 0 ]; then
    # No picture links at all: take the picture of the page itself.
    cp -f "$TMP3" "$TMP1"
    HTMNAME=$CLUSTER
    create_single "$FNAME" "$LINKPRE" "$NUM" "$REALNAME" "$LINK" "$HTMNAME"
  fi

  # Sibling pages, shown with a highlighted title bar.
  for (( C2 = 0; C2 < C1; C2++ )); do
    HTMNAME=$(printf '%s\n' "${CUR[C2]}" | sed -n -e 's/.*\/\([^.]*\)\.html/\1/p')
    getfile "$TMP1" "${CUR[C2]}"
    create_single "$FNAME" "$LINKPRE" "$NUM" "$REALNAME" "${CUR[C2]}" "$HTMNAME" 1
  done

  # Related links of the current page; publishes the page and recurses.
  get_relrel "$FNAME" "$NUM" "$REALNAME"
}
# Crawl one start page.
# Arguments: $1 - URL of the start page
# Globals read: TMP1 TMP5 (scratch), INTIMFILE (state file), PROCESSNUM
# Every usable link on the start page is collected first. Links to index
# pages, screensavers and photo-of-the-day pages, and links outside the
# known site sections, are skipped; site-relative links get the start
# page's host. The links are then processed one by one with a crawl depth
# of 6. Before each link "<PROCESSNUM> <position>" is written to INTIMFILE;
# when INTIMFILE already holds a position from an interrupted run,
# processing resumes there.
parse_toplink() {
  local LPRE RELLINK RELNAME RELDIR EXLINK TYPE
  local COUNT= SAVEDNUM=
  local -a LINKS=() NAMES=() DIRS=()
  local LINKSTOTAL=0

  getfile "$TMP1" "$1"
  s_toplink "$TMP1" >"$TMP5"

  EXLINK=$(printf '%s\n' "$1" | s_linkpre)
  while read -r RELLINK RELNAME; do
    case "$RELLINK" in
      *index.html* | *screensaver* | *photo-of-the-day*) continue ;;
    esac

    RELDIR=$(printf '%s\n' "$RELLINK" | s_relateddir1)
    if [ -z "$RELDIR" ]; then
      RELDIR=$(printf '%s\n' "$RELLINK" | s_relateddir2)
    fi
    TYPE=$(printf '%s\n' "$RELDIR" | s_reltype)
    judge_type "$TYPE"
    if [ "$bLINK" -eq 0 ]; then
      continue
    fi

    LPRE=$(printf '%s\n' "$RELLINK" | s_linkpre)
    if [ -z "$LPRE" ]; then
      RELLINK=${EXLINK}${RELLINK}
    fi

    LINKS[LINKSTOTAL]=$RELLINK
    DIRS[LINKSTOTAL]=$RELDIR
    NAMES[LINKSTOTAL]=$RELNAME
    LINKSTOTAL=$(( LINKSTOTAL + 1 ))
  done <"$TMP5"

  # Position saved by an interrupted run, if any.
  read -r SAVEDNUM COUNT <"$INTIMFILE"
  COUNT=${COUNT:-0}

  while [ "$COUNT" -lt "$LINKSTOTAL" ]; do
    echo "$PROCESSNUM $COUNT" >"$INTIMFILE"
    update_rel "${LINKS[COUNT]}" "${DIRS[COUNT]}" "${NAMES[COUNT]}" 6
    COUNT=$(( COUNT + 1 ))
  done
}
- ##################################################
- # the script run from here
# Usage: photos.sh <state-file> <link-index> <start-url> <log-file>
# Started by callphotos.sh, once per start page.
if [ $# -ne 4 ]; then
  echo "this shell should be called by callphotos.sh" >&2
  echo "do not directly run this shell" >&2
  exit 1
fi
# The state file marks this worker slot as busy for callphotos.sh and holds
# the resume position.
INTIMFILE=$1
touch "$INTIMFILE"
PROCESSNUM=$2
# Several workers run at once, so each one gets its own scratch files.
TMP1=${TMP1}$2
TMP2=${TMP2}$2
TMP3=${TMP3}$2
TMP4=${TMP4}$2
TMP5=${TMP5}$2
TMP6=${TMP6}$2
TMPJPG=${TMPJPG}$2
parse_toplink "$3"
# Remove the scratch files.
rm -f "$TMP1" "$TMP2" "$TMP3" "$TMP4" "$TMP5"
rm -f "$TMP6" "$TMPJPG"
# Removing the state file tells callphotos.sh that this slot is free.
rm -f "$INTIMFILE"
# Record the finished start page in the log for callphotos.sh.
echo "$PROCESSNUM" >>"$4"
- # end of photos.sh
- ####################
复制代码 |
|