Target URL Crawler

#!/usr/bin/env bash

##   Target URL Crawler
##   Lists only the URLs of the target domain, using list-urls.py
##   (in backtrack4) to extract the links.
##   by Aung Khant, http://yehg.net
##
##   Takes exactly one argument: the URL to crawl.

# Path of the list-urls.py link extractor this script runs.
list_url_location=/pentest/enumeration/list-urls/list-urls.py

# Banner.
echo "++++++++++++++++++++++++++++++++++++"
echo
echo "Target URL Crawler with list-urls.py"
echo
echo "by Aung Khant, http://yehg.net"
echo "YGN Ethical Hacker Group, Myanmar"
echo
echo "++++++++++++++++++++++++++++++++++++"
echo

# Refuse to start without the extractor. Diagnostics go to stderr and the
# exit status is non-zero so callers can tell the run failed.
if [[ ! -e "$list_url_location" ]]; then
  {
    echo "This script depends on:"
    echo "$list_url_location" 'that does not exist !'
    echo
    echo "Edit the source to modify list-urls.py location"
  } >&2
  exit 1
fi

# Exactly one argument (the URL) is required.
if [[ $# -ne 1 ]]; then
  {
    # $0 already carries the path used to invoke the script, so no extra
    # "./" prefix is added (it used to print "././script").
    echo "Usage: $0 url"
    echo
    echo "e.g $0 http://www.google.com"
  } >&2
  exit 1
fi
 
echo "Crawling .."
echo

# Scratch files:
#   tmpfile  - raw link list written by list-urls.py
#   tmpfile2 - URLs that have already been printed (starts empty)
# mktemp creates each file atomically under an unpredictable name, unlike a
# name derived from the PID or the current date.
tmpfile=$(mktemp "${TMPDIR:-/tmp}/tmp_XXXXXXXX") || {
  echo "Cannot create temporary file" >&2
  exit 1
}
# Remove the scratch files on every exit path, including early exits.
trap 'rm -f -- "$tmpfile"' EXIT

tmpfile2=$(mktemp "${TMPDIR:-/tmp}/tmp_XXXXXXXX") || {
  echo "Cannot create temporary file" >&2
  exit 1
}
trap 'rm -f -- "$tmpfile" "$tmpfile2"' EXIT

# Run the extractor on the requested URL and capture its output.
"$list_url_location" "$1" > "$tmpfile"
 
# Split the requested URL into the pieces used when printing results:
#   protocol - 'http://' or 'https://'
#   domain   - everything between the scheme and the first following '/'
#   target   - the URL as given, with a '/' appended when it has none
#              after the scheme
target=$1

case "$target" in
  http://*)
    protocol='http://'
    ;;
  https://*)
    protocol='https://'
    ;;
  *)
    echo "URL should start with http:// or https://" >&2
    exit 1
    ;;
esac

# Strip the scheme by its actual length. A fixed offset of 7 is wrong for
# https:// and used to leave the domain empty for URLs such as
# https://host (no path).
host_and_path=${target#"$protocol"}

if [[ "$host_and_path" != */* ]]; then
  target="$target/"
  host_and_path="$host_and_path/"
fi

domain=${host_and_path%%/*}
 
#######################################
# Print each crawled link that belongs to the target site as an absolute
# URL, once, in the order encountered.
#
# A link is skipped when it starts (case-insensitively) with http, #,
# javascript or vbscript, when it contains no '/', or when it is a single
# character. Every "../" is removed from the link. Links starting with '/'
# are appended to protocol+domain; all others are appended to target.
#
# Globals:
#   protocol, domain, target (read)
# Arguments:
#   $1 - file holding the raw links, one per line
#   $2 - scratch file that accumulates the URLs already printed
# Outputs:
#   Absolute URLs, one per line, on stdout
#######################################
# The body is a subshell, so nocasematch and the working variables do not
# leak into the rest of the script.
print_target_urls() (
  links_file=$1
  seen_file=$2
  skip_re='^(http|#|javascript|vbscript)'
  shopt -s nocasematch

  # Read line by line: a $(cat file) loop would split links on spaces and
  # expand glob characters such as '*' against the local filesystem.
  while read -r line || [[ -n "$line" ]]; do
    [[ "$line" =~ $skip_re ]] && continue
    [[ "$line" == */* ]] || continue
    (( ${#line} > 1 )) || continue

    # Decide root-relative vs. relative on the link as crawled, before the
    # "../" components are removed.
    if [[ "$line" == /* ]]; then
      base="${protocol}${domain}"
    else
      # NOTE(review): relative links are appended to target verbatim; when
      # target ends in a file name the result is not a resolved URL.
      base="$target"
    fi
    path=${line//..\//}
    url="${base}${path}"

    # Exact, fixed-string, whole-line match on the final URL. A substring
    # or regex match would drop "/about" once "/about/team" was printed.
    if ! grep -qxF -- "$url" "$seen_file"; then
      printf '%s\n' "$url"
      printf '%s\n' "$url" >> "$seen_file"
    fi
  done < "$links_file"
)

print_target_urls "$tmpfile" "$tmpfile2"

rm -f -- "$tmpfile" "$tmpfile2"

Download

Submitted by Aung Khant

discovery/url_crawler.txt · Last modified: 2010/08/19 20:53 by Robin Wood