---
Etemenanki is an agent that builds word dictionaries based on remote and
local (hyper)text repositories.
aym3ric-at-goto10-dot-org
NO WARRANTY - USE IT AT YOUR OWN RISK
Feel free to send patches if you hack it.
---

./etemenanki.sh -h for help

---

Some notes ...

The default behaviour of this script is to download (hyper)text content
from a website and make a sorted, de-duplicated list of all the words found.
When pointing etemenanki to a website/domain, the ~/dictionnaries
directory is created and at the end of the process you will end up with
a wordlist file.

By default the downloaded material is cached in ~/dictionnaries/cache.
This is useful if you intend to resume or update the downloaded content, as
etemenanki will try to download only updated material when a cached
location is given.
Also, when playing with the link depth parameter you may download much more
than expected and may need to interrupt and resume later on.
The cache can also be used for later wordlist creation: when running in
offline mode, etemenanki will skip the download/update of the cache and
process it directly into a wordlist.

If a wordlist already exists for a given domain/location, etemenanki
will update it, never overwrite it.

To generate more entries in the word list, you can turn on the unique
4-PASS l33tifying engine :)
(This can require a few GB of temporary free space if you intend to play with
large wordlist files.)

When you have collected a couple of wlist dictionaries you can merge them
into one; simply execute:
./etemenanki.sh -m

---
#!/bin/bash
#
# Etemenanki is an agent that builds word dictionaries based on remote and
# local (hyper)text repositories.
# aym3ric-at-goto10-dot-org
# NO WARRANTY - USE IT AT YOUR OWN RISK


# Config ----------------------------------------------------------------------
GLOBAL_REPOS=~/dictionnaries/cache
DICOS=~/dictionnaries
# NOTE: PROXY is declared here but nothing in this script reads it.
PROXY=""
SPOOF="Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.7.8) Gecko/20050511"
REJECT=ogg,iso,wav,mpg,mpeg,mp3,mov,jpg,jpeg,gif,png,bmp,tif,tga,xml,css,ico,js,gz,tgz,zip,rar,exe,bin,svg,pdf,eps,doc
WGET_RLEVEL=1
L33TIFY=0
OFFLINE=0

# Runtime state ---------------------------------------------------------------
# TARGET is the value given to -t; LOCAL_REPOS is its cache directory
# (always ends with a slash). Both are set while handling the command line.
# NOTE: TARGET is used verbatim as a file name under $DICOS, so a target
# containing '/' (e.g. a full URL) points into a sub-directory that nothing
# here creates.
TARGET=""
LOCAL_REPOS=""

# Functions -------------------------------------------------------------------

# Dictionaries path validation.
# Makes sure both $DICOS and $GLOBAL_REPOS exist, creating them when needed.
# Globals: DICOS, GLOBAL_REPOS (read)
# Returns: 0 when both directories are usable, 1 when they cannot be created.
validate_paths() {
  local existed=0

  if [[ -e "$DICOS" ]]; then
    existed=1
    echo "[!] dictionnaries folder found."
  else
    echo "[!] dictionnaries folder not found ... creating ..."
  fi

  # mkdir -p is a no-op for directories that already exist, so this also
  # creates the cache directory when only $DICOS was present.
  if ! mkdir -p -- "$DICOS" "$GLOBAL_REPOS"; then
    echo "[!] can't create folder ... bye."
    return 1
  fi

  if (( ! existed )); then
    echo "[!] dictionnaries folder created."
  fi
  return 0
}

# Download the target into its cache directory.
# wget writes its log to stdout (-o /dev/stdout) so that awk can turn every
# log line mentioning "URL" into a short progress message.
# Globals: WGET_RLEVEL, REJECT, LOCAL_REPOS, SPOOF, TARGET (read)
suck_and_run() {
  wget -l "$WGET_RLEVEL" -nv -nd -r -N -R "$REJECT" -P "$LOCAL_REPOS" \
    --user-agent="$SPOOF" -erobots=off --random-wait "$TARGET" -o /dev/stdout \
    | awk '/URL/ {print "[ ] sucking "$2}'
}

# Garbage cleaner (filter: stdin -> stdout, one word per line).
# - strips trailing . , ; : ) characters
# - drops words starting with "http", "h77p" or "/"
# - strips one leading "[" or "("
nogarbage() {
  sed -e 's/[.,;:)]*$//' \
      -e '/^http/d' \
      -e '/^h77p/d' \
      -e '/^\//d' \
      -e 's/^[[(]//'
}

# Turn plain text into cleaned words (filter: stdin -> stdout).
# Removes asterisks, splits on spaces/tabs/newlines, runs nogarbage and drops
# empty lines. The text is streamed and never expanded by the shell, so words
# such as "?" or "[a]" cannot be glob-expanded against the current directory.
words_from_text() {
  tr -d '*' \
    | tr -s ' \t\n' '\n\n\n' \
    | nogarbage \
    | sed -e '/^$/d'
}

# Generate more words using l33t translations.
# Does nothing unless L33TIFY is "1". Otherwise $DICOS/$TARGET.wlist is
# rewritten in place; each pass appends a transformed copy of everything
# collected so far, and the result is sorted and de-duplicated.
# Globals: L33TIFY, DICOS, TARGET (read)
l33t() {
  [[ "$L33TIFY" == "1" ]] || return 0

  local wlist="$DICOS/$TARGET.wlist"
  local scratch="$wlist.tmp"
  local pass="$wlist.l33t"

  mv -- "$wlist" "$scratch"
  echo "[!] 4-PASS l33tifying in progress ..."

  echo "[ ] pass 1 in progress"
  # Both case variants are computed from the same snapshot before appending.
  tr '[:upper:]' '[:lower:]' < "$scratch" > "${pass}lo"
  tr '[:lower:]' '[:upper:]' < "$scratch" > "${pass}up"
  cat -- "${pass}lo" "${pass}up" >> "$scratch"
  rm -f -- "${pass}lo" "${pass}up"

  echo "[ ] pass 2 in progress"
  tr 'aeiou' 'AEIOU' < "$scratch" > "$pass"
  cat -- "$pass" >> "$scratch"
  rm -f -- "$pass"

  echo "[ ] pass 3 in progress"
  tr 'aeost' '43057' < "$scratch" > "$pass"
  cat -- "$pass" >> "$scratch"
  rm -f -- "$pass"

  echo "[ ] pass 4 in progress"
  tr 'AEOST' '43057' < "$scratch" > "$pass"
  cat -- "$pass" >> "$scratch"
  rm -f -- "$pass"

  echo "[!] sorting $TARGET.wlist - please wait"
  sort -u -o "$wlist" -- "$scratch"
  rm -f -- "$scratch"
}

# Extract the words.
# Renders every regular file in $LOCAL_REPOS with lynx, extracts its words,
# merges them with an existing $DICOS/$TARGET.wlist (if any) and writes the
# sorted, de-duplicated result back. Finishes by calling l33t.
# Globals: DICOS, TARGET, LOCAL_REPOS (read)
wlist_creator() {
  local wlist="$DICOS/$TARGET.wlist"
  local scratch="$wlist.tmp"
  local page

  # Start from an empty scratch file so leftovers of an interrupted run are
  # not mixed in; the whole cache is re-processed below anyway.
  : > "$scratch"

  # LOCAL_REPOS ends with a slash, so this globs the files inside it.
  for page in "$LOCAL_REPOS"*; do
    [[ -f "$page" ]] || continue   # also skips the literal unmatched pattern
    echo "[ ] processing ${page##*/}"
    lynx -verbose off -dump -nolist -stdin < "$page" \
      | words_from_text >> "$scratch"
  done

  # create or update the final dictionary
  if [[ -f "$wlist" ]]; then
    echo "[!] updating $TARGET.wlist - please wait"
    cat -- "$wlist" >> "$scratch"
  else
    echo "[!] creating $TARGET.wlist - please wait"
  fi

  echo "[!] sorting $TARGET.wlist - please wait"
  sort -u -o "$wlist" -- "$scratch"
  rm -f -- "$scratch"
  l33t
}

# Merge all the wlist files into one big dictionary ($DICOS/merge.wlist).
# A merge.wlist left by an earlier run is neither counted nor used as input;
# it is regenerated from the individual wordlists.
# Globals: DICOS (read)
wlist_merge() {
  local merged="$DICOS/merge.wlist"
  local scratch="$merged.tmp"
  local -a sources=()
  local f

  for f in "$DICOS"/*.wlist; do
    [[ -f "$f" && "$f" != "$merged" ]] || continue
    sources+=("$f")
  done

  if (( ${#sources[@]} == 0 )); then
    echo "[!] you don't have any wlist yet..."
    return 0
  fi

  echo "[!] wlist found in $DICOS/"
  if (( ${#sources[@]} == 1 )); then
    echo "[!] you only have one though ... aborting."
    return 0
  fi

  : > "$scratch"   # never append to a stale temporary file
  for f in "${sources[@]}"; do
    echo "[ ] merging $f"
    cat -- "$f" >> "$scratch"
  done
  echo "[ ] sorting the entries ..."
  sort -u -o "$merged" -- "$scratch"
  rm -f -- "$scratch"
  echo "[!] merge completed - bye."
}

# Print the help text.
usage() {
  printf '%s\n' \
    "[?] Etemenanki - wordlist builder bot" \
    "[?] aym3ric-at-goto10-dot-org" \
    "[?]" \
    "[?] Usage: etemenanki.sh [-h] (-dlo) -t location" \
    "[?] -h this help" \
    "[?] -d maximum recursion depth (default=1)" \
    "[?] -l l33tify the words " \
    "[?] -o offline mode" \
    "[?] -t target location (http)" \
    "[?] -m merge all the wlist files" \
    "[?] Example: ./etemenanki -d 1 -l -t google.com"
}

# Download (unless offline) and pack the wordlist for $TARGET, then report
# how long each phase took.
# Globals: TARGET, GLOBAL_REPOS, OFFLINE, DICOS (read); LOCAL_REPOS (written)
# Returns: 1 when a required tool or the output directories are missing.
build_wordlist() {
  local suck_start wlist_start wlist_stop

  # Without lynx every page would yield nothing and an empty wordlist would
  # be written, so stop early instead.
  if ! command -v lynx > /dev/null 2>&1; then
    echo "[!] lynx not found ... bye."
    return 1
  fi
  if [[ "$OFFLINE" == "0" ]] && ! command -v wget > /dev/null 2>&1; then
    echo "[!] wget not found ... bye."
    return 1
  fi

  LOCAL_REPOS="$GLOBAL_REPOS/$TARGET/"
  validate_paths || return 1

  suck_start=$(date +%s)
  if [[ "$OFFLINE" == "0" ]]; then
    suck_and_run
  fi
  wlist_start=$(date +%s)
  wlist_creator
  wlist_stop=$(date +%s)

  echo "[!] $TARGET.wlist written in $DICOS/"
  echo "[!] sucked in $(( wlist_start - suck_start )) s - packed in $(( wlist_stop - wlist_start )) s"
  echo "[ ] bye..."
}

# Options handling ------------------------------------------------------------
OPTSTRING="hd:lot:m"

# Parse the command line and run the requested action.
# The action (-t or -m, whichever comes first) is only recorded while parsing
# and executed afterwards, so -d/-l/-o take effect wherever they appear.
# Returns: 0 on success or help, 1 on an invalid switch or when no action
# was requested.
main() {
  local switch action=""
  OPTIND=1

  # Check if any switches have been passed
  if (( $# == 0 )); then
    echo "[!] Type -h for help."
    return 0
  fi

  # Choose your destiny
  while getopts "$OPTSTRING" switch; do
    case "$switch" in
      h)
        usage
        return 0
        ;;
      d)
        WGET_RLEVEL=$OPTARG
        ;;
      l)
        L33TIFY=1
        ;;
      o)
        OFFLINE=1
        ;;
      t)
        if [[ -z "$action" ]]; then
          action=build
          TARGET=$OPTARG
        fi
        ;;
      m)
        if [[ -z "$action" ]]; then
          action=merge
        fi
        ;;
      *)
        echo "[!] Unexpected switch or argument"
        echo "[!] Type -h for help."
        return 1
        ;;
    esac
  done

  case "$action" in
    build)
      build_wordlist
      ;;
    merge)
      wlist_merge
      ;;
    *)
      echo "[!] Type -h for help."
      return 1
      ;;
  esac
}

main "$@"