1       ---
Etemenanki is an agent that builds word dictionaries based on remote and
local (hyper)text repositories.
4       aym3ric-at-goto10-dot-org
5       NO WARRANTY - USE IT AT YOUR OWN RISK
6       feel free to send patches if you hack it.
7       ---
8       
9       ./etemenanki.sh -h for help
10      
11      ---
12      
13      Some notes ...
14      
The default behaviour of this script is to download (hyper)text content
from a website and make a sorted, duplicate-free list of all the words found.
When pointing etemenanki to a website/domain, the ~/dictionnaries
directory is created and at the end of the process you will end up with
a wordlist file.
20      
By default the downloaded material is cached in ~/dictionnaries/cache.
This is useful if you intend to resume or update the downloaded content, as
etemenanki will try to download only updated material when a cached
location is given.
Also, when playing with the link depth parameter you may download much more
than expected and may need to interrupt and resume later on.
The cache can also be used for later wlist creation: when running in
offline mode, etemenanki will skip the download/update of the cache and
process it directly into a wordlist.
30      
If a wordlist already exists for a given domain/location, etemenanki
will update it, never overwrite it.
33      
To generate more entries in the word list, you can turn on the unique
4-PASS l33tifying engine :)
(this can require a few GB of temporary free space if you intend to play
with large wordlist files).
38      
When you have collected a couple of wlist dictionaries you can merge them
into one; simply execute:
./etemenanki.sh -m
42      
43      ---
#!/bin/bash
2       #
# Etemenanki is an agent that builds word dictionaries based on remote and
# local (hyper)text repositories.
5       # aym3ric-at-goto10-dot-org
6       # NO WARRANTY - USE IT AT YOUR OWN RISK
7       
8       
# Config ----------------------------------------------------------------------

# Storage layout: finished word lists are written to DICOS, downloaded pages
# are kept in the cache directory below it.
DICOS=~/dictionnaries
GLOBAL_REPOS=$DICOS/cache

# Download settings handed to wget: user agent string, comma separated list
# of rejected file suffixes and recursion depth (overridden by -d).
SPOOF="Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.7.8) Gecko/20050511"
REJECT="ogg,iso,wav,mpg,mpeg,mp3,mov,jpg,jpeg,gif,png,bmp,tif,tga,xml,css,ico,js,gz,tgz,zip,rar,exe,bin,svg,pdf,eps,doc"
WGET_RLEVEL=1

# NOTE(review): PROXY is declared here but not referenced by the code in view.
PROXY=""

# Feature switches, 0 = off / 1 = on (turned on by -l and -o respectively).
L33TIFY=0
OFFLINE=0
18      
19      # Functions -------------------------------------------------------------------
20      
# Dictionnaries path validation
#
# Makes sure the word list folder and the download cache both exist,
# creating whichever is missing.
# Globals:  DICOS (read)        - word list folder
#           GLOBAL_REPOS (read) - download cache folder
# Outputs:  progress messages on stdout
# Exits:    with status 1 when a folder cannot be created, since nothing
#           useful can be done without them.
function validate_paths {
  if [[ -d "$DICOS" ]]; then
    echo "[!] dictionnaries folder found."
  else
    echo "[!] dictionnaries folder not found ... creating ..."
    if mkdir -p -- "$DICOS"; then
      echo "[!] dictionnaries folder created."
    else
      echo "[!] can't create folder ... bye."
      exit 1
    fi
  fi

  # The cache can be missing even when the word list folder is already
  # there (e.g. it was removed by hand), so it is checked on its own.
  if [[ ! -d "$GLOBAL_REPOS" ]]; then
    if ! mkdir -p -- "$GLOBAL_REPOS"; then
      echo "[!] can't create folder ... bye."
      exit 1
    fi
  fi
}
38      
# Download the target into the local cache with wget.
#
# Globals (read): WGET_RLEVEL - recursion depth
#                 REJECT      - comma separated list of rejected suffixes
#                 LOCAL_REPOS - directory the files are saved to
#                 SPOOF       - user agent string
#                 OPTARG      - the target location given to -t
# Outputs:  one "[ ] sucking URL:<url>" line per log line that reports a
#           fetched document.
function suck_and_run {
  # Built as an array so that values containing spaces or glob characters
  # reach wget as single, unmodified arguments.
  local -a wget_args=(
    -l "$WGET_RLEVEL" -nv -nd -r -N
    -R "$REJECT"
    -P "$LOCAL_REPOS"
    --user-agent="$SPOOF"
    -erobots=off
    --random-wait
    "$OPTARG"
    -o /dev/stdout   # send the log to stdout so it can be filtered below
  )

  # The URL is located by its "URL:" prefix instead of a fixed column: the
  # number of timestamp fields in front of it is not the same for every
  # wget release.
  wget "${wget_args[@]}" \
    | awk '{
        for (i = 1; i <= NF; i++) {
          if ($i ~ /^URL:/) { print "[ ] sucking " $i; next }
        }
      }'
}
47      
48      
# Garbage cleaner
#
# Filter: reads one candidate word per line on stdin and writes the cleaned
# words to stdout.
#   - trailing punctuation (. , ; : and closing parenthesis) is removed
#   - one leading "[" or "(" is removed
#   - URLs (http..., h77p...) and anything starting with "/" are dropped
#   - lines left empty by the clean-up are dropped
# The leading bracket is stripped BEFORE the URL/path filters run, so that
# "(http://..." or "[/some/path" are recognised and dropped as well.
function nogarbage {
  sed -e 's/[.,;:)]*$//' \
      -e 's/^[[(]//' \
      -e '/^http/d' \
      -e '/^h77p/d' \
      -e '/^\//d' \
      -e '/^$/d'
}
58      
# Generate more words using l33t translations
#
# Does nothing unless L33TIFY is "1". Otherwise $DICOS/$OPTARG.wlist is
# expanded in place by four cumulative passes (each pass works on everything
# produced so far):
#   1. all-lowercase and all-uppercase copies
#   2. vowels upper-cased      (aeiou -> AEIOU)
#   3. lowercase substitutions (aeost -> 43057)
#   4. uppercase substitutions (AEOST -> 43057)
# and finally sorted with duplicates removed.
# Globals:  L33TIFY, DICOS, OPTARG (all read)
# Outputs:  progress messages on stdout, errors on stderr
# Returns:  0 on success or when disabled, 1 when the word list cannot be
#           moved aside or the final sort fails.
function l33t {
  [[ "$L33TIFY" = "1" ]] || return 0

  local wlist="$DICOS/$OPTARG.wlist"
  local pool="$wlist.tmp"        # growing pool of candidate words
  local scratch="$wlist.l33t"    # output of the pass being run
  local spec
  local pass=2

  # Without a word list there is nothing to expand; stop before any
  # temporary file gets created.
  if ! mv "$wlist" "$pool"; then
    echo "[!] can't access $OPTARG.wlist - l33tifying skipped." >&2
    return 1
  fi

  echo "[!] 4-PASS l33tifying in progress ..."

  # Pass 1: both case-folded copies are derived from the untouched pool
  # before either of them is appended to it.
  echo "[ ] pass 1 in progress"
  tr '[:upper:]' '[:lower:]' < "$pool" > "${scratch}lo"
  tr '[:lower:]' '[:upper:]' < "$pool" > "${scratch}up"
  cat "${scratch}lo" "${scratch}up" >> "$pool"
  rm -f "${scratch}lo" "${scratch}up"

  # Passes 2-4: "from:to" character sets for tr, applied to the whole pool.
  for spec in 'aeiou:AEIOU' 'aeost:43057' 'AEOST:43057'; do
    echo "[ ] pass $pass in progress"
    tr "${spec%%:*}" "${spec##*:}" < "$pool" > "$scratch"
    cat "$scratch" >> "$pool"
    rm -f "$scratch"
    pass=$((pass + 1))
  done

  echo "[!] sorting $OPTARG.wlist - please wait"
  # Keep the pool when sorting fails: it is the only remaining copy of
  # the words at this point.
  if ! sort -u -o "$wlist" "$pool"; then
    echo "[!] sorting failed - words kept in $pool" >&2
    return 1
  fi
  rm -f "$pool"
}
89      
# Extract the words
#
# Renders every cached file of the current target to plain text with lynx,
# splits the text into words, cleans them with nogarbage and merges the
# result into $DICOS/$OPTARG.wlist (existing entries are kept). Ends by
# calling l33t.
# Globals:  LOCAL_REPOS (read) - cache directory of the current target
#           DICOS, OPTARG (read)
# Outputs:  progress messages on stdout, errors on stderr
# Returns:  1 when the cache directory is missing or a file cannot be
#           written, otherwise the status of l33t.
function wlist_creator {
  local repo="${LOCAL_REPOS%/}"
  local wlist="$DICOS/$OPTARG.wlist"
  local words="$wlist.tmp"
  local page

  if [[ -z "$repo" || ! -d "$repo" ]]; then
    echo "[!] no cached content found in $LOCAL_REPOS" >&2
    return 1
  fi

  # Start from an empty scratch file so leftovers of an interrupted run
  # do not leak into the new list.
  if ! : > "$words"; then
    echo "[!] can't write to $DICOS/" >&2
    return 1
  fi

  # pack/parse/clean all the gathered html
  # The text is streamed straight through the pipeline and never stored in
  # a shell variable, so words such as "?" or "[ab]" cannot be expanded as
  # glob patterns against the current directory.
  for page in "$repo"/*; do
    [[ -f "$page" ]] || continue
    echo "[ ] processing ${page##*/}"
    lynx -verbose off -dump -nolist -stdin < "$page" \
      | tr -d '*' \
      | awk '{ for (i = 1; i <= NF; i++) print $i }' \
      | nogarbage >> "$words"
  done

  # create or update the final dictionnary
  if [[ -f "$wlist" ]]; then
    echo "[!] updating $OPTARG.wlist - please wait"
    cat "$wlist" >> "$words"
  else
    echo "[!] creating $OPTARG.wlist - please wait"
  fi

  echo "[!] sorting $OPTARG.wlist - please wait"
  # The scratch file holds old and new words; keep it if sorting fails.
  if ! sort -u -o "$wlist" "$words"; then
    echo "[!] sorting failed - words kept in $words" >&2
    return 1
  fi
  rm -f "$words"
  l33t
}
128     
# Merge all the wlist into a big dictionnary
#
# Combines every *.wlist file found in $DICOS into $DICOS/merge.wlist,
# sorted with duplicates removed. A merge.wlist left by a previous run is
# neither used as a source nor counted as one: it is rebuilt from the other
# lists.
# Globals:  DICOS (read)
# Outputs:  progress messages on stdout, errors on stderr
# Returns:  0 unless the sort fails (1); "nothing to merge" is not an error.
function wlist_merge {
  local merged="$DICOS/merge.wlist"
  local -a lists=()
  local candidate

  # Collect the source lists with a glob; the -f test also covers the case
  # where nothing matches and the pattern is left unexpanded.
  for candidate in "$DICOS"/*.wlist; do
    [[ -f "$candidate" && "$candidate" != "$merged" ]] || continue
    lists+=("$candidate")
  done

  if [[ ${#lists[@]} -eq 0 ]]; then
    echo "[!] you don't have any wlist yet..."
    return 0
  fi

  echo "[!] wlist found in $DICOS/"
  if [[ ${#lists[@]} -eq 1 ]]; then
    echo "[!] you only have one though ... aborting."
    return 0
  fi

  for candidate in "${lists[@]}"; do
    echo "[ ] merging $candidate"
  done

  echo "[ ] sorting the entries ..."
  # sort reads all the lists itself, no intermediate concatenation needed.
  if ! sort -u -o "$merged" "${lists[@]}"; then
    echo "[!] merge failed." >&2
    return 1
  fi
  echo "[!] merge completed - bye."
}
153     
154     
155     
156     
# Options handling ------------------------------------------------------------
#
# Switches are handled strictly left to right, and -t / -m run their action
# and exit as soon as they are met. The modifiers -d, -l and -o therefore
# only take effect when they appear BEFORE -t on the command line.
# Exit status: 0 after -h, -t, -m or when called without any switch,
#              1 on an invalid switch/argument or when no action was given.
OPTSTRING="hd:lot:m"

# Check if any switches have been passed
if [[ $# -eq 0 ]]; then
  echo "[!] Type -h for help."
  exit 0
fi

# Choose you destiny
while getopts "$OPTSTRING" SWITCH; do
  case "$SWITCH" in
    h)
      echo "[?] Etemenanki - wordlist builder bot"
      echo "[?] aym3ric-at-goto10-dot-org"
      echo "[?]"
      echo "[?] Usage: etemenanki.sh [-h] (-dlo) -t location"
      echo "[?] -h this help"
      echo "[?] -d maximum recursion depth (default=1)"
      echo "[?] -l l33tify the words "
      echo "[?] -o offline mode"
      echo "[?] -t target location (http)"
      echo "[?] -m merge all the wlist files"
      echo "[?] Example: ./etemenanki.sh -d 1 -l -t google.com"
      exit 0
      ;;
    d)
      # The value ends up as wget's -l argument, which takes a depth or
      # "inf"; refuse anything else here rather than in the middle of a run.
      if [[ ! "$OPTARG" =~ ^([0-9]+|inf)$ ]]; then
        echo "[!] Unexpected switch or argument"
        echo "[!] Type -h for help."
        exit 1
      fi
      WGET_RLEVEL=$OPTARG
      ;;
    l)
      L33TIFY=1
      ;;
    o)
      OFFLINE=1
      ;;
    t)
      # An empty target would make the cache root itself the work directory.
      if [[ -z "$OPTARG" ]]; then
        echo "[!] Unexpected switch or argument"
        echo "[!] Type -h for help."
        exit 1
      fi
      # The functions below read the target from OPTARG directly.
      LOCAL_REPOS="$GLOBAL_REPOS/$OPTARG/"
      validate_paths
      SUCKSTART=$(date +%s)
      if [[ "$OFFLINE" = "0" ]]; then
        suck_and_run
      fi
      WLISTSTART=$(date +%s)
      wlist_creator
      WLISTSTOP=$(date +%s)
      echo "[!] $OPTARG.wlist written in $DICOS/"
      echo "[!] sucked in $((WLISTSTART - SUCKSTART)) s - packed in $((WLISTSTOP - WLISTSTART)) s"
      echo "[ ] bye..."
      exit 0
      ;;
    m)
      wlist_merge
      exit 0
      ;;
    *)
      echo "[!] Unexpected switch or argument"
      echo "[!] Type -h for help."
      exit 1
      ;;
  esac
done

# Getting here means only modifiers were given (no -t, no -m): nothing has
# been done, so say so instead of ending silently.
echo "[!] Type -h for help."
exit 1
