1 ---
2 Etemenanki is an agent that builds word dictionaries based on remote and
3 local (hyper)text repositories.
4 aym3ric-at-goto10-dot-org
5 NO WARRANTY - USE IT AT YOUR OWN RISK
6 feel free to send patches if you hack it.
7 ---
8
9 ./etemenanki.sh -h for help
10
11 ---
12
13 Some notes ...
14
15 The default behaviour of this script is to download (hyper)text content
16 from a website and make a sorted, de-duplicated list of all the words found.
17 When pointing etemenanki to a website/domain, the ~/dictionnaries
18 directory is created and at the end of the process you will end up with
19 a wordlist file.
20
21 By default the downloaded material is cached in ~/dictionnaries/cache.
22 This is useful if you intend to resume or update the downloaded content, as
23 etemenanki will try to only download updated material when a cached
24 location is given.
25 Also when playing with the link depth parameter you may download much more
26 than expected and may need to interrupt and resume later on.
27 The cache can also be used for later wlist creation: when running in
28 offline mode, etemenanki will skip the download/update of the cache and
29 process it directly into a wordlist.
30
31 If a wordlist already exists for a given domain/location, etemenanki
32 will update it and never overwrite it.
33
34 To generate more entries in the word list, you can turn on the unique
35 4-PASS l33tifying engine :)
36 (can require a few GB of temporary free space if you intend to play with
37 large wordlist files).
38
39 When you have collected a couple of wlist dictionaries you can merge them
40 into one; simply execute:
41 ./etemenanki.sh -m
42
43 ---
#!/bin/bash
2 #
3 # Etemenanki is an agent that builds word dictionaries based on remote and
4 # local (hyper)text repositories.
5 # aym3ric-at-goto10-dot-org
6 # NO WARRANTY - USE IT AT YOUR OWN RISK
7
8
# Config ----------------------------------------------------------------------

# Directory that receives the generated *.wlist files.
DICOS=~/dictionnaries
# Root of the download cache; each target gets its own sub-directory.
GLOBAL_REPOS=$DICOS/cache

# NOTE(review): PROXY is not referenced by any code visible in this script.
PROXY=""
# User-agent string handed to wget.
SPOOF='Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.7.8) Gecko/20050511'

# Comma-separated file suffixes wget must not download (wget -R).
REJECT="ogg,iso,wav,mpg,mpeg,mp3,mov"
REJECT="$REJECT,jpg,jpeg,gif,png,bmp,tif,tga"
REJECT="$REJECT,xml,css,ico,js"
REJECT="$REJECT,gz,tgz,zip,rar,exe,bin"
REJECT="$REJECT,svg,pdf,eps,doc"

# Defaults, overridable with -d, -l and -o respectively.
WGET_RLEVEL=1
L33TIFY=0
OFFLINE=0
18
19 # Functions -------------------------------------------------------------------
20
# Dictionaries path validation.
# Makes sure the word-list directory and the download cache both exist,
# creating whichever one is missing.
# Globals: DICOS, GLOBAL_REPOS (read)
# Outputs: status messages on stdout
# Exits:   with status 1 when a directory cannot be created
function validate_paths {

  if test -d "$DICOS" ; then
    echo "[!] dictionnaries folder found."
  else
    echo "[!] dictionnaries folder not found ... creating ..."
    if mkdir -p -- "$DICOS" ; then
      echo "[!] dictionnaries folder created."
    else
      echo "[!] can't create folder ... bye."
      exit 1
    fi
  fi

  # The cache can be missing even though $DICOS is there (e.g. it was
  # wiped by hand), so it is checked on its own.
  if ! test -d "$GLOBAL_REPOS" && ! mkdir -p -- "$GLOBAL_REPOS" ; then
    echo "[!] can't create folder ... bye."
    exit 1
  fi

}
38
# Download the target into the local cache.
# Runs a recursive, timestamp-aware wget (-r -N) and turns its log into
# one progress line per retrieved URL.
# Globals: WGET_RLEVEL, REJECT, LOCAL_REPOS, SPOOF, OPTARG (read);
#          OPTARG holds the target location given to -t.
# Outputs: "[ ] sucking URL:<address>" lines on stdout
function suck_and_run {

  # The log is sent to stdout (-o /dev/stdout) so awk can filter it.
  # The URL field is located by its "URL:" prefix because its position
  # differs between wget versions (time only vs. date and time in front).
  wget -l "$WGET_RLEVEL" -nv -nd -r -N -R "$REJECT" -P "$LOCAL_REPOS" \
    --user-agent="$SPOOF" -erobots=off --random-wait \
    -o /dev/stdout "$OPTARG" |
    awk '/URL:/ {
      for (i = 1; i <= NF; i++) {
        if ($i ~ /^URL:/) { print "[ ] sucking " $i; break }
      }
    }'

}
47
48
# Garbage cleaner.
# Filter: reads one token per line on stdin, writes the cleaned tokens to
# stdout. In order, for every line:
#   1. strip trailing punctuation  . , ; : ) ]
#   2. drop lines starting with "http", "h77p" or "/" (URLs and paths)
#   3. strip one leading "[" or "("
# The closing "]" is stripped as well so that a bracketed token such as
# "[word]" comes out as "word" rather than "word]".
function nogarbage {

  sed -e 's/[].,;:)]*$//' \
      -e '/^http/d' \
      -e '/^h77p/d' \
      -e '/^\//d' \
      -e 's/^[[(]//'

}
58
# Append to word list $1 a copy of itself run through `tr $3 $4`.
# The copy is staged in scratch file $2 because the list cannot be read
# and extended at the same time.
function _l33t_pass {
  local work=$1 stage=$2 from=$3 to=$4

  tr "$from" "$to" < "$work" > "$stage"
  cat -- "$stage" >> "$work"
  rm -- "$stage"
}

# Generate more words using l33t translations.
# Does nothing unless L33TIFY is "1". Otherwise $DICOS/$OPTARG.wlist is
# grown in four passes, each appending a translated copy of everything
# gathered so far, and is finally rewritten sorted and de-duplicated:
#   pass 1: all-lowercase and all-uppercase variants
#   pass 2: vowels upper-cased          (aeiou -> AEIOU)
#   pass 3: lowercase l33t substitution (aeost -> 43057)
#   pass 4: uppercase l33t substitution (AEOST -> 43057)
# Globals: L33TIFY, DICOS, OPTARG (read)
# Outputs: progress messages on stdout
# Returns: 0 on success or when disabled, 1 if the list cannot be moved
#          aside or the final sort fails
function l33t {

  [ "$L33TIFY" = "1" ] || return 0

  local wlist="$DICOS/$OPTARG.wlist"
  local work="$wlist.tmp"
  local stage="$wlist.l33t"

  mv -- "$wlist" "$work" || return 1

  echo "[!] 4-PASS l33tifying in progress ..."

  echo "[ ] pass 1 in progress"
  # Both case variants are derived from the same input before either is
  # appended, hence two scratch files.
  tr '[:upper:]' '[:lower:]' < "$work" > "${stage}lo"
  tr '[:lower:]' '[:upper:]' < "$work" > "${stage}up"
  cat -- "${stage}lo" "${stage}up" >> "$work"
  rm -- "${stage}lo" "${stage}up"

  echo "[ ] pass 2 in progress"
  _l33t_pass "$work" "$stage" 'aeiou' 'AEIOU'

  echo "[ ] pass 3 in progress"
  _l33t_pass "$work" "$stage" 'aeost' '43057'

  echo "[ ] pass 4 in progress"
  _l33t_pass "$work" "$stage" 'AEOST' '43057'

  echo "[!] sorting $OPTARG.wlist - please wait"
  # Only discard the work file once the sorted list is safely written;
  # a failed sort (e.g. disk full) must not cost the whole word list.
  if sort -u "$work" -o "$wlist" ; then
    rm -- "$work"
  else
    echo "[!] sort failed - words kept in $work" >&2
    return 1
  fi

}
89
# Extract the words.
# Renders every cached file of the current target to text with lynx,
# splits the text into whitespace-separated tokens, cleans them with
# nogarbage and folds them into $DICOS/$OPTARG.wlist. An existing word
# list is extended, never replaced. Ends by calling l33t.
# Globals: LOCAL_REPOS (cache directory, with trailing slash), DICOS,
#          OPTARG (target name) (read)
# Outputs: progress messages on stdout
function wlist_creator {

  local wlist="$DICOS/$OPTARG.wlist"
  local work="$wlist.tmp"
  local page

  # Appending (not truncating) keeps the words of a work file left behind
  # by an interrupted run, and guarantees the file exists for the merge.
  : >> "$work"

  # Tokens are streamed straight through the pipeline instead of being
  # collected in a variable and re-split by an unquoted echo, which would
  # glob-expand tokens such as "?" against the current directory.
  for page in "$LOCAL_REPOS"* ; do
    [ -f "$page" ] || continue
    echo "[ ] processing ${page##*/}"
    lynx -verbose off -dump -nolist -stdin < "$page" |
      tr -d '*' |
      tr -s '[:space:]' '\n' |
      nogarbage |
      sed '/^$/d' |
      sort -u >> "$work"
  done

  # create or update the final dictionary
  if test -f "$wlist" ; then
    echo "[!] updating $OPTARG.wlist - please wait"
    cat -- "$work" >> "$wlist"
  else
    echo "[!] creating $OPTARG.wlist - please wait"
    cat -- "$work" > "$wlist"
  fi

  echo "[!] sorting $OPTARG.wlist - please wait"
  mv -- "$wlist" "$work"
  sort -u "$work" -o "$wlist"
  rm -- "$work"
  l33t

}
128
# Merge all the wlist files into one big dictionary.
# Combines every $DICOS/*.wlist except merge.wlist itself into a sorted,
# de-duplicated $DICOS/merge.wlist. Nothing is written unless at least
# two source lists exist; a previous merge.wlist does not count as one.
# Globals: DICOS (read)
# Outputs: progress messages on stdout
function wlist_merge {

  local merged="$DICOS/merge.wlist"
  local -a sources=()
  local candidate

  # Collect the source lists with a glob rather than by parsing ls, so
  # odd file names are safe and an empty folder produces no error noise.
  for candidate in "$DICOS"/*.wlist ; do
    [ -f "$candidate" ] || continue
    [ "$candidate" = "$merged" ] && continue
    sources+=("$candidate")
  done

  if [ "${#sources[@]}" -eq 0 ] ; then
    echo "[!] you don't have any wlist yet..."
    return 0
  fi

  echo "[!] wlist found in $DICOS/"
  if [ "${#sources[@]}" -eq 1 ] ; then
    echo "[!] you only have one though ... aborting."
    return 0
  fi

  for candidate in "${sources[@]}" ; do
    echo "[ ] merging $candidate"
  done

  echo "[ ] sorting the entries ..."
  # sort reads all the sources itself, so no intermediate file is needed.
  sort -u -o "$merged" -- "${sources[@]}"
  echo "[!] merge completed - bye."

}
153
154
155
156
# Options handling ------------------------------------------------------------
# Switches are acted upon in the order they are given, and -t / -m run
# their job and exit on the spot: -d, -l and -o therefore only take effect
# when they come before -t on the command line.
OPTSTRING="hd:lot:m"

# Check if any switches have been passed
if [ "$#" -eq 0 ] ; then
  echo "[!] Type -h for help."
  exit 0
fi

# Choose your destiny
while getopts "$OPTSTRING" SWITCH ; do
  case "$SWITCH" in
    h)
      printf '%s\n' \
        "[?] Etemenanki - wordlist builder bot" \
        "[?] aym3ric-at-goto10-dot-org" \
        "[?]" \
        "[?] Usage: etemenanki.sh [-h] (-dlo) -t location" \
        "[?] -h this help" \
        "[?] -d maximum recursion depth (default=1)" \
        "[?] -l l33tify the words " \
        "[?] -o offline mode" \
        "[?] -t target location (http)" \
        "[?] -m merge all the wlist files" \
        "[?] Example: ./etemenanki -d 1 -l -t google.com"
      exit 0
      ;;
    d)
      WGET_RLEVEL=$OPTARG
      ;;
    l)
      L33TIFY=1
      ;;
    o)
      OFFLINE=1
      ;;
    t)
      # The worker functions read the target name from $OPTARG, so it
      # must stay untouched for the rest of this branch.
      LOCAL_REPOS="$GLOBAL_REPOS/$OPTARG/"
      validate_paths
      SUCKSTART=$(date +%s)
      if [ "$OFFLINE" = "0" ] ; then
        suck_and_run
      fi
      WLISTSTART=$(date +%s)
      wlist_creator
      WLISTSTOP=$(date +%s)
      echo "[!] $OPTARG.wlist written in $DICOS/"
      echo "[!] sucked in $((WLISTSTART - SUCKSTART)) s - packed in $((WLISTSTOP - WLISTSTART)) s"
      echo "[ ] bye..."
      exit 0
      ;;
    m)
      wlist_merge
      exit 0
      ;;
    *)
      # Reached for unknown switches and for -d / -t without a value;
      # this is a usage error, so report failure to the caller.
      echo "[!] Unexpected switch or argument"
      echo "[!] Type -h for help."
      exit 1
      ;;
  esac
done