#!/usr/bin/python
#Searches the internet for email
#addresses, prints them to a file...
import urllib, sys, re, random, socket, time, urllib2, string, sets
def title():
    """Print the program banner (author contact and version) to stdout."""
    # Single-argument print(...) produces the same output whether it is
    # parsed as a Python 2 print statement or a Python 3 function call.
    print("\n\t d3hydr8[at]gmail[dot]com EmailCollecter v1.3")
    print("\t--------------------------------------------------\n")
def timer():
    """Return the current local time formatted by ``time.asctime``."""
    return time.asctime(time.localtime(time.time()))
def StripTags(text):
    """Return ``text`` with every ``<...>`` span removed.

    A span runs from a ``<`` to the nearest following ``>``.  A ``<`` that
    has no ``>`` after it is left untouched.

    Args:
        text: The markup to clean.

    Returns:
        The text with all such spans deleted.
    """
    # A single regex pass is linear in the input.  "[^>]" also matches
    # newlines, so spans that cross line breaks are removed as well.
    return re.sub(r"<[^>]*>", "", text)
def getgoog(domain):
    """Collect addresses at ``domain`` from Google Groups and Google web search.

    Requests result offsets 0, 10, ..., 90 from each of the two search URLs,
    strips the markup from every page with ``StripTags`` and extracts
    ``<local-part>@<domain>`` matches.

    Args:
        domain: Mail domain to look for, e.g. ``"gmail.com"``.

    Returns:
        A list of the distinct addresses found (order unspecified).  If a
        request fails, whatever was gathered before the failure is returned,
        so the result is always a list.
    """
    user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 5.0)'
    # Escape the domain so that "." only matches a literal dot.
    pattern = re.compile(r'([\w\.\-]+@' + re.escape(str(domain)) + ')')
    search_prefixes = (
        'http://groups.google.com/groups?q=',
        'http://www.google.com/search?q=%40',
    )
    found = set()
    try:
        for prefix in search_prefixes:
            for start in range(0, 100, 10):
                request = urllib2.Request(
                    prefix + str(domain) + '&hl=en&lr=&ie=UTF-8&start='
                    + repr(start) + '&sa=N')
                request.add_header('User-Agent', user_agent)
                page = urllib2.build_opener().open(request).read()
                found.update(pattern.findall(StripTags(page)))
    except (IOError, IndexError, socket.timeout, socket.gaierror,
            socket.error):
        # Best effort, matching geturls()/getaddress(): a failed request
        # ends the search but keeps the addresses already collected.
        pass
    return list(found)
def geturls(url):
    """Fetch ``url`` and add every new ``http://`` link on it to ``urls``.

    Reads the module-level ``verbose`` flag and appends to the module-level
    ``urls`` list in place.  Fetch errors are swallowed so that one bad page
    does not stop the crawl.

    Args:
        url: Address of the page to scan.

    Returns:
        The module-level ``urls`` list (the same object, possibly extended).
    """
    try:
        if verbose == 1:
            print("Collecting: %s" % url)
        site = urllib.urlopen(url).read()
        # Set copy gives O(1) duplicate checks; ``urls`` itself stays a list
        # because the caller picks from it with random.choice().
        known = set(urls)
        # Require at least one host character so a bare "http://" is never
        # queued as a link.
        for link in re.findall(r'http://[\w\.\-]+', site):
            if link not in known:
                known.add(link)
                urls.append(link)
    except (IOError, TypeError, AttributeError,
            socket.timeout, socket.gaierror, socket.error):
        pass
    return urls
def _find_emails(site, wanted_domain):
    """Return addresses in ``site``: plain ones plus "[at]"/"[dot]" spellings."""
    if wanted_domain:
        # Escape the domain so "." matches only a literal dot.
        host = re.escape(wanted_domain)
        plain = r'[\w\.\-]+@' + host
        at_only = r'[\w\.\-]+\[at\]' + host
        at_and_dot = r'[\w\.\-]+\[at\]' + host.replace(r'\.', r'\[dot\]')
    else:
        plain = r'[\.\w]+@[a-zA-Z_]+?\.[a-zA-Z]{2,3}'
        at_only = r'[\w\.\-]+\[at\][\w\.\-]+\.\w\w\w'
        at_and_dot = r'[\w\.\-]+\[at\][\w\.\-]+\[dot\]\w\w\w'
    emails = re.findall(plain, site)
    for pattern in (at_only, at_and_dot):
        for addr in re.findall(pattern, site):
            # De-obfuscate: "[at]" stands for "@" and "[dot]" for ".".
            emails.append(addr.replace("[at]", "@").replace("[dot]", "."))
    return emails


def getaddress(url):
    """Fetch ``url``, harvest e-mail addresses from it and record new ones.

    Reads the module-level ``verbose`` flag and, when it is defined, the
    module-level ``domain`` filter.  Each address not yet in the module-level
    ``addresses`` list is appended to that list, appended as a line to the
    file named by ``sys.argv[2]`` and reported on stdout.  Fetch and file
    errors are swallowed so one bad page does not stop the crawl.

    Args:
        url: Address of the page to scan.

    Returns:
        The module-level ``addresses`` list (the same object).
    """
    if verbose == 1:
        print("Checking: %s" % url)
    try:
        # ``domain`` only exists as a global when -d/-domain was supplied.
        wanted_domain = domain
    except NameError:
        wanted_domain = None
    try:
        site = urllib.urlopen(url).read()
        emails = _find_emails(site, wanted_domain)
        if emails:
            with open(sys.argv[2], "a") as data:
                for email in emails:
                    if email not in addresses:
                        addresses.append(email)
                        data.write(email + "\n")
                        # Spacing mirrors a comma-separated print statement.
                        print("\nFound: %s \nTotal: %d \n"
                              % (email, len(addresses)))
    except (IOError, TypeError, AttributeError,
            socket.timeout, socket.gaierror, socket.error):
        pass
    except KeyboardInterrupt:
        # NOTE(review): Ctrl-C only abandons the current page; the caller's
        # loop keeps running.  Presumably deliberate - confirm.
        pass
    return addresses
# Command-line driver: validate arguments, optionally seed the address list
# from Google (domain mode), then crawl randomly chosen links until the
# requested number of addresses has been collected.
if len(sys.argv) < 4 or len(sys.argv) > 7:
    title()
    print("\nUsage: ./emailcollect.py <starting point> <file to save addreses> <how many> <options>")
    print("Ex: ./emailcollect.py www.busywebsite.com emails.txt 10000 -domain gmail.com -verbose\n")
    print("\t[options]")
    print("\t -d/-domain <domain> : Only searches for that domain (ex: gmail.com, yahoo.com)")
    print("\t -v/-verbose : Verbose Mode\n")
    sys.exit(1)

url = sys.argv[1]
try:
    length = int(sys.argv[3])
except ValueError:
    print("[-] <how many> must be a whole number, got: %s" % sys.argv[3])
    sys.exit(1)

# ``domain`` is deliberately left unbound unless -d/-domain is given: the
# helper functions detect the all-domains mode by the name being undefined.
verbose = 0
for position, arg in enumerate(sys.argv[1:], 1):
    option = arg.lower()
    if option in ("-d", "-domain"):
        if position + 1 >= len(sys.argv):
            print("[-] %s needs a domain (ex: gmail.com)" % arg)
            sys.exit(1)
        domain = sys.argv[position + 1]
    elif option in ("-v", "-verbose"):
        verbose = 1

# Only prepend a scheme when the start URL has none.
if not url.startswith(("http://", "https://")):
    url = "http://" + url

addresses = []
urls = []
socket.setdefaulttimeout(3)

title()
print("[+] Starting: %s" % url)
print("[+] File: %s" % sys.argv[2])
try:
    if domain:
        domain = re.sub("@", "", domain)
        print("[+] Domain: %s" % domain)
except NameError:
    print("[+] Searching: all domains")
print("[+] Collecting: %s" % length)
if verbose == 1:
    print("[+] Verbose Mode On")
else:
    print("[-] Verbose Mode Off")
print("[+] Started: %s \n" % timer())

# Keep the NameError probe minimal so errors raised inside getgoog() are not
# mistaken for "no domain given".
try:
    seed_domain = domain
except NameError:
    seed_domain = None
if seed_domain:
    print("[+] Getting addresses from google...\n")
    # ``or []`` keeps len() safe should getgoog() hand back None.
    goog_emails = getgoog(seed_domain) or []
    print("[+] Found: %d from google." % len(goog_emails))
    if goog_emails:
        with open(sys.argv[2], "a") as outfile:
            for e in goog_emails:
                outfile.write(e + "\n")
                addresses.append(e)

if len(addresses) < length:
    urls = geturls(url)
    while len(addresses) < length:
        if not urls:
            print("[-] Ran out of links, try another start site\n")
            sys.exit(1)
        addresses = getaddress(random.choice(urls))
        if len(urls) < 1000:
            urls = geturls(random.choice(urls))
        else:
            # Trim the backlog in place so the list does not grow unbounded.
            del urls[1:300]
print("\n[+] Final Total: %d" % len(addresses))
print("[+] Data: %s" % sys.argv[2])
print("[+] Done %s \n" % timer())
#Searches the internet for email
#addresses, prints them to a file...
import urllib, sys, re, random, socket, time, urllib2, string, sets
def title():
    """Print the program banner (author contact and version) to stdout."""
    # Single-argument print(...) produces the same output whether it is
    # parsed as a Python 2 print statement or a Python 3 function call.
    print("\n\t d3hydr8[at]gmail[dot]com EmailCollecter v1.3")
    print("\t--------------------------------------------------\n")
def timer():
    """Return the current local time formatted by ``time.asctime``."""
    return time.asctime(time.localtime(time.time()))
def StripTags(text):
    """Return ``text`` with every ``<...>`` span removed.

    A span runs from a ``<`` to the nearest following ``>``.  A ``<`` that
    has no ``>`` after it is left untouched.

    Args:
        text: The markup to clean.

    Returns:
        The text with all such spans deleted.
    """
    # A single regex pass is linear in the input.  "[^>]" also matches
    # newlines, so spans that cross line breaks are removed as well.
    return re.sub(r"<[^>]*>", "", text)
def getgoog(domain):
    """Collect addresses at ``domain`` from Google Groups and Google web search.

    Requests result offsets 0, 10, ..., 90 from each of the two search URLs,
    strips the markup from every page with ``StripTags`` and extracts
    ``<local-part>@<domain>`` matches.

    Args:
        domain: Mail domain to look for, e.g. ``"gmail.com"``.

    Returns:
        A list of the distinct addresses found (order unspecified).  If a
        request fails, whatever was gathered before the failure is returned,
        so the result is always a list.
    """
    user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 5.0)'
    # Escape the domain so that "." only matches a literal dot.
    pattern = re.compile(r'([\w\.\-]+@' + re.escape(str(domain)) + ')')
    search_prefixes = (
        'http://groups.google.com/groups?q=',
        'http://www.google.com/search?q=%40',
    )
    found = set()
    try:
        for prefix in search_prefixes:
            for start in range(0, 100, 10):
                request = urllib2.Request(
                    prefix + str(domain) + '&hl=en&lr=&ie=UTF-8&start='
                    + repr(start) + '&sa=N')
                request.add_header('User-Agent', user_agent)
                page = urllib2.build_opener().open(request).read()
                found.update(pattern.findall(StripTags(page)))
    except (IOError, IndexError, socket.timeout, socket.gaierror,
            socket.error):
        # Best effort, matching geturls()/getaddress(): a failed request
        # ends the search but keeps the addresses already collected.
        pass
    return list(found)
def geturls(url):
    """Fetch ``url`` and add every new ``http://`` link on it to ``urls``.

    Reads the module-level ``verbose`` flag and appends to the module-level
    ``urls`` list in place.  Fetch errors are swallowed so that one bad page
    does not stop the crawl.

    Args:
        url: Address of the page to scan.

    Returns:
        The module-level ``urls`` list (the same object, possibly extended).
    """
    try:
        if verbose == 1:
            print("Collecting: %s" % url)
        site = urllib.urlopen(url).read()
        # Set copy gives O(1) duplicate checks; ``urls`` itself stays a list
        # because the caller picks from it with random.choice().
        known = set(urls)
        # Require at least one host character so a bare "http://" is never
        # queued as a link.
        for link in re.findall(r'http://[\w\.\-]+', site):
            if link not in known:
                known.add(link)
                urls.append(link)
    except (IOError, TypeError, AttributeError,
            socket.timeout, socket.gaierror, socket.error):
        pass
    return urls
def _find_emails(site, wanted_domain):
    """Return addresses in ``site``: plain ones plus "[at]"/"[dot]" spellings."""
    if wanted_domain:
        # Escape the domain so "." matches only a literal dot.
        host = re.escape(wanted_domain)
        plain = r'[\w\.\-]+@' + host
        at_only = r'[\w\.\-]+\[at\]' + host
        at_and_dot = r'[\w\.\-]+\[at\]' + host.replace(r'\.', r'\[dot\]')
    else:
        plain = r'[\.\w]+@[a-zA-Z_]+?\.[a-zA-Z]{2,3}'
        at_only = r'[\w\.\-]+\[at\][\w\.\-]+\.\w\w\w'
        at_and_dot = r'[\w\.\-]+\[at\][\w\.\-]+\[dot\]\w\w\w'
    emails = re.findall(plain, site)
    for pattern in (at_only, at_and_dot):
        for addr in re.findall(pattern, site):
            # De-obfuscate: "[at]" stands for "@" and "[dot]" for ".".
            emails.append(addr.replace("[at]", "@").replace("[dot]", "."))
    return emails


def getaddress(url):
    """Fetch ``url``, harvest e-mail addresses from it and record new ones.

    Reads the module-level ``verbose`` flag and, when it is defined, the
    module-level ``domain`` filter.  Each address not yet in the module-level
    ``addresses`` list is appended to that list, appended as a line to the
    file named by ``sys.argv[2]`` and reported on stdout.  Fetch and file
    errors are swallowed so one bad page does not stop the crawl.

    Args:
        url: Address of the page to scan.

    Returns:
        The module-level ``addresses`` list (the same object).
    """
    if verbose == 1:
        print("Checking: %s" % url)
    try:
        # ``domain`` only exists as a global when -d/-domain was supplied.
        wanted_domain = domain
    except NameError:
        wanted_domain = None
    try:
        site = urllib.urlopen(url).read()
        emails = _find_emails(site, wanted_domain)
        if emails:
            with open(sys.argv[2], "a") as data:
                for email in emails:
                    if email not in addresses:
                        addresses.append(email)
                        data.write(email + "\n")
                        # Spacing mirrors a comma-separated print statement.
                        print("\nFound: %s \nTotal: %d \n"
                              % (email, len(addresses)))
    except (IOError, TypeError, AttributeError,
            socket.timeout, socket.gaierror, socket.error):
        pass
    except KeyboardInterrupt:
        # NOTE(review): Ctrl-C only abandons the current page; the caller's
        # loop keeps running.  Presumably deliberate - confirm.
        pass
    return addresses
# Command-line driver: validate arguments, optionally seed the address list
# from Google (domain mode), then crawl randomly chosen links until the
# requested number of addresses has been collected.
if len(sys.argv) < 4 or len(sys.argv) > 7:
    title()
    print("\nUsage: ./emailcollect.py <starting point> <file to save addreses> <how many> <options>")
    print("Ex: ./emailcollect.py www.busywebsite.com emails.txt 10000 -domain gmail.com -verbose\n")
    print("\t[options]")
    print("\t -d/-domain <domain> : Only searches for that domain (ex: gmail.com, yahoo.com)")
    print("\t -v/-verbose : Verbose Mode\n")
    sys.exit(1)

url = sys.argv[1]
try:
    length = int(sys.argv[3])
except ValueError:
    print("[-] <how many> must be a whole number, got: %s" % sys.argv[3])
    sys.exit(1)

# ``domain`` is deliberately left unbound unless -d/-domain is given: the
# helper functions detect the all-domains mode by the name being undefined.
verbose = 0
for position, arg in enumerate(sys.argv[1:], 1):
    option = arg.lower()
    if option in ("-d", "-domain"):
        if position + 1 >= len(sys.argv):
            print("[-] %s needs a domain (ex: gmail.com)" % arg)
            sys.exit(1)
        domain = sys.argv[position + 1]
    elif option in ("-v", "-verbose"):
        verbose = 1

# Only prepend a scheme when the start URL has none.
if not url.startswith(("http://", "https://")):
    url = "http://" + url

addresses = []
urls = []
socket.setdefaulttimeout(3)

title()
print("[+] Starting: %s" % url)
print("[+] File: %s" % sys.argv[2])
try:
    if domain:
        domain = re.sub("@", "", domain)
        print("[+] Domain: %s" % domain)
except NameError:
    print("[+] Searching: all domains")
print("[+] Collecting: %s" % length)
if verbose == 1:
    print("[+] Verbose Mode On")
else:
    print("[-] Verbose Mode Off")
print("[+] Started: %s \n" % timer())

# Keep the NameError probe minimal so errors raised inside getgoog() are not
# mistaken for "no domain given".
try:
    seed_domain = domain
except NameError:
    seed_domain = None
if seed_domain:
    print("[+] Getting addresses from google...\n")
    # ``or []`` keeps len() safe should getgoog() hand back None.
    goog_emails = getgoog(seed_domain) or []
    print("[+] Found: %d from google." % len(goog_emails))
    if goog_emails:
        with open(sys.argv[2], "a") as outfile:
            for e in goog_emails:
                outfile.write(e + "\n")
                addresses.append(e)

if len(addresses) < length:
    urls = geturls(url)
    while len(addresses) < length:
        if not urls:
            print("[-] Ran out of links, try another start site\n")
            sys.exit(1)
        addresses = getaddress(random.choice(urls))
        if len(urls) < 1000:
            urls = geturls(random.choice(urls))
        else:
            # Trim the backlog in place so the list does not grow unbounded.
            del urls[1:300]
print("\n[+] Final Total: %d" % len(addresses))
print("[+] Data: %s" % sys.argv[2])
print("[+] Done %s \n" % timer())
# 0 Comments:
# Post a Comment