Bem estou tentando desenvolver um extrator de emails com python só que esta dando erro:
Codigo:
import os import mechanize from bs4 import BeautifulSoup import re from urllib import quote import argparse import cookielib from random import randint def banner(): os.system("clear || cls") rint(''' ALlan ''') def main(): banner() parser = argparse.ArgumentParser(description='Extract emails') parser.add_argument('-d', '--dork', metavar='"INSERT DORK"') parser.add_argument( '-m', '--mode', help='h/hour, d/day, w/week, m/month, y/year', default="0") args = parser.parse_args() if not args.dork: parser.print_help() exit() br = mechanize.Browser() cj = cookielib.LWPCookieJar() br.set_cookiejar(cj) # br.set_handle_equiv(True) br.set_handle_redirect(True) br.set_handle_referer(True) br.set_handle_rebots(False) # br.set_handle_refresh(mechanize._http.HTTPRefreshPRocessor(), max_time=1) # br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')] next_page = 0 print("\n[+] Searching in Google...") while True: src = br.open("https://www.google.com.br/search?source="+quote(args.dork)+"&tbs=qdr:%s&start=%s" % (args.mode[0],next_page) ) bs = BeautifulSoup(src.get_data(), 'lxml') for i in bs.find_all('a'): if "/url?" in i.get('href') and not "webcache.googleusercontent.com" in i.get('href'): extract = re.findall(r"q=(.*\.txt)", i.get('href')) for link in extract: try: extract_emails(link,br) except: continue page = (raw_input("\n[*] Next page (Y/n)?: ") if page.lower() == "n": break else: next_page = next_page+10 def extract_emails(link, br): src = br.open(link) print ("[+] Extracting %s" % link) emails = re.findall(r"[a-zA-Z0-9_.\-]+@[a-zA-Z]+\.[a-z]+\.br", src.get_data()) with open("emails/"+char()+char()+char()+"_emails.txt",'w') as f: for email in emails: f.write(email+"\n") f.close() def char(): return chr(randint(50,100)) if __name__ == '__main__': main()
===================================== Erro: File “test.py”, line 62 if page.lower() == “n”: ^ SyntaxError: invalid syntax