python ghdb_ripper.py

Posted 2021-05-10
tags:
篇首语：本文由小常识网(cha138.com)小编为大家整理，主要介绍了python ghdb_ripper.py相关的知识，希望对你有一定的参考价值。
#!/usr/bin/python
# Exploit-db.com Google Dork Hacking Database Replicator written by Andy Bricker 
# Proof of concept.  You shouldn't use this script without prior consent from Exploit-db.com
# http://andybricker.com
# Contact: andy at andybricker.com
 
# Requirements
# Python 2.7 (Has not been tested on later versions)

# Usage: 
#   python ghdb_ripper.py number_of_dorks [-s START_NUMBER] [-o OUTPUT_FILE]
#   e.g. python ghdb_ripper.py 100 -s 5 -o myOutputFile.txt

# Script will crawl the exploit-db.com Google dork pages and build a CSV output file containing, line by line:
# dork,date dork was added,dork description
 
# Like the script?  Donate
#   LiteCoin: LcFU5upJyS7FsEeB5sb25vFTS69dH6fugr
#   DogeCoin: D7SPH1LYJn9Co4GCZePH3JvzR5RkZEPi5M  


import urllib2
import re
import time
import os

from optparse import OptionParser

# Command-line interface: one required positional argument (number_of_dorks)
# plus optional start number and output file.
options = OptionParser(usage='%prog number_of_dorks [options]', description='Exploit-db.Com GHDB Database Replicator')
# "%default" is expanded by optparse to the option's real default, so the help
# text can no longer disagree with the code (it used to claim 5 while the
# actual default was 51).
options.add_option('-s', '--start_number', type='int', default=51, help='Dork number to start with (default: %default)')
options.add_option('-o', '--output_file', type='string', default="output.txt", help='Name of the output file.  Paths accepted. User must have access to output path. (default: %default)')

opts, args = options.parse_args()
if not args:
    # Missing required argument: show the full help and signal a usage error.
    options.print_help()
    raise SystemExit(2)

# Reject a non-numeric count here, with a usage message, rather than letting
# int(args[0]) raise a bare ValueError traceback later in the script.
try:
    int(args[0])
except ValueError:
    options.error('number_of_dorks must be an integer, got %r' % args[0])

# Module-level state.  dorkData and output are initialised here but are not
# referenced anywhere else in the visible script.
dorkData = list()
output = str()

# Results are appended, so an existing output file is extended, not truncated.
log_file = open(opts.output_file, "a")

# Clear the terminal before the crawl starts.  On Windows ('nt') the console
# colour is set with 'color a' before the screen is cleared.
if os.name != 'nt':
    _screen_commands = ('clear',)
else:
    _screen_commands = ('color a', 'cls')
for _screen_command in _screen_commands:
    os.system(_screen_command)
 
# Crawl consecutive GHDB pages and append one "dork,date added,description"
# line per page to log_file.  Python 2 only (urllib2).
#
# NOTE(review): fields are joined with bare commas and are not quoted, so a
# dork or description that itself contains a comma yields an ambiguous row.
# The output format is left unchanged here to stay compatible with files
# produced by earlier runs.

# Consecutive connection failures tolerated before the crawl is abandoned.
MAX_FAILED_ATTEMPTS = 3
GHDB_PAGE_URL = 'http://www.exploit-db.com/ghdb/%d/'
USER_AGENT = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5'

# Patterns for the three scraped fields, compiled once instead of per page.
# "Submited" is the spelling matched against the page markup; do not correct it.
DORK_RE = re.compile(r'<h1>(.*?)</h1>')
DATE_ADDED_RE = re.compile(r'<p>Submited: (.*?)</p>')
DORK_DESC_RE = re.compile(r'<p class="text">(.*?)</p>')

# The positional argument is documented in the usage string as a *number of
# dorks*, so the exclusive end of the range is start + count.  (The previous
# "count - start" produced an empty or truncated range.)
max_range = opts.start_number + int(args[0])
# Must be initialised before the loop: the failure handler increments it and
# the very first request may fail.
failed_attempts = 0

try:
    for page in range(opts.start_number, max_range):   # 3943 Max Results
        page_url = GHDB_PAGE_URL % page
        print("Grabbing " + page_url)
        print("========================================================================")
        search_url = urllib2.Request(page_url)
        search_url.add_header('User-agent', USER_AGENT)

        try:
            search_response = urllib2.urlopen(search_url, timeout=6)
            try:
                search_content = search_response.read()
            finally:
                # Release the connection even if read() fails part-way.
                search_response.close()
        except Exception:
            # Deliberately broad at this boundary: urllib2, httplib and socket
            # all raise different exception types for a failed fetch.  Unlike
            # a bare "except:", this lets Ctrl-C (KeyboardInterrupt) and
            # SystemExit propagate.
            failed_attempts += 1
            if failed_attempts >= MAX_FAILED_ATTEMPTS:
                print("Connection lost.  Exiting.")
                raise SystemExit(1)
            print("Connection interrupted.  Waiting 5 Seconds.")
            time.sleep(5)
            continue

        print("Checking response")
        dork = DORK_RE.findall(search_content)
        if not dork:
            # The page was fetched but has no <h1> title: skip it after a pause.
            print("Communication error.  Waiting 3 seconds.")
            time.sleep(3)
            continue

        date_added = DATE_ADDED_RE.findall(search_content)
        dork_desc = DORK_DESC_RE.findall(search_content)
        # Placeholders keep every output line at exactly three fields.
        added_on = date_added[0] if date_added else "0000-00-00"
        description = dork_desc[0] if dork_desc else "na"

        log_file.write(",".join((dork[0], added_on, description)) + "\n")
        # Only a successfully recorded page resets the failure streak.
        failed_attempts = 0
finally:
    # Runs on normal completion, on the SystemExit above and on Ctrl-C, so the
    # buffered output is always flushed and the handle released.
    log_file.close()
以上是关于python ghdb_ripper.py的主要内容，如果未能解决你的问题，请参考以下文章