|
- #!/bin/env python
- import sys
- import getopt
- import threading
- import string
- import socket
- import base64
- import time
- import urlparse
- import struct
class Proxy(object):
    """
    Describe one proxy server and validate it against a test URL.

    The proxy is written as ``scheme://[user[:password]@]host[:port]``.
    Schemes understood by validate() are "http", "httpc" (HTTP CONNECT),
    "socks4" and "socks5".  After validate() has run, ``startTime``,
    ``connectTime`` and ``endTime`` hold the time.time() stamps taken during
    the attempt and ``validated`` is True.
    """

    # Port used when the proxy URL does not name one.
    DEFAULT_PORT = 8080

    def __init__(self, urlStr):
        """
        Parse the proxy URL string into scheme, credentials, host and port.

        Raises ValueError when the "://" separator is missing or the port is
        not an integer.
        """
        scheme, sep, rest = urlStr.partition("://")
        if not sep:
            raise ValueError("missing '://' in proxy URL: %r" % (urlStr,))
        self.scheme = scheme

        # Split on the LAST "@" so that a password containing "@" survives.
        auth, sep, hostPort = rest.rpartition("@")
        self.needAuth = bool(sep)
        self.needPassword = False
        # Always defined, so callers never hit a missing attribute.
        self.username = ""
        self.password = ""
        if self.needAuth:
            # Only the first ":" separates the user name from the password.
            self.username, sep, self.password = auth.partition(":")
            self.needPassword = bool(sep)

        # Tolerate a trailing path such as "http://host:3128/".
        hostPort = hostPort.split("/", 1)[0]
        host, sep, portText = hostPort.rpartition(":")
        if sep:
            self.host = host
            self.port = int(portText)
        else:
            self.host = hostPort
            self.port = self.DEFAULT_PORT

        # Nothing has been measured or validated yet.
        self.startTime = None
        self.connectTime = None
        self.endTime = None
        self.validated = False

    @staticmethod
    def _toBytes(text):
        """Return text as a byte string (UTF-8 encoded when it is not one)."""
        if isinstance(text, bytes):
            return text
        return text.encode("utf-8")

    @staticmethod
    def _parseUrl(testUrl):
        """Return the urlparse() tuple for testUrl."""
        # Imported here so the method works whether the interpreter ships
        # the function as urllib.parse.urlparse or urlparse.urlparse.
        try:
            from urllib.parse import urlparse
        except ImportError:
            from urlparse import urlparse
        return urlparse(testUrl)

    def _getNetloc(self, testUrl):
        """Return the "host[:port]" part of testUrl without any user info."""
        return self._parseUrl(testUrl)[1].rpartition("@")[2]

    def _buildRequest(self, lines):
        """Join header lines into one CRLF-terminated request byte string."""
        encoded = [self._toBytes(line) for line in lines]
        return b"\r\n".join(encoded) + b"\r\n\r\n"

    def _proxyAuthHeaders(self):
        """
        Return the Proxy-Authorization header as a list (empty when the
        proxy URL carried no user name and password pair).
        """
        if self.needAuth and self.needPassword:
            credentials = (self._toBytes(self.username) + b":"
                           + self._toBytes(self.password))
            return [b"Proxy-Authorization: Basic "
                    + base64.b64encode(credentials)]
        return []

    def _parseHttpHeader(self, sock):
        """
        Read an HTTP response header from sock, up to the blank line.

        Return True if a status line with code 200 was seen and the header
        was complete; return False otherwise, including when the peer closes
        the connection before the blank line arrives.
        """
        retval = False
        line = b""
        while True:
            # One byte at a time: reading more could swallow data that
            # follows the header and belongs to the next protocol stage.
            chunk = sock.recv(1)
            if not chunk:
                # EOF inside the header: the response is truncated.
                return False
            line += chunk
            if not line.endswith(b"\n"):
                continue
            if line in (b"\r\n", b"\n"):
                # Blank line terminates the header.
                break
            if line.startswith(b"HTTP/1."):
                fields = line.split()
                if len(fields) >= 2 and fields[1] == b"200":
                    retval = True
            line = b""
        return retval

    def _getHttpBody(self, sock):
        """Read and discard data from sock until the peer closes it."""
        while sock.recv(256):
            pass

    def validateHttp(self, testUrl, sock):
        """
        Validate the HTTP proxy with the testUrl
        return true if the proxy answered with status 200
        """
        lines = ["GET " + testUrl + " HTTP/1.1",
                 # HTTP/1.1 requires a Host header on every request.
                 "Host: " + self._getNetloc(testUrl),
                 "Connection: close"]
        lines.extend(self._proxyAuthHeaders())
        sock.sendall(self._buildRequest(lines))
        retval = self._parseHttpHeader(sock)
        if retval:
            self._getHttpBody(sock)
        return retval

    def _getAddrFromUrl(self, testUrl):
        """
        Return [host, port] of the server named in testUrl; the port
        defaults to 80.  Raises ValueError for a non-numeric port.
        """
        host, sep, portText = self._getNetloc(testUrl).partition(":")
        if sep and portText:
            port = int(portText)
        else:
            port = 80
        return [host, port]

    def _getHttpPage(self, testUrl, sock):
        """
        Fetch testUrl over sock, which must already reach the origin server
        (directly or through an established tunnel).
        return true if the server answered with status 200
        """
        parts = self._parseUrl(testUrl)
        path = parts[2] or "/"
        if parts[4]:
            # Keep the query string; it is part of the request target.
            path += "?" + parts[4]
        lines = ["GET " + path + " HTTP/1.1",
                 "Host: " + self._getNetloc(testUrl),
                 "Connection: close"]
        sock.sendall(self._buildRequest(lines))
        if not self._parseHttpHeader(sock):
            return False
        self._getHttpBody(sock)
        return True

    def validateHttpc(self, testUrl, sock):
        """
        Validate the HTTP-CONNECT proxy with the testURL
        return true if the tunnel was set up and the page was fetched
        """
        [hostname, port] = self._getAddrFromUrl(testUrl)
        target = hostname + ":" + str(port)
        lines = ["CONNECT " + target + " HTTP/1.1",
                 "Host: " + target]
        lines.extend(self._proxyAuthHeaders())
        sock.sendall(self._buildRequest(lines))
        if not self._parseHttpHeader(sock):
            return False
        return self._getHttpPage(testUrl, sock)

    def _recvn(self, sock, length):
        """
        Receive exactly length bytes from sock.
        Return an empty byte string if the peer closes the connection first.
        """
        res = b""
        while len(res) < length:
            ret = sock.recv(length - len(res))
            if not ret:
                return b""
            res += ret
        return res

    def validateSocks4(self, testUrl, sock):
        """
        Validate the Socks4 proxy with testUrl
        return true if the proxy granted the request and the page was fetched
        """
        [hostname, port] = self._getAddrFromUrl(testUrl)
        ip = socket.gethostbyname(hostname)
        # send connect request: VN=4, CD=1 (CONNECT), port, IPv4 address,
        # then the NUL-terminated user id
        req = struct.pack("!BBH", 4, 1, port) + socket.inet_aton(ip)
        if self.needAuth:
            req += self._toBytes(self.username)
        req += b"\x00"
        sock.sendall(req)
        # get and parse the response
        res = self._recvn(sock, 8)
        if not res:
            return False
        (vn, cd) = struct.unpack("!BB", res[:2])
        # A granted request is answered with VN=0 and CD=90.
        if vn != 0 or cd != 90:
            return False
        return self._getHttpPage(testUrl, sock)

    def validateSocks5(self, testUrl, sock):
        """
        Validate the Socks5 proxy with testUrl
        return true if the proxy accepted the connect request and the page
        was fetched
        """
        # hello and authorization: offer "no auth" (0) and, when credentials
        # are available, "username/password" (2)
        if self.needAuth:
            req = struct.pack("!BBBB", 5, 2, 0, 2)
        else:
            req = struct.pack("!BBB", 5, 1, 0)
        sock.sendall(req)
        res = self._recvn(sock, 2)
        if not res:
            return False
        (ver, method) = struct.unpack("!BB", res)
        if ver != 5:
            return False
        if method == 2:
            if not (self.needAuth and self.needPassword):
                return False
            username = self._toBytes(self.username)
            password = self._toBytes(self.password)
            # Each length is sent in a single byte.
            if len(username) > 255 or len(password) > 255:
                return False
            req = (struct.pack("!BB", 1, len(username)) + username
                   + struct.pack("!B", len(password)) + password)
            sock.sendall(req)
            res = self._recvn(sock, 2)
            if not res:
                return False
            (ver, status) = struct.unpack("!BB", res)
            # The sub-negotiation reply carries its own version byte (1 in
            # RFC 1929), so only the status byte decides success.
            if status != 0:
                return False
        elif method != 0:
            return False
        # connect
        [hostname, port] = self._getAddrFromUrl(testUrl)
        ip = socket.gethostbyname(hostname)
        req = (struct.pack("!BBBB", 5, 1, 0, 1) + socket.inet_aton(ip)
               + struct.pack("!H", port))
        sock.sendall(req)
        # The fifth byte is the first byte of the bound address; for a
        # domain-name address (atyp 3) it is the name length.
        res = self._recvn(sock, 5)
        if not res:
            return False
        (ver, rep, rsv, atyp, addrLen) = struct.unpack("!BBBBB", res)
        if ver != 5 or rep != 0 or rsv != 0:
            return False
        # Consume the rest of the bound address plus the 2-byte port.
        if atyp == 1:
            res = self._recvn(sock, 5)
        elif atyp == 3:
            res = self._recvn(sock, addrLen + 2)
        elif atyp == 4:
            res = self._recvn(sock, 17)
        else:
            return False
        if not res:
            return False
        return self._getHttpPage(testUrl, sock)

    def validate(self, testUrl, timeout=None):
        """
        Validate the proxy server, return true if successfully.

        timeout, when not None, is applied to the socket (in seconds) so a
        silent proxy cannot block forever; None keeps blocking behaviour.
        Socket-level failures (refused connection, timeout, DNS error) are
        reported as False.  The socket is always closed.
        """
        handlers = {
            "http": self.validateHttp,
            "httpc": self.validateHttpc,
            "socks4": self.validateSocks4,
            "socks5": self.validateSocks5,
        }
        retval = False
        self.connectTime = None
        self.startTime = time.time()
        # connect to the proxy server
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM,
                             socket.IPPROTO_TCP)
        try:
            if timeout is not None:
                sock.settimeout(timeout)
            sock.connect((self.host, self.port))
            self.connectTime = time.time()
            handler = handlers.get(self.scheme)
            if handler is not None:
                retval = handler(testUrl, sock)
        except socket.error:
            retval = False
        finally:
            sock.close()
            self.endTime = time.time()
            self.validated = True
        return retval
class ValidateThread(threading.Thread):
    """
    Worker thread that validates one proxy and prints it when it works.

    Constructing the thread takes one slot of the semaphore (blocking the
    caller until a slot is free); the slot is handed back when run()
    finishes, whether validation succeeded, failed or raised.
    """

    def __init__(self, proxy, url, sema, lock):
        """
        proxy -- object with a validate(url) method and the attributes
                 printed by run()
        url   -- test URL passed to proxy.validate()
        sema  -- semaphore limiting the number of concurrent workers
        lock  -- lock serialising output of the workers
        """
        threading.Thread.__init__(self)
        self._proxy = proxy
        self._url = url
        self._lock = lock
        self._sema = sema
        self._slotHeld = False
        self._sema.acquire()
        self._slotHeld = True

    def _releaseSlot(self):
        """Give the semaphore slot back, at most once."""
        if getattr(self, "_slotHeld", False):
            self._slotHeld = False
            self._sema.release()

    def __del__(self):
        # Fallback for a thread that was created but never run.
        self._releaseSlot()

    def run(self):
        """
        Validate the proxy with the URL given to the constructor and, on
        success, print one line: the proxy URL followed by the connect,
        transfer and total times in seconds.
        """
        try:
            if self._proxy.validate(self._url):
                proxy = self._proxy
                urlStr = proxy.scheme + "://"
                if proxy.needAuth:
                    urlStr += proxy.username
                    if proxy.needPassword:
                        urlStr += ":" + proxy.password
                    urlStr += "@"
                urlStr += proxy.host + ":" + str(proxy.port)
                line = "%s %s %s %s" % (
                    urlStr,
                    proxy.connectTime - proxy.startTime,
                    proxy.endTime - proxy.connectTime,
                    proxy.endTime - proxy.startTime)
                # One writer at a time so lines from workers do not mix.
                with self._lock:
                    print(line)
        finally:
            # Release here instead of relying on __del__, which is not
            # guaranteed to run as soon as the thread ends.
            self._releaseSlot()
def CreateValidateThread(proxy, url, sema, lock):
    """
    Build a ValidateThread for the given proxy and start it right away.

    All four arguments are passed straight to the ValidateThread
    constructor.  Nothing is returned.
    """
    worker = ValidateThread(proxy, url, sema, lock)
    # Mark as daemon before starting, so the worker alone does not keep
    # the interpreter alive.
    worker.setDaemon(True)
    worker.start()
def showHelp(prog):
    """
    Print the usage text to standard output.

    prog -- program name substituted into the "Usage:" line
    """
    helpLines = (
        "valproxy 0.0.1: A proxies validating tool.",
        "Usage: %s [option]..." % prog,
        "Options:",
        " -h, --help Show the help information",
        " -i, --input-file=file Use the file as input instead of stdin",
        " -l, --test-url=url Use the url as the test url "
        "instead of www.163.com",
        " -n, --max-thread-num=n Set the max thread number at "
        "the same time to n, default 10",
    )
    print("\n".join(helpLines))
if __name__ == "__main__":
    # Script entry point: read proxy URLs (one per line) from the input
    # file or stdin and validate each one in its own worker thread.
    maxThreadNum = 10
    lock = threading.Lock()
    # Kept as a module-level name so code that reads the global `url`
    # keeps working.
    url = "http://www.163.com/index.html"
    inFile = sys.stdin

    # parse the arguments
    try:
        (opts, args) = getopt.getopt(
            sys.argv[1:], "hi:l:n:",
            ["help", "input-file=", "test-url=", "max-thread-num="])
    except getopt.GetoptError:
        showHelp(sys.argv[0])
        sys.exit(1)
    if args:
        # Positional arguments are not accepted.
        showHelp(sys.argv[0])
        sys.exit(1)
    for (o, a) in opts:
        if o in ("-h", "--help"):
            showHelp(sys.argv[0])
            sys.exit(0)
        elif o in ("-i", "--input-file"):
            try:
                inFile = open(a, "r")
            except IOError as err:
                print("cannot open input file %s: %s" % (a, err))
                sys.exit(1)
        elif o in ("-l", "--test-url"):
            url = a
        elif o in ("-n", "--max-thread-num"):
            try:
                maxThreadNum = int(a)
            except ValueError:
                maxThreadNum = 0
            if maxThreadNum <= 0:
                print("the argument for options n and max-thread-num must "
                      "be greater than 0")
                sys.exit(1)
        else:
            print("Unknown option: " + o)
            showHelp(sys.argv[0])
            sys.exit(1)

    sema = threading.Semaphore(maxThreadNum)

    # validate the proxy one by one
    try:
        # readline() returns "" only at end of input, so blank lines in the
        # middle of the list are skipped instead of ending the run.
        for line in iter(inFile.readline, ""):
            line = line.strip()
            if not line:
                continue
            try:
                proxy = Proxy(line)
            except ValueError:
                sys.stderr.write("skipping malformed proxy: %s\n" % line)
                continue
            CreateValidateThread(proxy, url, sema, lock)
    finally:
        if inFile is not sys.stdin:
            inFile.close()

    # wait until all the threads exit: every slot can only be taken once
    # no worker is holding one any more
    for _ in range(maxThreadNum):
        sema.acquire()
    for _ in range(maxThreadNum):
        sema.release()
复制代码 |
|