# POpen.py -- threaded HTTP/HTTPS forward proxy (Python 2).
# NOTE(review): this file was recovered from a web scrape; the original
# copy carried rendered line-number gutters and page artifacts here.
  1. #!/usr/bin/env python
  2. import sys, httplib, socket, select, gzip, zlib, re, traceback, argparse
  3. from SocketServer import ThreadingMixIn
  4. from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler
  5. from threading import Lock, Timer
  6. from cStringIO import StringIO
  7. from urlparse import urlsplit
  8. parser = argparse.ArgumentParser()
  9. parser.add_argument("-p", "--port", help="Nombre de archivo a procesar")
  10. args = parser.parse_args()
  11. if args.port:
  12. port = int(args.port)
  13. else:
  14. print(" Deve ingresar el puerto que usara como socks...")
  15. sys.exit()
  16. class ThreadingHTTPServer(ThreadingMixIn, HTTPServer):
  17. address_family = socket.AF_INET
  18. def handle_error(self, request, client_address):
  19. print >>sys.stderr, '-'*40
  20. print >>sys.stderr, 'Exception happened during processing of request from', client_address
  21. traceback.print_exc()
  22. print >>sys.stderr, '-'*40
  23. class ThreadingHTTPServer6(ThreadingHTTPServer):
  24. address_family = socket.AF_INET6
  25. class SimpleHTTPProxyHandler(BaseHTTPRequestHandler):
  26. global_lock = Lock()
  27. conn_table = {}
  28. timeout = 300
  29. upstream_timeout = 300
  30. proxy_via = None
  31. def log_error(self, format, *args):
  32. if format == "Request timed out: %r":
  33. return
  34. self.log_message(format, *args)
  35. def do_CONNECT(self):
  36. req = self
  37. reqbody = None
  38. req.path = "https://%s/" % req.path.replace(':443', '')
  39. replaced_reqbody = self.request_handler(req, reqbody)
  40. if replaced_reqbody is True:
  41. return
  42. u = urlsplit(req.path)
  43. address = (u.hostname, u.port or 443)
  44. try:
  45. conn = socket.create_connection(address)
  46. except socket.error:
  47. return
  48. self.send_response(200, 'SOCKS5')
  49. self.send_header('Connection', 'close')
  50. self.end_headers()
  51. conns = [self.connection, conn]
  52. keep_connection = True
  53. while keep_connection:
  54. keep_connection = False
  55. rlist, wlist, xlist = select.select(conns, [], conns, self.timeout)
  56. if xlist:
  57. break
  58. for r in rlist:
  59. other = conns[1] if r is conns[0] else conns[0]
  60. data = r.recv(8192)
  61. if data:
  62. other.sendall(data)
  63. keep_connection = True
  64. conn.close()
  65. def do_HEAD(self):
  66. self.do_SPAM()
  67. def do_GET(self):
  68. self.do_SPAM()
  69. def do_POST(self):
  70. self.do_SPAM()
  71. def do_SPAM(self):
  72. req = self
  73. content_length = int(req.headers.get('Content-Length', 0))
  74. if content_length > 0:
  75. reqbody = self.rfile.read(content_length)
  76. else:
  77. reqbody = None
  78. replaced_reqbody = self.request_handler(req, reqbody)
  79. if replaced_reqbody is True:
  80. return
  81. elif replaced_reqbody is not None:
  82. reqbody = replaced_reqbody
  83. if 'Content-Length' in req.headers:
  84. req.headers['Content-Length'] = str(len(reqbody))
  85. self.remove_hop_by_hop_headers(req.headers)
  86. if self.upstream_timeout:
  87. req.headers['Connection'] = 'Keep-Alive'
  88. else:
  89. req.headers['Connection'] = 'close'
  90. if self.proxy_via:
  91. self.modify_via_header(req.headers)
  92. try:
  93. res, resdata = self.request_to_upstream_server(req, reqbody)
  94. except socket.error:
  95. return
  96. content_encoding = res.headers.get('Content-Encoding', 'identity')
  97. resbody = self.decode_content_body(resdata, content_encoding)
  98. replaced_resbody = self.response_handler(req, reqbody, res, resbody)
  99. if replaced_resbody is True:
  100. return
  101. elif replaced_resbody is not None:
  102. resdata = self.encode_content_body(replaced_resbody, content_encoding)
  103. if 'Content-Length' in res.headers:
  104. res.headers['Content-Length'] = str(len(resdata))
  105. resbody = replaced_resbody
  106. self.remove_hop_by_hop_headers(res.headers)
  107. if self.timeout:
  108. res.headers['Connection'] = 'Keep-Alive'
  109. else:
  110. res.headers['Connection'] = 'close'
  111. if self.proxy_via:
  112. self.modify_via_header(res.headers)
  113. self.send_response(res.status, res.reason)
  114. for k, v in res.headers.items():
  115. if k == 'set-cookie':
  116. for value in self.split_set_cookie_header(v):
  117. self.send_header(k, value)
  118. else:
  119. self.send_header(k, v)
  120. self.end_headers()
  121. if self.command != 'HEAD':
  122. self.wfile.write(resdata)
  123. with self.global_lock:
  124. self.save_handler(req, reqbody, res, resbody)
  125. def request_to_upstream_server(self, req, reqbody):
  126. u = urlsplit(req.path)
  127. origin = (u.scheme, u.netloc)
  128. req.headers['Host'] = u.netloc
  129. selector = "%s?%s" % (u.path, u.query) if u.query else u.path
  130. while True:
  131. with self.lock_origin(origin):
  132. conn = self.open_origin(origin)
  133. try:
  134. conn.request(req.command, selector, reqbody, headers=dict(req.headers))
  135. except socket.error:
  136. self.close_origin(origin)
  137. raise
  138. try:
  139. res = conn.getresponse(buffering=True)
  140. except httplib.BadStatusLine as e:
  141. if e.line == "''":
  142. self.close_origin(origin)
  143. continue
  144. else:
  145. raise
  146. resdata = res.read()
  147. res.headers = res.msg
  148. if not self.upstream_timeout or 'close' in res.headers.get('Connection', ''):
  149. self.close_origin(origin)
  150. else:
  151. self.reset_timer(origin)
  152. return res, resdata
  153. def lock_origin(self, origin):
  154. d = self.conn_table.setdefault(origin, {})
  155. if not 'lock' in d:
  156. d['lock'] = Lock()
  157. return d['lock']
  158. def open_origin(self, origin):
  159. conn = self.conn_table[origin].get('connection')
  160. if not conn:
  161. scheme, netloc = origin
  162. if scheme == 'https':
  163. conn = httplib.HTTPSConnection(netloc)
  164. else:
  165. conn = httplib.HTTPConnection(netloc)
  166. self.reset_timer(origin)
  167. self.conn_table[origin]['connection'] = conn
  168. return conn
  169. def reset_timer(self, origin):
  170. timer = self.conn_table[origin].get('timer')
  171. if timer:
  172. timer.cancel()
  173. if self.upstream_timeout:
  174. timer = Timer(self.upstream_timeout, self.close_origin, args=[origin])
  175. timer.daemon = True
  176. timer.start()
  177. else:
  178. timer = None
  179. self.conn_table[origin]['timer'] = timer
  180. def close_origin(self, origin):
  181. timer = self.conn_table[origin]['timer']
  182. if timer:
  183. timer.cancel()
  184. conn = self.conn_table[origin]['connection']
  185. conn.close()
  186. del self.conn_table[origin]['connection']
  187. def remove_hop_by_hop_headers(self, headers):
  188. hop_by_hop_headers = ['Connection', 'Keep-Alive', 'Proxy-Authenticate', 'Proxy-Authorization', 'TE', 'Trailers', 'Trailer', 'Transfer-Encoding', 'Upgrade']
  189. connection = headers.get('Connection')
  190. if connection:
  191. keys = re.split(r',\s*', connection)
  192. hop_by_hop_headers.extend(keys)
  193. for k in hop_by_hop_headers:
  194. if k in headers:
  195. del headers[k]
  196. def modify_via_header(self, headers):
  197. via_string = "%s %s" % (self.protocol_version, self.proxy_via)
  198. via_string = re.sub(r'^HTTP/', '', via_string)
  199. original = headers.get('Via')
  200. if original:
  201. headers['Via'] = original + ', ' + via_string
  202. else:
  203. headers['Via'] = via_string
  204. def decode_content_body(self, data, content_encoding):
  205. if content_encoding in ('gzip', 'x-gzip'):
  206. io = StringIO(data)
  207. with gzip.GzipFile(fileobj=io) as f:
  208. body = f.read()
  209. elif content_encoding == 'deflate':
  210. body = zlib.decompress(data)
  211. elif content_encoding == 'identity':
  212. body = data
  213. else:
  214. raise Exception("Unknown Content-Encoding: %s" % content_encoding)
  215. return body
  216. def encode_content_body(self, body, content_encoding):
  217. if content_encoding in ('gzip', 'x-gzip'):
  218. io = StringIO()
  219. with gzip.GzipFile(fileobj=io, mode='wb') as f:
  220. f.write(body)
  221. data = io.getvalue()
  222. elif content_encoding == 'deflate':
  223. data = zlib.compress(body)
  224. elif content_encoding == 'identity':
  225. data = body
  226. else:
  227. raise Exception("Unknown Content-Encoding: %s" % content_encoding)
  228. return data
  229. def split_set_cookie_header(self, value):
  230. re_cookies = r'([^=]+=[^,;]+(?:;\s*Expires=[^,]+,[^,;]+|;[^,;]+)*)(?:,\s*)?'
  231. return re.findall(re_cookies, value, flags=re.IGNORECASE)
  232. def request_handler(self, req, reqbody):
  233. pass
  234. def response_handler(self, req, reqbody, res, resbody):
  235. pass
  236. def save_handler(self, req, reqbody, res, resbody):
  237. pass
  238. # Port
  239. def test(HandlerClass=SimpleHTTPProxyHandler, ServerClass=ThreadingHTTPServer, protocol="HTTP/1.1"):
  240. server_address = ('', port)
  241. HandlerClass.protocol_version = protocol
  242. httpd = ServerClass(server_address, HandlerClass)
  243. sa = httpd.socket.getsockname()
  244. print "Serving HTTP on", sa[0], "port", sa[1], "..."
  245. httpd.serve_forever()
  246. if __name__ == '__main__':
  247. test()