PPub.py

#!/usr/bin/env python
import sys
import httplib
from SocketServer import ThreadingMixIn
from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler
from threading import Lock, Timer
from cStringIO import StringIO
from urlparse import urlsplit
import socket
import select
import gzip
import zlib
import re
import traceback

# Optional second command-line argument overrides the status message sent
# in response to CONNECT requests.
if sys.argv[2:]:
    msg1 = sys.argv[2]
else:
    msg1 = "ADM-ULTIMATE"


class ThreadingHTTPServer(ThreadingMixIn, HTTPServer):
    address_family = socket.AF_INET

    def handle_error(self, request, client_address):
        print >>sys.stderr, '-'*40
        print >>sys.stderr, 'Exception happened during processing of request from', client_address
        traceback.print_exc()
        print >>sys.stderr, '-'*40


class ThreadingHTTPServer6(ThreadingHTTPServer):
    address_family = socket.AF_INET6


class SimpleHTTPProxyHandler(BaseHTTPRequestHandler):
    global_lock = Lock()
    conn_table = {}          # per-origin cache of upstream connections, locks and idle timers
    timeout = 300            # idle timeout (seconds) for client connections and CONNECT tunnels
    upstream_timeout = 300   # keep-alive timeout (seconds) for pooled upstream connections
    proxy_via = None         # if set, a Via header is appended in both directions

    def log_error(self, format, *args):
        # Suppress the noisy "Request timed out" log lines; report everything else.
        if format == "Request timed out: %r":
            return
        self.log_message(format, *args)

    def do_CONNECT(self):
        req = self
        reqbody = None
        req.path = "https://%s/" % req.path.replace(':443', '')

        replaced_reqbody = self.request_handler(req, reqbody)
        if replaced_reqbody is True:
            return

        u = urlsplit(req.path)
        address = (u.hostname, u.port or 443)
        try:
            conn = socket.create_connection(address)
        except socket.error:
            return

        # Acknowledge the tunnel with the configurable status message.
        self.send_response(200, msg1)
        self.send_header('Connection', 'close')
        self.end_headers()

        # Relay raw bytes in both directions until either side stops sending
        # or the select() call times out.
        conns = [self.connection, conn]
        keep_connection = True
        while keep_connection:
            keep_connection = False
            rlist, wlist, xlist = select.select(conns, [], conns, self.timeout)
            if xlist:
                break
            for r in rlist:
                other = conns[1] if r is conns[0] else conns[0]
                data = r.recv(8192)
                if data:
                    other.sendall(data)
                    keep_connection = True
        conn.close()

    def do_HEAD(self):
        self.do_SPAM()

    def do_GET(self):
        self.do_SPAM()

    def do_POST(self):
        self.do_SPAM()

    def do_SPAM(self):
        # Common path for HEAD/GET/POST: read the request body, run the
        # request/response hooks, forward to the upstream server and relay
        # the (possibly rewritten) response back to the client.
        req = self
        content_length = int(req.headers.get('Content-Length', 0))
        if content_length > 0:
            reqbody = self.rfile.read(content_length)
        else:
            reqbody = None

        # request_handler() may return True (stop here), a replacement body,
        # or None (leave the request unchanged).
        replaced_reqbody = self.request_handler(req, reqbody)
        if replaced_reqbody is True:
            return
        elif replaced_reqbody is not None:
            reqbody = replaced_reqbody
            if 'Content-Length' in req.headers:
                req.headers['Content-Length'] = str(len(reqbody))

        self.remove_hop_by_hop_headers(req.headers)
        if self.upstream_timeout:
            req.headers['Connection'] = 'Keep-Alive'
        else:
            req.headers['Connection'] = 'close'
        if self.proxy_via:
            self.modify_via_header(req.headers)

        try:
            res, resdata = self.request_to_upstream_server(req, reqbody)
        except socket.error:
            return

        content_encoding = res.headers.get('Content-Encoding', 'identity')
        resbody = self.decode_content_body(resdata, content_encoding)

        # response_handler() follows the same convention as request_handler().
        replaced_resbody = self.response_handler(req, reqbody, res, resbody)
        if replaced_resbody is True:
            return
        elif replaced_resbody is not None:
            resdata = self.encode_content_body(replaced_resbody, content_encoding)
            if 'Content-Length' in res.headers:
                res.headers['Content-Length'] = str(len(resdata))
            resbody = replaced_resbody

        self.remove_hop_by_hop_headers(res.headers)
        if self.timeout:
            res.headers['Connection'] = 'Keep-Alive'
        else:
            res.headers['Connection'] = 'close'
        if self.proxy_via:
            self.modify_via_header(res.headers)

        self.send_response(res.status, res.reason)
        for k, v in res.headers.items():
            if k == 'set-cookie':
                # Multiple Set-Cookie values may arrive folded into one
                # comma-joined header; split them back into separate headers.
                for value in self.split_set_cookie_header(v):
                    self.send_header(k, value)
            else:
                self.send_header(k, v)
        self.end_headers()
        if self.command != 'HEAD':
            self.wfile.write(resdata)

        with self.global_lock:
            self.save_handler(req, reqbody, res, resbody)

    def request_to_upstream_server(self, req, reqbody):
        u = urlsplit(req.path)
        origin = (u.scheme, u.netloc)

        req.headers['Host'] = u.netloc
        selector = "%s?%s" % (u.path, u.query) if u.query else u.path

        while True:
            with self.lock_origin(origin):
                conn = self.open_origin(origin)
                try:
                    conn.request(req.command, selector, reqbody, headers=dict(req.headers))
                except socket.error:
                    self.close_origin(origin)
                    raise
                try:
                    res = conn.getresponse(buffering=True)
                except httplib.BadStatusLine as e:
                    if e.line == "''":
                        # The pooled keep-alive connection was closed by the
                        # upstream server; discard it and retry on a fresh one.
                        self.close_origin(origin)
                        continue
                    else:
                        raise
                resdata = res.read()
                res.headers = res.msg

                if not self.upstream_timeout or 'close' in res.headers.get('Connection', ''):
                    self.close_origin(origin)
                else:
                    self.reset_timer(origin)
                return res, resdata

    def lock_origin(self, origin):
        d = self.conn_table.setdefault(origin, {})
        if 'lock' not in d:
            d['lock'] = Lock()
        return d['lock']

    def open_origin(self, origin):
        conn = self.conn_table[origin].get('connection')
        if not conn:
            scheme, netloc = origin
            if scheme == 'https':
                conn = httplib.HTTPSConnection(netloc)
            else:
                conn = httplib.HTTPConnection(netloc)
            self.reset_timer(origin)
            self.conn_table[origin]['connection'] = conn
        return conn

    def reset_timer(self, origin):
        timer = self.conn_table[origin].get('timer')
        if timer:
            timer.cancel()
        if self.upstream_timeout:
            timer = Timer(self.upstream_timeout, self.close_origin, args=[origin])
            timer.daemon = True
            timer.start()
        else:
            timer = None
        self.conn_table[origin]['timer'] = timer

    def close_origin(self, origin):
        timer = self.conn_table[origin]['timer']
        if timer:
            timer.cancel()
        conn = self.conn_table[origin]['connection']
        conn.close()
        del self.conn_table[origin]['connection']

    def remove_hop_by_hop_headers(self, headers):
        hop_by_hop_headers = ['Connection', 'Keep-Alive', 'Proxy-Authenticate',
                              'Proxy-Authorization', 'TE', 'Trailers', 'Trailer',
                              'Transfer-Encoding', 'Upgrade']
        connection = headers.get('Connection')
        if connection:
            keys = re.split(r',\s*', connection)
            hop_by_hop_headers.extend(keys)
        for k in hop_by_hop_headers:
            if k in headers:
                del headers[k]

    def modify_via_header(self, headers):
        via_string = "%s %s" % (self.protocol_version, self.proxy_via)
        via_string = re.sub(r'^HTTP/', '', via_string)
        original = headers.get('Via')
        if original:
            headers['Via'] = original + ', ' + via_string
        else:
            headers['Via'] = via_string

    def decode_content_body(self, data, content_encoding):
        if content_encoding in ('gzip', 'x-gzip'):
            io = StringIO(data)
            with gzip.GzipFile(fileobj=io) as f:
                body = f.read()
        elif content_encoding == 'deflate':
            body = zlib.decompress(data)
        elif content_encoding == 'identity':
            body = data
        else:
            raise Exception("Unknown Content-Encoding: %s" % content_encoding)
        return body

    def encode_content_body(self, body, content_encoding):
        if content_encoding in ('gzip', 'x-gzip'):
            io = StringIO()
            with gzip.GzipFile(fileobj=io, mode='wb') as f:
                f.write(body)
            data = io.getvalue()
        elif content_encoding == 'deflate':
            data = zlib.compress(body)
        elif content_encoding == 'identity':
            data = body
        else:
            raise Exception("Unknown Content-Encoding: %s" % content_encoding)
        return data

    def split_set_cookie_header(self, value):
        re_cookies = r'([^=]+=[^,;]+(?:;\s*Expires=[^,]+,[^,;]+|;[^,;]+)*)(?:,\s*)?'
        return re.findall(re_cookies, value, flags=re.IGNORECASE)

    def request_handler(self, req, reqbody):
        # Hook for subclasses; see do_SPAM/do_CONNECT for how return values
        # (None, True, or a replacement body) are interpreted.
        pass

    def response_handler(self, req, reqbody, res, resbody):
        pass

    def save_handler(self, req, reqbody, res, resbody):
        pass
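

# A minimal sketch (not part of the original script) of how the hook methods
# above are meant to be overridden. The class name and the host
# 'blocked.example' are hypothetical. Per do_SPAM/do_CONNECT, a hook may
# return None (leave the message untouched), True (stop processing the
# request), or a replacement body; this class only takes effect if passed
# to test() as HandlerClass.
class ExampleRewritingProxyHandler(SimpleHTTPProxyHandler):
    def request_handler(self, req, reqbody):
        # Refuse requests for one hypothetical host and stop further handling.
        if urlsplit(req.path).hostname == 'blocked.example':
            self.send_error(403)
            return True

    def response_handler(self, req, reqbody, res, resbody):
        # Rewrite plain-text bodies; the returned string replaces resbody and
        # is re-encoded with the original Content-Encoding by do_SPAM.
        if res.headers.get('Content-Type', '').startswith('text/plain'):
            return resbody.replace('example', 'EXAMPLE')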


def test(HandlerClass=SimpleHTTPProxyHandler, ServerClass=ThreadingHTTPServer, protocol="HTTP/1.1"):
    if sys.argv[1:]:
        port = int(sys.argv[1])
    else:
        port = 8799
    server_address = ('', port)

    HandlerClass.protocol_version = protocol
    httpd = ServerClass(server_address, HandlerClass)

    sa = httpd.socket.getsockname()
    print "Serving HTTP on", sa[0], "port", sa[1], "..."
    httpd.serve_forever()


if __name__ == '__main__':
    test()
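
# Usage sketch (inferred from the argument handling above, not from any
# original documentation): the first argument is the listening port
# (default 8799) and the second overrides the CONNECT status message
# (default "ADM-ULTIMATE"), e.g.
#   python PPub.py 8799 "ADM-ULTIMATE"
# then point the client's HTTP/HTTPS proxy settings at this host and port.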