|
27 | 27 | import sys |
28 | 28 | import urllib2 |
29 | 29 | from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer |
| 30 | +from HTMLParser import HTMLParser |
| 31 | +import re |
30 | 32 |
|
# Runtime configuration, each value overridable through an environment variable.
BIND_ADDR = os.environ.get("BIND_ADDR", "0.0.0.0")
SERVER_PORT = int(os.environ.get("SERVER_PORT", "80"))
# Trailing slashes are stripped and a single one re-added, so the prefix
# always ends in exactly one '/' (an unset/empty value becomes "/").
URL_PREFIX = os.environ.get("URL_PREFIX", "").rstrip('/') + '/'
SPARK_MASTER_HOST = ""

# Regular expressions used when post-processing proxied pages.
# All are raw strings so that \w, \S and \d reach the regex engine untouched
# instead of relying on Python passing unknown string escapes through.
#
# Page title text such as "Spark Worker at <url>": captures the word after
# "Spark " and the whitespace-free token after " at ".
SPARK_TITLE_PATTERN = r"Spark (?P<spark_type>\w+) at (?P<spark_url>\S+)"
# "[protocol://]host:port" -- the protocol group is optional.
URL_PATTERN = r"(?P<protocol>\w+://)?(?P<host>[-\w.]+):(?P<port>\d+)"
# href="/proxy:spark-...-worker-...:" -- group 2 is the part that gets replaced.
WORKER_HREF_PATTERN = r'(href="/proxy:)(spark-[-\w]+-worker-[-\w]+)(:)'
| 40 | + |
class SparkHTMLParser(HTMLParser):
    """Pull the Spark role and URL out of a page's <title> text.

    After ``feed()`` has been called:
      * ``spark_title`` is the text most recently seen inside <title>
        (None if no title text was seen),
      * ``spark_type`` is the lower-cased word matched by the ``spark_type``
        group of SPARK_TITLE_PATTERN (None if the title did not match),
      * ``spark_url`` is the text matched by the ``spark_url`` group
        (None if the title did not match).
    """

    def __init__(self):
        # True only while the parser is between <title> and </title>.
        self.title = False
        self.spark_title = None
        self.spark_type = None
        self.spark_url = None
        HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        """Note that title text is about to follow."""
        if tag == 'title':
            self.title = True

    def handle_endtag(self, tag):
        """Stop treating character data as title text."""
        if tag == 'title':
            self.title = False

    def handle_data(self, data):
        """Record title text and, if it matches, the Spark type and URL.

        Raises:
            Exception: if title text arrives after the Spark type/URL have
                already been extracted from an earlier piece of title text.
        """
        if not self.title:
            return
        if self.spark_type is not None or self.spark_url is not None:
            raise Exception("Spark data is already filled (orig title: {0}, current title: {1})".format(self.spark_title, data))
        self.spark_title = data
        # A title that does not look like "Spark <type> at <url>" is not an
        # error: leave spark_type/spark_url as None. Checking the match
        # explicitly avoids a blanket except that would hide real failures.
        match = re.search(SPARK_TITLE_PATTERN, data)
        if match is None:
            return
        self.spark_type = match.group("spark_type").lower()
        self.spark_url = match.group("spark_url")
36 | 68 |
|
37 | 69 | class ProxyHandler(BaseHTTPRequestHandler): |
38 | 70 | def do_GET(self): |
@@ -74,6 +106,8 @@ def proxyRequest(self, data): |
74 | 106 | if resCode == 200: |
75 | 107 | page = proxiedRequest.read() |
76 | 108 | page = self.rewriteLinks(page, targetHost) |
| 109 | + page = self.rewriteWorkerLinks(page) |
| 110 | + page = self.removeDeadLinks(page) |
77 | 111 | resContentType = proxiedRequest.info()["Content-Type"] |
78 | 112 | self.send_response(200) |
79 | 113 | self.send_header("Content-Type", resContentType) |
@@ -110,6 +144,17 @@ def rewriteLinks(self, page, targetHost): |
110 | 144 | page = page.replace('{{uiroot}}/history', '{{uiroot}}' + target + 'history') |
111 | 145 | return page |
112 | 146 |
|
def rewriteWorkerLinks(self, page):
    """Rewrite worker hrefs on a Spark worker page to use the worker's host.

    The page is parsed for its <title>; when the title identifies the page
    as a Spark "worker" page, every ``href="/proxy:spark-...-worker-...:``
    occurrence has the ``spark-...-worker-...`` part replaced by the host
    taken from the URL in the title. Any other page is returned unchanged.

    Args:
        page: the proxied response body.

    Returns:
        The page, with worker hrefs rewritten when applicable.
    """
    parser = SparkHTMLParser()
    # NOTE(review): feed() can raise (e.g. the "already filled" Exception
    # from SparkHTMLParser.handle_data); that still propagates to the caller.
    parser.feed(page)
    if parser.spark_type != 'worker':
        return page

    url_match = re.search(URL_PATTERN, parser.spark_url)
    if url_match is None:
        # The title named a worker but carried no host:port we can parse;
        # serve the page as-is instead of failing on a None match.
        return page

    host = url_match.group('host')
    # A callable replacement keeps the host text literal (it is never
    # interpreted as a regex replacement template).
    return re.sub(
        WORKER_HREF_PATTERN,
        lambda m: m.group(1) + host + m.group(3),
        page,
    )
| 154 | + |
def removeDeadLinks(self, page):
    """Remove the "Back to Master" link paragraph from a proxied page.

    Every occurrence of
    ``<p><a href="/proxy:HOST:PORT">Back to Master</a></p>`` is deleted,
    where HOST consists of word characters and dashes and PORT of digits.

    Args:
        page: the proxied response body.

    Returns:
        The page with those paragraphs removed.
    """
    # Raw string so \w and \d are regex classes, not string escapes.
    # NOTE(review): HOST does not allow '.', so links whose host is an IP
    # address or dotted name are left in place -- confirm this is intended.
    return re.sub(r'<p><a href="/proxy:[-\w]+:\d+">Back to Master</a></p>', '', page)
113 | 158 |
|
114 | 159 | if __name__ == '__main__': |
115 | 160 | if len(sys.argv) < 2: |
|
0 commit comments