Skip to content

Commit 07eba4d

Browse files
urihoenigbivas
authored and committed
IG-11123: Fix worker links & remove non functioning 'Back to Master' link (#1)
1 parent 20e7c2c commit 07eba4d

File tree

1 file changed

+46
-1
lines changed

1 file changed

+46
-1
lines changed

spark-ui-proxy.py

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,44 @@
2727
import sys
2828
import urllib2
2929
from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer
30+
from HTMLParser import HTMLParser
31+
import re
3032

3133
# Environment-overridable settings (presumably the listen address/port of the
# proxy server -- the code that consumes them is outside this excerpt).
BIND_ADDR = os.environ.get("BIND_ADDR", "0.0.0.0")
SERVER_PORT = int(os.environ.get("SERVER_PORT", "80"))
# Normalised so the prefix always ends with exactly one '/'.
URL_PREFIX = os.environ.get("URL_PREFIX", "").rstrip('/') + '/'
SPARK_MASTER_HOST = ""

# Regular expressions are written as raw strings so that \w, \S and \d reach
# the regex engine untouched instead of being parsed as (invalid) string
# escapes.
# Title text of the form "Spark <type> at <url>".
SPARK_TITLE_PATTERN = r"Spark (?P<spark_type>\w+) at (?P<spark_url>\S+)"
# "[protocol://]host:port"; the host may contain letters, digits, '_', '-', '.'.
URL_PATTERN = r"(?P<protocol>\w+://)?(?P<host>[-\w.]+):(?P<port>\d+)"
# Groups: 1 = 'href="/proxy:' prefix, 2 = worker name, 3 = trailing ':'.
WORKER_HREF_PATTERN = r'(href="/proxy:)(spark-[-\w]+-worker-[-\w]+)(:)'
40+
41+
class SparkHTMLParser(HTMLParser):
    """Pull the Spark page type and URL out of an HTML page's <title>.

    After ``feed()`` has been called the results are exposed as attributes:

    * ``spark_title`` -- the raw title text, or None if none was seen
    * ``spark_type``  -- lower-cased word following "Spark " in the title
    * ``spark_url``   -- the non-whitespace token following " at "

    ``spark_type`` and ``spark_url`` remain None when the title text does not
    match SPARK_TITLE_PATTERN.
    """

    def __init__(self):
        # True only while the parser is positioned inside a <title> element.
        self.title = False
        self.spark_title = None
        self.spark_type = None
        self.spark_url = None
        HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        """Note that a <title> element has been opened."""
        if tag == 'title':
            self.title = True

    def handle_endtag(self, tag):
        """Note that the <title> element has been closed."""
        if tag == 'title':
            self.title = False

    def handle_data(self, data):
        """Record title text and parse the Spark type/URL from it.

        Text outside a <title> element is ignored.

        Raises:
            Exception: if title text arrives after the Spark fields have
                already been populated by earlier title text.
        """
        if not self.title:
            return
        if self.spark_type is not None or self.spark_url is not None:
            raise Exception("Spark data is already filled (orig title: {0}, current title: {1})".format(self.spark_title, data))
        self.spark_title = data
        # A non-Spark title is an expected case: test for it explicitly rather
        # than hiding it (and every other error) behind a bare ``except``.
        match = re.search(SPARK_TITLE_PATTERN, data)
        if match is None:
            return
        self.spark_type = match.group("spark_type").lower()
        self.spark_url = match.group("spark_url")
3668

3769
class ProxyHandler(BaseHTTPRequestHandler):
3870
def do_GET(self):
@@ -74,6 +106,8 @@ def proxyRequest(self, data):
74106
if resCode == 200:
75107
page = proxiedRequest.read()
76108
page = self.rewriteLinks(page, targetHost)
109+
page = self.rewriteWorkerLinks(page)
110+
page = self.removeDeadLinks(page)
77111
resContentType = proxiedRequest.info()["Content-Type"]
78112
self.send_response(200)
79113
self.send_header("Content-Type", resContentType)
@@ -110,6 +144,17 @@ def rewriteLinks(self, page, targetHost):
110144
page = page.replace('{{uiroot}}/history', '{{uiroot}}' + target + 'history')
111145
return page
112146

147+
def rewriteWorkerLinks(self, page):
    """Point proxied worker hrefs at the host named in a worker page's title.

    The page is parsed with SparkHTMLParser. When its title identifies it as
    a Spark worker page, every href matching WORKER_HREF_PATTERN has its
    worker-name part replaced by the host extracted from the title URL.
    Pages of any other type are returned unchanged.

    Args:
        page: HTML text of the proxied response.

    Returns:
        The page text, with worker links rewritten where applicable.
    """
    parser = SparkHTMLParser()
    parser.feed(page)
    if parser.spark_type != 'worker':
        return page

    url_match = re.search(URL_PATTERN, parser.spark_url)
    if url_match is None:
        # The title URL carries no "host:port"; leave the page untouched
        # instead of failing the request with an AttributeError on None.
        return page

    host = url_match.group('host')
    # A callable replacement inserts the host literally, so it can never be
    # misread as a backreference template.
    return re.sub(
        WORKER_HREF_PATTERN,
        lambda m: m.group(1) + host + m.group(3),
        page,
    )
154+
155+
def removeDeadLinks(self, page):
    """Strip the proxied 'Back to Master' paragraph link from a page.

    Args:
        page: HTML text of the proxied response.

    Returns:
        The page text with every
        ``<p><a href="/proxy:HOST:PORT">Back to Master</a></p>`` removed.
    """
    # The host class includes '.', so links that point at an IP address or a
    # fully-qualified host name are removed as well.
    return re.sub(
        r'<p><a href="/proxy:[-\w.]+:\d+">Back to Master</a></p>',
        '',
        page,
    )
113158

114159
if __name__ == '__main__':
115160
if len(sys.argv) < 2:

0 commit comments

Comments
 (0)