diff --git a/Jenkinsfile b/Jenkinsfile
new file mode 100644
index 0000000..3574892
--- /dev/null
+++ b/Jenkinsfile
@@ -0,0 +1,41 @@
+// Random suffix that makes the pod template label unique per run.
+label = "${UUID.randomUUID().toString()}"
+git_project = "spark-ui-proxy"
+git_project_user = "v3io"
+git_project_upstream_user = "v3io"
+git_deploy_user = "iguazio-prod-git-user"
+// Jenkins credential IDs, resolved below via withCredentials and the library retriever.
+git_deploy_user_token = "iguazio-prod-git-user-token"
+git_deploy_user_private_key = "iguazio-prod-git-user-private-key"
+
+podTemplate(label: "${git_project}-${label}", inheritFrom: "jnlp-docker-golang") {
+    node("${git_project}-${label}") {
+        // Load the shared pipelinex library over SSH using the deploy key credential.
+        pipelinex = library(identifier: 'pipelinex@development', retriever: modernSCM(
+                [$class       : 'GitSCMSource',
+                 credentialsId: git_deploy_user_private_key,
+                 remote       : "git@github.com:iguazio/pipelinex.git"])).com.iguazio.pipelinex
+        common.notify_slack {
+            withCredentials([
+                    string(credentialsId: git_deploy_user_token, variable: 'GIT_TOKEN')
+            ]) {
+                github.release(git_deploy_user, git_project, git_project_user, git_project_upstream_user, true, GIT_TOKEN) {
+                    stage("build ${git_project} in dood") {
+                        container('docker-cmd') {
+                            dir("${github.BUILD_FOLDER}/src/github.com/${git_project_upstream_user}/${git_project}") {
+                                // Empty repository prefix: the Makefile then tags the image as plain "spark-ui-proxy:<tag>".
+                                common.shellc("SPARK_UI_PROXY_TAG=${github.DOCKER_TAG_VERSION} SPARK_UI_PROXY_REPOSITORY='' make build")
+                            }
+                        }
+                    }
+
+                    stage('push') {
+                        container('docker-cmd') {
+                            dockerx.images_push_multi_registries(["${git_project}:${github.DOCKER_TAG_VERSION}"], [pipelinex.DockerRepo.ARTIFACTORY_IGUAZIO, pipelinex.DockerRepo.DOCKER_HUB, pipelinex.DockerRepo.QUAY_IO, pipelinex.DockerRepo.GCR_IO])
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..52d4920
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,8 @@
+# Image tag and repository prefix; `?=` keeps any value already set by the caller.
+SPARK_UI_PROXY_TAG ?= latest
+SPARK_UI_PROXY_REPOSITORY ?= v3io/
+
+.PHONY: build
+# Build the Docker image from the current directory.
+build:
+	docker build --tag=$(SPARK_UI_PROXY_REPOSITORY)spark-ui-proxy:$(SPARK_UI_PROXY_TAG) .
diff --git a/spark-ui-proxy.py b/spark-ui-proxy.py
index 62dacc1..09033d0 100644
--- a/spark-ui-proxy.py
+++ b/spark-ui-proxy.py
@@ -27,12 +27,57 @@
 import sys
 import urllib2
 from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer
+from HTMLParser import HTMLParser
+import re
 
 BIND_ADDR = os.environ.get("BIND_ADDR", "0.0.0.0")
 SERVER_PORT = int(os.environ.get("SERVER_PORT", "80"))
 URL_PREFIX = os.environ.get("URL_PREFIX", "").rstrip('/') + '/'
 SPARK_MASTER_HOST = ""
-
+# Page title of a Spark UI page, e.g. "Spark Worker at 10.0.0.1:7078".
+SPARK_TITLE_PATTERN = r"Spark (?P<spark_type>\w+) at (?P<spark_url>\S+)"
+# "[scheme://]host:port"; only the "host" group is looked up by name in this file.
+URL_PATTERN = r"(?P<scheme>\w+://)?(?P<host>[-\w.]+):(?P<port>\d+)"
+# Groups: href prefix, worker name, trailing colon.
+WORKER_HREF_PATTERN = r'(href="/proxy:)(spark-[-\w]+-worker-[-\w]+)(:)'
+
+class SparkHTMLParser(HTMLParser):
+    """Pull the Spark component type and address out of a page's <title>.
+
+    After feed(), spark_type is the lower-cased word after "Spark" (such as
+    "worker") and spark_url the text after "at"; both remain None when no
+    title text matches SPARK_TITLE_PATTERN.
+    """
+
+    def __init__(self):
+        # True while the parser is inside a <title> element.
+        self.title = False
+        self.spark_title = None
+        self.spark_type = None
+        self.spark_url = None
+        # HTMLParser is an old-style class on Python 2, so super() cannot be used.
+        HTMLParser.__init__(self)
+
+    def handle_starttag(self, tag, attrs):
+        if tag == 'title':
+            self.title = True
+
+    def handle_endtag(self, tag):
+        if tag == 'title':
+            self.title = False
+
+    def handle_data(self, data):
+        if not self.title:
+            return
+        if self.spark_type is not None or self.spark_url is not None:
+            raise Exception("Spark data is already filled (orig title: {0}, current title: {1})".format(self.spark_title, data))
+        self.spark_title = data
+        # A non-Spark title simply does not match; leave the fields as None
+        # rather than hiding every possible error behind a bare except.
+        match = re.search(SPARK_TITLE_PATTERN, data)
+        if match is not None:
+            self.spark_type = match.group("spark_type").lower()
+            self.spark_url = match.group("spark_url")
 
 class ProxyHandler(BaseHTTPRequestHandler):
     def do_GET(self):
@@ -57,6 +102,14 @@ def do_POST(self):
         postData = self.rfile.read(length)
         self.proxyRequest(postData)
 
+    def log_request(self, code='-', size='-'):
+        """Log the request, skipping successful health-check probes."""
+        if "GET /healthz" in self.requestline and code == 200:
+            return
+        # BaseHTTPRequestHandler is an old-style class on Python 2, where
+        # super() raises TypeError; call the base implementation directly.
+        BaseHTTPRequestHandler.log_request(self, code, size)
+
     def proxyRequest(self, data):
         targetHost, path = self.extractUrlDetails(self.path)
         targetUrl = "http://" + targetHost + path
@@ -74,6 +127,8 @@ def proxyRequest(self, data):
         if resCode == 200:
             page = proxiedRequest.read()
             page = self.rewriteLinks(page, targetHost)
+            page = self.rewriteWorkerLinks(page)
+            page = self.removeDeadLinks(page)
             resContentType = proxiedRequest.info()["Content-Type"]
             self.send_response(200)
             self.send_header("Content-Type", resContentType)
@@ -110,6 +165,35 @@ def rewriteLinks(self, page, targetHost):
         page = page.replace('{{uiroot}}/history', '{{uiroot}}' + target + 'history')
         return page
 
+    def rewriteWorkerLinks(self, page):
+        """Replace the worker name in proxied links with the worker's own host.
+
+        The page is parsed with SparkHTMLParser; only when its title marks it
+        as a worker page are the links matching WORKER_HREF_PATTERN rewritten,
+        using the host taken from the title. Any other page, or a worker
+        title whose address does not match URL_PATTERN, is returned unchanged.
+        """
+        parser = SparkHTMLParser()
+        parser.feed(page)
+        if parser.spark_type != 'worker':
+            return page
+        match = re.search(URL_PATTERN, parser.spark_url)
+        if match is None:
+            # Unparseable address: serve the page as-is instead of failing
+            # the whole request on None.groupdict().
+            return page
+        host = match.group('host')
+        # A callable replacement inserts the host verbatim.
+        return re.sub(WORKER_HREF_PATTERN,
+                      lambda m: m.group(1) + host + m.group(3),
+                      page)
+
+    def removeDeadLinks(self, page):
+        """Remove the "Back to Master" link from the page."""
+        # NOTE(review): the HTML tags of this pattern are reconstructed
+        # around the surviving "Back to Master" text; confirm against the
+        # markup the worker page actually serves.
+        return re.sub(r'<p>\s*<a\b[^>]*>\s*Back to Master\s*</a>\s*</p>', '', page)
 
 if __name__ == '__main__':
     if len(sys.argv) < 2: