由于 UnknownHostException,K8S 解释器上的 Apache Zeppelin 崩溃
Posted
技术标签:
【中文标题】由于 UnknownHostException,K8S 解释器上的 Apache Zeppelin 崩溃【英文标题】:Apache Zeppelin on K8S Interpreters crash due to UnknownHostException 【发布时间】:2020-12-18 11:40:02 【问题描述】:您好,需要帮助!
我已经按照官方文档(documentation),在我的 k8s 集群的 zeppelin 命名空间下部署了 apache/zeppelin:0.9.0。这是我的 zeppelin-server.yaml 文件:
# Persistent storage for Zeppelin notebooks. The zeppelin-server pod template
# mounts it through the claim name `zeppelin-notebook`.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: zeppelin-notebook
  namespace: zeppelin
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 5Gi
# The empty `status:` key was dropped: status is populated by the cluster and
# a bare key only yields a null value in the manifest.
---
# Environment variables for the zeppelin-server container (loaded with
# envFrom); SERVICE_DOMAIN is also read by the nginx gateway container.
apiVersion: v1
kind: ConfigMap
metadata:
  name: zeppelin-server-conf-map
data:
  # 'serviceDomain' is a Domain name to use for accessing Zeppelin UI.
  # Should point IP address of 'zeppelin-server' service.
  #
  # Wildcard subdomain need to be point the same IP address to access service inside of Pod (such as SparkUI).
  # i.e. if service domain is 'local.zeppelin-project.org', DNS configuration should make 'local.zeppelin-project.org' and '*.local.zeppelin-project.org' point the same address.
  #
  # Default value is 'local.zeppelin-project.org' while it points 127.0.0.1 and `kubectl port-forward zeppelin-server` will give localhost to connects.
  # If you have your ingress controller configured to connect to `zeppelin-server` service and have a domain name for it (with wildcard subdomain point the same address), you can replace serviceDomain field with your own domain.
  # SERVICE_DOMAIN: local.zeppelin-project.org:8080
  #
  # Host (and optional port) only: no scheme and no trailing slash. The gateway
  # substitutes this value into nginx.conf after `$scheme://` using a
  # `sed s/.../.../` expression, so `https://.../` would both break the sed
  # command and produce `$scheme://https://...` in the rendered config.
  SERVICE_DOMAIN: zeppelin.karstecsa.com.ar
  ZEPPELIN_K8S_SPARK_CONTAINER_IMAGE: spark:2.4.5
  ZEPPELIN_K8S_CONTAINER_IMAGE: apache/zeppelin:0.9.0-SNAPSHOT
  ZEPPELIN_HOME: /zeppelin
  # Pinned to the single `rpc` port (12320) that the zeppelin-server container
  # and Service expose; quoted so the value is always read as a string.
  ZEPPELIN_SERVER_RPC_PORTRANGE: "12320:12320"
  # default value of 'master' property for spark interpreter.
  # The API server's `kubernetes` Service lives in the `default` namespace.
  SPARK_MASTER: k8s://https://kubernetes.default.svc
  # default value of 'SPARK_HOME' property for spark interpreter.
  SPARK_HOME: /spark
---
# nginx configuration template for the gateway container. The literal tokens
# SERVICE_DOMAIN and NAMESPACE are placeholders that the gateway container
# replaces with sed before starting nginx.
apiVersion: v1
kind: ConfigMap
metadata:
  name: zeppelin-server-conf
data:
  nginx.conf: |
    daemon off;
    worker_processes auto;
    events {
      worker_connections 1024;
    }
    http {
      map $http_upgrade $connection_upgrade {
        default upgrade;
        '' close;
      }
      # first server block will be default. Proxy zeppelin server.
      server {
        listen 80;
        location / {
          proxy_pass http://localhost:8080;
          proxy_set_header Host $host;
          proxy_http_version 1.1;
          proxy_set_header Upgrade $http_upgrade;
          proxy_set_header Connection $connection_upgrade;
          proxy_redirect http://localhost $scheme://SERVICE_DOMAIN;
        }
      }
      # match request domain [port]-[service].[serviceDomain]
      # proxy extra service such as spark-ui
      server {
        listen 80;
        server_name "~(?<svc_port>[0-9]+)-(?<svc_name>[^.]*)\.(.*)";
        location / {
          resolver 127.0.0.1:53 ipv6=off;
          proxy_pass http://$svc_name.NAMESPACE.svc:$svc_port;
          proxy_set_header Host $host;
          proxy_http_version 1.1;
          proxy_set_header Upgrade $http_upgrade;
          proxy_set_header Connection $connection_upgrade;
          proxy_redirect http://localhost $scheme://SERVICE_DOMAIN;
          # redirect rule for spark ui. 302 redirect response misses port number of service domain
          proxy_redirect ~(http:[/]+[0-9]+[-][^-]+[-][^.]+)[^/]+(\/jobs.*) $1.SERVICE_DOMAIN$2;
        }
      }
    }
---
# Zeppelin server workload: the Zeppelin server itself, an nginx gateway that
# proxies the UI and per-interpreter services, and a dnsmasq sidecar that
# nginx uses as its resolver.
#
# NOTE(review): the interpreter log quoted in this post dials
# `<server-pod-name>.<namespace>.svc:12320`. A pod created by a Deployment has
# a generated name with no such DNS record, which matches the reported
# UnknownHostException. The answer in this post works around it by running a
# bare Pod named `zeppelin-server`, i.e. the same name as the Service.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: zeppelin-server
  labels:
    app.kubernetes.io/name: zeppelin-server
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: zeppelin-server
  strategy:
    type: RollingUpdate
  template:
    metadata:
      labels:
        app.kubernetes.io/name: zeppelin-server
    spec:
      serviceAccountName: zeppelin-server
      volumes:
        - name: nginx-conf
          configMap:
            name: zeppelin-server-conf
            items:
              - key: nginx.conf
                path: nginx.conf
        - name: zeppelin-server-notebook-volume
          persistentVolumeClaim:
            claimName: zeppelin-notebook
        # - name: zeppelin-server-conf
        #   persistentVolumeClaim:
        #     claimName: zeppelin-conf
        # - name: zeppelin-server-custom-k8s
        #   persistentVolumeClaim:
        #     claimName: zeppelin-k8s
      nodeSelector:
        kubernetes.io/hostname: worker01convergente
      containers:
        - name: zeppelin-server
          image: apache/zeppelin:0.9.0-SNAPSHOT
          command: ["sh", "-c", "$(ZEPPELIN_HOME)/bin/zeppelin.sh"]
          lifecycle:
            preStop:
              exec:
                # SIGTERM triggers a quick exit; gracefully terminate instead
                # awk needs the braces around its action: '{print $2}' selects the PID column.
                command: ["sh", "-c", "ps -ef | grep org.apache.zeppelin.server.ZeppelinServer | grep -v grep | awk '{print $2}' | xargs kill"]
          ports:
            - name: http
              containerPort: 8080
            - name: https
              containerPort: 8443
            - name: rpc
              containerPort: 12320
          env:
            - name: POD_UID
              valueFrom:
                fieldRef:
                  apiVersion: v1
                  fieldPath: metadata.uid
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  apiVersion: v1
                  fieldPath: metadata.name
          envFrom:
            - configMapRef:
                name: zeppelin-server-conf-map
          volumeMounts:
            - name: zeppelin-server-notebook-volume  # configure this to persist notebook
              mountPath: /zeppelin/notebook
            # - name: zeppelin-server-conf  # configure this to persist Zeppelin configuration
            #   mountPath: /zeppelin/conf
            # - name: zeppelin-server-custom-k8s  # configure this to mount customized Kubernetes spec for interpreter
            #   mountPath: /zeppelin/k8s
        - name: zeppelin-server-gateway
          image: nginx:1.14.0
          command: ["/bin/sh", "-c"]
          env:
            - name: SERVICE_DOMAIN
              valueFrom:
                configMapKeyRef:
                  name: zeppelin-server-conf-map
                  key: SERVICE_DOMAIN
          # Renders the nginx.conf template, prints it for debugging, then runs
          # nginx in the foreground. The SERVICE_DOMAIN substitution uses `|` as
          # the sed delimiter so a value containing `/` cannot break the command.
          args:
            - cp -f /tmp/conf/nginx.conf /etc/nginx/nginx.conf;
              sed -i -e "s|SERVICE_DOMAIN|$SERVICE_DOMAIN|g" /etc/nginx/nginx.conf;
              sed -i -e "s/NAMESPACE/$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace)/g" /etc/nginx/nginx.conf;
              cat /etc/nginx/nginx.conf;
              /usr/sbin/nginx
          volumeMounts:
            - name: nginx-conf
              mountPath: /tmp/conf
          lifecycle:
            preStop:
              exec:
                # SIGTERM triggers a quick exit; gracefully terminate instead
                command: ["/usr/sbin/nginx", "-s", "quit"]
        - name: dnsmasq  # nginx requires dns resolver for dynamic dns resolution
          image: "janeczku/go-dnsmasq:release-1.0.5"
          args:
            - --listen
            - "127.0.0.1:53"
            - --default-resolver
            - --append-search-domains
            - --hostsfile=/etc/hosts
            - --verbose
---
# Exposes the nginx gateway (http) and the Zeppelin RPC port of the pods
# labelled app.kubernetes.io/name=zeppelin-server.
kind: Service
apiVersion: v1
metadata:
  name: zeppelin-server
spec:
  ports:
    - name: http
      port: 80
    - name: rpc  # port name is referenced in the code. So it shouldn't be changed.
      port: 12320
  selector:
    app.kubernetes.io/name: zeppelin-server
---
# Identity used by the zeppelin-server pod (see serviceAccountName in the pod spec).
apiVersion: v1
kind: ServiceAccount
metadata:
  name: zeppelin-server
---
# Namespaced permissions granted to the Zeppelin server: manage pods and
# services, plus roles and role bindings.
kind: Role
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: zeppelin-server-role
rules:
  - apiGroups: [""]
    resources: ["pods", "services"]
    verbs: ["create", "get", "update", "patch", "list", "delete", "watch"]
  - apiGroups: ["rbac.authorization.k8s.io"]
    resources: ["roles", "rolebindings"]
    verbs: ["bind", "create", "get", "update", "patch", "list", "delete", "watch"]
---
# Binds zeppelin-server-role to the zeppelin-server ServiceAccount.
kind: RoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: zeppelin-server-role-binding
subjects:
  - kind: ServiceAccount
    name: zeppelin-server
roleRef:
  kind: Role
  name: zeppelin-server-role
  apiGroup: rbac.authorization.k8s.io
我尝试过的
起初,我无法用 python 解释器运行“helloworld”,因为 zeppelin:0.9.0 docker 镜像中没有安装 kubectl,所以我构建了 0.9.0-SNAPSHOT 镜像并在其中安装了 kubectl。现在 python 解释器的 pod 和 service 已经出现在我的仪表板上,但容器崩溃并报出以下错误:
INFO [2020-08-29 22:04:39,544] (main RemoteInterpreterServer.java[<init>]:161) - Starting remote interpreter server on port 0, intpEventServerAddress: zeppelin-server-6d7f46d8d8-m9jlv.zeppelin.svc:12320
Exception in thread "main" org.apache.zeppelin.shaded.org.apache.thrift.transport.TTransportException:
java.net.UnknownHostException: zeppelin-server-6d7f46d8d8-m9jlv.zeppelin.svc
at org.apache.zeppelin.shaded.org.apache.thrift.transport.TSocket.open(TSocket.java:226)
at org.apache.zeppelin.interpreter.remote.RemoteInterpreterServer.<init>(RemoteInterpreterServer.java:167)
at org.apache.zeppelin.interpreter.remote.RemoteInterpreterServer.<init>(RemoteInterpreterServer.java:152)
at org.apache.zeppelin.interpreter.remote.RemoteInterpreterServer.main(RemoteInterpreterServer.java:321)
Caused by: java.net.UnknownHostException: zeppelin-server-6d7f46d8d8-m9jlv.zeppelin.svc
at java.net.AbstractPlainSocketImpl.connect(AbstractPlainSocketImpl.java:184)
at java.net.SocksSocketImpl.connect(SocksSocketImpl.java:392)
at java.net.Socket.connect(Socket.java:607)
at org.apache.zeppelin.shaded.org.apache.thrift.transport.TSocket.open(TSocket.java:221)
... 3 more
好的!也就是说,python 解释器日志中出现的主机名正是我的 zeppelin-server pod 的名称 zeppelin-server-6d7f46d8d8-m9jlv(如此处截图所示)。
我不知道还有什么办法让它工作! 任何帮助、提示或建议将不胜感激!另外,如果需要更多信息,请告诉我,我当然会提供。
【问题讨论】:
Pod 名称不会出现在 svc.cluster.local 域名下,因为在设计合理的集群中通常不需要直接寻址单个 pod;如果您确实需要按名称解析某个 Pod,则需要使用 StatefulSet,它会为每个 pod 成员分配一个可在无头(headless)Service 名称下解析的主机名。
感谢您的回复。我已经为另一个项目开发过一些 StatefulSet(例如 Cassandra 的 StatefulSet),但 Zeppelin 这次的情况不同:文档中的说明(链接见帖子上方)使用的是 Deployment 而不是 StatefulSet。
【参考方案1】:
我可以通过将 Deployment 更改为 Pod 来运行这个堆栈。这是我正在使用的完整 yaml:
请注意,我在 Pod 规范中使用的是自定义的 zeppelin:0.9.0 镜像,而不是 apache/zeppelin:0.9.0,因为后者缺少 kubectl 二进制文件。
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Environment variables for the zeppelin-server container (loaded with
# envFrom); SERVICE_DOMAIN is also read by the nginx gateway container.
apiVersion: v1
kind: ConfigMap
metadata:
  name: zeppelin-server-conf-map
data:
  # 'serviceDomain' is a Domain name to use for accessing Zeppelin UI.
  # Should point IP address of 'zeppelin-server' service.
  #
  # Wildcard subdomain need to be point the same IP address to access service inside of Pod (such as SparkUI).
  # i.e. if service domain is 'local.zeppelin-project.org', DNS configuration should make 'local.zeppelin-project.org' and '*.local.zeppelin-project.org' point the same address.
  #
  # Default value is 'local.zeppelin-project.org' while it points 127.0.0.1 and `kubectl port-forward zeppelin-server` will give localhost to connects.
  # If you have your ingress controller configured to connect to `zeppelin-server` service and have a domain name for it (with wildcard subdomain point the same address), you can replace serviceDomain field with your own domain.
  SERVICE_DOMAIN: local.zeppelin-project.org:8080
  ZEPPELIN_K8S_SPARK_CONTAINER_IMAGE: spark:2.4.7-bin-hadoop-3.2.0-cloud-scala-2.11
  ZEPPELIN_K8S_CONTAINER_IMAGE: apache/zeppelin:0.9.0
  ZEPPELIN_HOME: /zeppelin
  # Quoted so the digits-and-colon value is always read as a string.
  ZEPPELIN_SERVER_RPC_PORTRANGE: "12320:12320"
  # default value of 'master' property for spark interpreter.
  SPARK_MASTER: k8s://https://kubernetes.default.svc
  # default value of 'SPARK_HOME' property for spark interpreter.
  SPARK_HOME: /spark
---
# nginx configuration template for the gateway container. The literal tokens
# SERVICE_DOMAIN and NAMESPACE are placeholders that the gateway container
# replaces with sed before starting nginx.
apiVersion: v1
kind: ConfigMap
metadata:
  name: zeppelin-server-conf
data:
  sparkContainerImage: spark:2.4.7-bin-hadoop-3.2.0-cloud-scala-2.11
  nginx.conf: |
    daemon off;
    worker_processes auto;
    events {
      worker_connections 1024;
    }
    http {
      map $http_upgrade $connection_upgrade {
        default upgrade;
        '' close;
      }
      # first server block will be default. Proxy zeppelin server.
      server {
        listen 80;
        location / {
          proxy_pass http://localhost:8080;
          proxy_set_header Host $host;
          proxy_http_version 1.1;
          proxy_set_header Upgrade $http_upgrade;
          proxy_set_header Connection $connection_upgrade;
          proxy_redirect http://localhost $scheme://SERVICE_DOMAIN;
        }
      }
      # match request domain [port]-[service].[serviceDomain]
      # proxy extra service such as spark-ui
      server {
        listen 80;
        server_name "~(?<svc_port>[0-9]+)-(?<svc_name>[^.]*)\.(.*)";
        location / {
          resolver 127.0.0.1:53 ipv6=off;
          proxy_pass http://$svc_name.NAMESPACE.svc:$svc_port;
          proxy_set_header Host $host;
          proxy_http_version 1.1;
          proxy_set_header Upgrade $http_upgrade;
          proxy_set_header Connection $connection_upgrade;
          proxy_redirect http://localhost $scheme://SERVICE_DOMAIN;
          # redirect rule for spark ui. 302 redirect response misses port number of service domain
          proxy_redirect ~(http:[/]+[0-9]+[-][^-]+[-][^.]+)[^/]+(\/jobs.*) $1.SERVICE_DOMAIN$2;
        }
      }
    }
---
# Zeppelin server as a bare Pod: the Zeppelin server itself, an nginx gateway
# that proxies the UI and per-interpreter services, and a dnsmasq sidecar that
# nginx uses as its resolver. The pod is named `zeppelin-server`, the same
# name as the Service in this manifest.
apiVersion: v1
kind: Pod
metadata:
  name: zeppelin-server
  labels:
    app.kubernetes.io/name: zeppelin-server
spec:
  serviceAccountName: zeppelin-server
  volumes:
    - name: nginx-conf
      configMap:
        name: zeppelin-server-conf
        items:
          - key: nginx.conf
            path: nginx.conf
  containers:
    - name: zeppelin-server
      image: zeppelin:0.9.0
      command: ["sh", "-c", "$(ZEPPELIN_HOME)/bin/zeppelin.sh"]
      lifecycle:
        preStop:
          exec:
            # SIGTERM triggers a quick exit; gracefully terminate instead
            # awk needs the braces around its action: '{print $2}' selects the PID column.
            command: ["sh", "-c", "ps -ef | grep org.apache.zeppelin.server.ZeppelinServer | grep -v grep | awk '{print $2}' | xargs kill"]
      ports:
        - name: http
          containerPort: 8080
        - name: https
          containerPort: 8443
        - name: rpc
          containerPort: 12320
      env:
        - name: POD_UID
          valueFrom:
            fieldRef:
              apiVersion: v1
              fieldPath: metadata.uid
        - name: POD_NAME
          valueFrom:
            fieldRef:
              apiVersion: v1
              fieldPath: metadata.name
      envFrom:
        - configMapRef:
            name: zeppelin-server-conf-map
      # volumeMounts:
      #   - name: zeppelin-server-notebook-volume  # configure this to persist notebook
      #     mountPath: /zeppelin/notebook
      #   - name: zeppelin-server-conf  # configure this to persist Zeppelin configuration
      #     mountPath: /zeppelin/conf
      #   - name: zeppelin-server-custom-k8s  # configure this to mount customized Kubernetes spec for interpreter
      #     mountPath: /zeppelin/k8s
    - name: zeppelin-server-gateway
      image: nginx:1.14.0
      command: ["/bin/sh", "-c"]
      env:
        - name: SERVICE_DOMAIN
          valueFrom:
            configMapKeyRef:
              name: zeppelin-server-conf-map
              key: SERVICE_DOMAIN
      # Renders the nginx.conf template (SERVICE_DOMAIN and NAMESPACE
      # placeholders), prints it for debugging, then runs nginx in the foreground.
      args:
        - cp -f /tmp/conf/nginx.conf /etc/nginx/nginx.conf;
          sed -i -e "s/SERVICE_DOMAIN/$SERVICE_DOMAIN/g" /etc/nginx/nginx.conf;
          sed -i -e "s/NAMESPACE/$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace)/g" /etc/nginx/nginx.conf;
          cat /etc/nginx/nginx.conf;
          /usr/sbin/nginx
      volumeMounts:
        - name: nginx-conf
          mountPath: /tmp/conf
      lifecycle:
        preStop:
          exec:
            # SIGTERM triggers a quick exit; gracefully terminate instead
            command: ["/usr/sbin/nginx", "-s", "quit"]
    - name: dnsmasq  # nginx requires dns resolver for dynamic dns resolution
      image: "janeczku/go-dnsmasq:release-1.0.5"
      args:
        - --listen
        - "127.0.0.1:53"
        - --default-resolver
        - --append-search-domains
        - --hostsfile=/etc/hosts
        - --verbose
---
# Exposes the nginx gateway (http) and the Zeppelin RPC port of the pod
# labelled app.kubernetes.io/name=zeppelin-server.
kind: Service
apiVersion: v1
metadata:
  name: zeppelin-server
spec:
  ports:
    - name: http
      port: 80
    - name: rpc  # port name is referenced in the code. So it shouldn't be changed.
      port: 12320
  selector:
    app.kubernetes.io/name: zeppelin-server
---
# Identity used by the zeppelin-server pod (see serviceAccountName in the pod spec).
apiVersion: v1
kind: ServiceAccount
metadata:
  name: zeppelin-server
---
# Namespaced permissions granted to the Zeppelin server: manage pods and
# services, plus roles and role bindings.
kind: Role
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: zeppelin-server-role
rules:
  - apiGroups: [""]
    resources: ["pods", "services"]
    verbs: ["create", "get", "update", "patch", "list", "delete", "watch"]
  - apiGroups: ["rbac.authorization.k8s.io"]
    resources: ["roles", "rolebindings"]
    verbs: ["bind", "create", "get", "update", "patch", "list", "delete", "watch"]
---
# Binds zeppelin-server-role to the zeppelin-server ServiceAccount.
kind: RoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: zeppelin-server-role-binding
subjects:
  - kind: ServiceAccount
    name: zeppelin-server
roleRef:
  kind: Role
  name: zeppelin-server-role
  apiGroup: rbac.authorization.k8s.io
此外,也可以尝试在 zeppelin 容器中覆盖 HOSTNAME 环境变量,但这可能会引发其他问题;我没有试过这种做法。
【讨论】:
Zeppelin 文档不准确。我使用 zeppelin-0.9.0-preview2 分支,成功在 k8s 上运行了带 Spark 的 Zeppelin:我构建了 docker 镜像,在其中加入 kubectl,并更新 zeppelin-server.yaml 以使用该镜像和我自己的 Spark 镜像。这篇文章也很有用:medium.com/@pliben1994/…
以上是关于“由于 UnknownHostException,K8S 解释器上的 Apache Zeppelin 崩溃”的主要内容,如果未能解决你的问题,请参考以下文章:
Java 无法从 AIX 解析 DNS 地址:UnknownHostException
Android 11 更新后出现 UnknownHostException
Android java.net.UnknownHostException:主机未解析