拉取镜像
# Pull the four images that the docker run commands below start:
# Elasticsearch, Kibana, the SkyWalking OAP server and the SkyWalking UI.
docker pull elasticsearch:7.16.1
docker pull kibana:7.16.1
docker pull apache/skywalking-oap-server:8.6.0-es7
docker pull apache/skywalking-ui:8.6.0
部署ES
# Create the host directories that are bind-mounted into the container.
mkdir -p /data/elasticsearch/{config,data,plugins}

# Make Elasticsearch listen on all interfaces inside the container.
# The grep guard keeps a re-run from appending a duplicate http.host key.
grep -qxF "http.host: 0.0.0.0" /data/elasticsearch/config/elasticsearch.yml 2>/dev/null \
  || echo "http.host: 0.0.0.0" >> /data/elasticsearch/config/elasticsearch.yml

# The official image runs Elasticsearch as uid 1000 / gid 0. Hand the tree to
# that user instead of making it world-writable with chmod 777.
chown -R 1000:0 /data/elasticsearch

# Single-node Elasticsearch with a fixed 2 GB heap; config, data and plugins
# live on the host under /data/elasticsearch.
docker run --name elasticsearch -d \
  -p 9200:9200 \
  -p 9300:9300 \
  -e "discovery.type=single-node" \
  -e ES_JAVA_OPTS="-Xms2048m -Xmx2048m" \
  -v /data/elasticsearch/config/elasticsearch.yml:/usr/share/elasticsearch/config/elasticsearch.yml \
  -v /data/elasticsearch/data:/usr/share/elasticsearch/data \
  -v /data/elasticsearch/plugins:/usr/share/elasticsearch/plugins \
  elasticsearch:7.16.1
部署kibana
elasticsearch.hosts更换成对应的ES服务器IP
cat /data/kibana/config/kibana.yml
# Kibana settings mounted into the container as config/kibana.yml.
server.port: 5601
server.host: "0.0.0.0"
# Must be a full URL (scheme included) of the Elasticsearch server.
elasticsearch.hosts: ["http://172.28.0.107:9200"]
# Locale identifiers are case-sensitive; the Chinese locale is "zh-CN".
i18n.locale: "zh-CN"
ELASTICSEARCH_HOSTS更换ES服务器IP
# Start Kibana on port 5601 with the host kibana.yml mounted as its config.
# ELASTICSEARCH_HOSTS must point at the Elasticsearch server.
docker run \
  --name kibana \
  -e ELASTICSEARCH_HOSTS=http://172.28.0.107:9200 \
  -v /data/kibana/config/kibana.yml:/usr/share/kibana/config/kibana.yml \
  -p 5601:5601 \
  -d \
  kibana:7.16.1
部署skywalking-oap
创建配置文件
#mkdir -p /data/skywalking/config/ && cat /data/skywalking/config/alarm-settings.yml
# SkyWalking OAP alarm settings, mounted into the container as
# /skywalking/config/alarm-settings.yml.
# Each entry under `rules` is one alarm rule; `webhooks` and `dingtalkHooks`
# configure where triggered alarms are sent.
rules:
  # Rule unique name, must be ended with `_rule`.
  service_resp_time_rule:
    metrics-name: service_resp_time
    op: ">"
    threshold: 1000
    period: 10
    count: 3
    silence-period: 5
    message: Response time of service {name} is more than 1000ms in 3 minutes of last 10 minutes.
  service_sla_rule:
    # Metrics value need to be long, double or int
    metrics-name: service_sla
    op: "<"
    threshold: 8000
    # The length of time to evaluate the metrics
    period: 10
    # How many times after the metrics match the condition, will trigger alarm
    count: 2
    # How many times of checks, the alarm keeps silence after alarm triggered, default as same as period.
    silence-period: 3
    message: Successful rate of service {name} is lower than 80% in 2 minutes of last 10 minutes
  service_resp_time_percentile_rule:
    # Metrics value need to be long, double or int
    metrics-name: service_percentile
    op: ">"
    threshold: 1000,1000,1000,1000,1000
    period: 10
    count: 3
    silence-period: 5
    message: Percentile response time of service {name} alarm in 3 minutes of last 10 minutes, due to more than one condition of p50 > 1000, p75 > 1000, p90 > 1000, p95 > 1000, p99 > 1000
  service_instance_resp_time_rule:
    metrics-name: service_instance_resp_time
    op: ">"
    threshold: 1000
    period: 10
    count: 2
    silence-period: 5
    message: Response time of service instance {name} is more than 1000ms in 2 minutes of last 10 minutes
  database_access_resp_time_rule:
    metrics-name: database_access_resp_time
    threshold: 1000
    op: ">"
    period: 10
    count: 2
    message: Response time of database access {name} is more than 1000ms in 2 minutes of last 10 minutes
  endpoint_relation_resp_time_rule:
    metrics-name: endpoint_relation_resp_time
    threshold: 1000
    op: ">"
    period: 10
    count: 2
    message: Response time of endpoint relation {name} is more than 1000ms in 2 minutes of last 10 minutes
  # Active endpoint related metrics alarm will cost more memory than service and service instance metrics alarm.
  # Because the number of endpoint is much more than service and instance.
  #
  # endpoint_avg_rule:
  #   metrics-name: endpoint_avg
  #   op: ">"
  #   threshold: 1000
  #   period: 10
  #   count: 2
  #   silence-period: 5
  #   message: Response time of endpoint {name} is more than 1000ms in 2 minutes of last 10 minutes

# Generic HTTP webhooks; none are enabled.
webhooks:
#  - http://127.0.0.1/notify/
#  - http://127.0.0.1/go-wechat/

# DingTalk robot notification. %s in the template is replaced by the alarm message.
dingtalkHooks:
  textTemplate: |-
    {
      "msgtype": "text",
      "text": {
        "content": "SkyWalking: \n %s."
      }
    }
  webhooks:
    # NOTE(review): replace access_token and secret with the values of your own
    # DingTalk robot, and keep real credentials out of version control.
    - url: https://oapi.dingtalk.com/robot/send?access_token=55d15241725dde83122f01b884e70ee5a83b36060ac477e912d5bc9498e113xsa
      secret: SECa3928913089615655cdf89cf9fb1f38bfdb1d0e544ca67efe0ebdc29a1fda23x
启动skywalking-oap服务
# Start the SkyWalking OAP server, storing data in Elasticsearch 7.
# "elasticsearch" in --link and in SW_STORAGE_ES_CLUSTER_NODES is the name of
# the Elasticsearch container started earlier.
# Comments must stay outside the command: text after a trailing backslash
# breaks the line continuation and drops the remaining arguments.
docker run --name skywalking-oap -d \
  --restart=always \
  -e TZ=Asia/Shanghai \
  -p 12800:12800 \
  -p 11800:11800 \
  --link elasticsearch:elasticsearch \
  -e JAVA_OPTS="-Xms2G -Dlog4j2.formatMsgNoLookups=true" \
  -e SW_STORAGE=elasticsearch7 \
  -e SW_STORAGE_ES_CLUSTER_NODES=elasticsearch:9200 \
  -e SW_STORAGE_ES_INDEX_SHARDS_NUMBER=2 \
  -e SW_STORAGE_ES_INDEX_REPLICAS_NUMBER=0 \
  -v /data/skywalking/config/alarm-settings.yml:/skywalking/config/alarm-settings.yml \
  apache/skywalking-oap-server:8.6.0-es7
备注:JAVA_OPTS 中的 -Dlog4j2.formatMsgNoLookups=true 用于缓解 log4j2 漏洞;-Xms2G 只是设置 JVM 初始堆大小,与漏洞无关
部署skywalking-ui
#cat /data/skywalking-ui/docker-entrypoint.sh
#!/bin/bash
# Entrypoint for the SkyWalking UI container.
# Maps SW_OAP_ADDRESS / SW_TIMEOUT onto the COLLECTOR_RIBBON_* variables,
# then replaces the shell with the web application, passing JAVA_OPTS to the JVM.
set -e

export LOGGING_CONFIG="webapp/logback.xml"

# Address of the OAP server, only exported when SW_OAP_ADDRESS is set.
if [[ -n "$SW_OAP_ADDRESS" ]]; then
  export COLLECTOR_RIBBON_LISTOFSERVERS=${SW_OAP_ADDRESS}
  echo "COLLECTOR_RIBBON_LISTOFSERVERS=$COLLECTOR_RIBBON_LISTOFSERVERS"
fi

# Read timeout, only exported when SW_TIMEOUT is set.
if [[ -n "$SW_TIMEOUT" ]]; then
  export COLLECTOR_RIBBON_READTIMEOUT=${SW_TIMEOUT}
  echo "COLLECTOR_RIBBON_READTIMEOUT=$COLLECTOR_RIBBON_READTIMEOUT"
fi

# JAVA_OPTS is left unquoted on purpose so that it splits into separate JVM flags.
exec java ${JAVA_OPTS} -jar webapp/skywalking-webapp.jar "$@"
启动:
# Start the SkyWalking UI on port 8080, linked to the skywalking-oap container.
# SW_OAP_ADDRESS points the UI at the OAP server on port 12800, and the host
# docker-entrypoint.sh is mounted over the one in the image.
docker run -d --name skywalking-ui \
--restart=always \
-e TZ=Asia/Shanghai \
-p 8080:8080 \
--link skywalking-oap:skywalking-oap \
-e SW_OAP_ADDRESS=skywalking-oap:12800 \
-e JAVA_OPTS="-Dlog4j2.formatMsgNoLookups=true" \
-v /data/skywalking-ui/docker-entrypoint.sh:/skywalking/docker-entrypoint.sh \
apache/skywalking-ui:8.6.0
备注:
#-e JAVA_OPTS="-Dlog4j2.formatMsgNoLookups=true" 解决log4j2漏洞
#-v /data/skywalking-ui/docker-entrypoint.sh:/skywalking/docker-entrypoint.sh 镜像默认以 exec java -jar webapp/skywalking-webapp.jar "$@" 启动(不带 ${JAVA_OPTS}),因此挂载自定义的 docker-entrypoint.sh,使上面的 JAVA_OPTS 生效
k8s-node配置skywalking-agent 需要在每个被监控主机节点部署skywalking-agent(当然也有一种方式是直接在每个pod里添加一个“skywalking-agent”容器)
# Download the SkyWalking 8.6.0 (ES7) distribution and unpack it.
wget https://archive.apache.org/dist/skywalking/8.6.0/apache-skywalking-apm-es7-8.6.0.tar.gz
tar xf apache-skywalking-apm-es7-8.6.0.tar.gz
# Move its agent directory to /usr/local/skywalking, the hostPath that the
# Deployment template mounts into the pod at /skywalking.
mv apache-skywalking-apm-bin-es7/agent /usr/local/skywalking
服务部署资源文件(Deployment)模板
# Kubernetes Deployment template (Jinja2) for a service instrumented with the
# SkyWalking Java agent.
# Change collector.backend_service (172.28.0.107:11800) in JAVA_TOOL_OPTIONS to
# the address of your SkyWalking OAP server.
# Templated scalars are quoted so that values that look like numbers or
# booleans still render as strings.
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app: "{{service_name}}"
    version: "{{feature}}"
  {%- if feature == 'default' %}
  name: "{{service_name}}"
  {% else %}
  name: "{{service_name}}-{{feature}}"
  {%- endif %}
  namespace: "{{namespace}}"
spec:
  progressDeadlineSeconds: 600
  replicas: 1
  revisionHistoryLimit: 10
  selector:
    matchLabels:
      app: "{{service_name}}"
      version: "{{feature}}"
  strategy:
    rollingUpdate:
      maxSurge: 25%
      maxUnavailable: 25%
    type: RollingUpdate
  template:
    metadata:
      annotations:
        izone: "{{feature}}"
      labels:
        app: "{{service_name}}"
        version: "{{feature}}"
    spec:
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: node-role.kubernetes.io/app
                    operator: In
                    values:
                      - app
      containers:
        - env:
            # izone only gets a value for non-default features.
            - name: izone
              {%- if feature != 'default' %}
              value: "{{feature}}"
              {%- endif %}
            - name: MY_SERVICE_NAME
              value: "{{service_name}}"
            - name: ilogEnvs
              value: izone,buildNumber
            - name: ILOG_BOOTSTRAP_SERVERS
              valueFrom:
                configMapKeyRef:
                  key: ILOG_BOOTSTRAP_SERVERS
                  name: global-config
            - name: CONFIG_SERVER_URL
              valueFrom:
                configMapKeyRef:
                  key: CONFIG_SERVER_URL
                  name: global-config
            - name: CONFIG_SERVER_USER
              valueFrom:
                configMapKeyRef:
                  key: CONFIG_SERVER_USER
                  name: global-config
            - name: CONFIG_SERVER_PWDS
              valueFrom:
                configMapKeyRef:
                  key: CONFIG_SERVER_PWDS
                  name: global-config
            - name: JAR_BOOT_ARG
              value: >-
                --spring.cloud.config.fail-fast=true
                --spring.profiles.active=kub
                --server.port=80
            - name: JDK_HEAP_OPTS
              value: >-
                -XX:+UseContainerSupport
                -XX:MaxRAMPercentage=75.0
            # Attach the SkyWalking agent from the /skywalking mount and point
            # it at the SkyWalking server (collector.backend_service).
            - name: JAVA_TOOL_OPTIONS
              value: >-
                -javaagent:/skywalking/skywalking-agent.jar=collector.backend_service=172.28.0.107:11800,agent.service_name={{service_name}}
          image: dev-hub.jiatuiyun.net/architect/arch-probe-behind:architect_b85
          imagePullPolicy: IfNotPresent
          livenessProbe:
            failureThreshold: 2
            httpGet:
              path: /actuator/info
              port: 80
              scheme: HTTP
            initialDelaySeconds: 120
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 30
          name: "{{service_name}}"
          ports:
            - containerPort: 80
              name: http
              protocol: TCP
          readinessProbe:
            failureThreshold: 3
            httpGet:
              path: /actuator/info
              port: 80
              scheme: HTTP
            initialDelaySeconds: 60
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 30
          resources:
            limits:
              memory: 800Mi
            requests:
              memory: 512Mi
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: File
          volumeMounts:
            - mountPath: /opt/jdk
              name: jdk-path
            - mountPath: /srv/applogs
              name: applogs
            # SkyWalking agent directory, provided by the node's hostPath volume.
            - mountPath: /skywalking
              name: skywalking-agent
      dnsPolicy: ClusterFirst
      imagePullSecrets:
        - name: registry-secret
      restartPolicy: Always
      securityContext: {}
      terminationGracePeriodSeconds: 30
      volumes:
        - hostPath:
            path: /usr/local/jdk/default
            type: ""
          name: jdk-path
        - hostPath:
            path: /data/applogs
            type: ""
          name: applogs
        # SkyWalking agent installed on the node under /usr/local/skywalking.
        - hostPath:
            path: /usr/local/skywalking
            type: ""
          name: skywalking-agent
|