Add memory limit for all PAI services, make it 'Burstable' QoS class (#1384)

* set Kubernetes memory eviction threshold

With --system-reserved=memory=3Gi and --eviction-hard=memory.available<1Gi, the node only reaches that threshold if either some Pod is using more than its request, or the system is using more than 3Gi - 1Gi = 2Gi.

* set those pods as 'Guaranteed' QoS (see the sketch after this list):

node-exporter
hadoop-node-manager
hadoop-data-node
drivers-one-shot
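
For reference, a minimal sketch of the 'Guaranteed' pattern, with placeholder values rather than the actual limits from this change: Kubernetes classifies a Pod as 'Guaranteed' only when every container has cpu and memory limits and its requests equal those limits; setting limits alone is enough, because requests then default to the limits.

      containers:
      - name: node-exporter        # one of the pods listed above
        resources:
          limits:
            memory: "1Gi"          # placeholder value
            cpu: "1000m"           # placeholder value
          # no explicit requests: Kubernetes defaults requests to the
          # limits, so requests == limits -> 'Guaranteed' QoS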

* Set '--oom-score-adj=1000' for the job container

so it would be OOM-killed first (1000 is the highest possible adjustment, the same score Kubernetes assigns to BestEffort pods)

* set those pods as 'Burstable' QoS (see the sketch after this list):

prometheus
grafana
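
By contrast, a Pod is 'Burstable' when it has some resource constraint but misses the 'Guaranteed' bar, for example a memory limit with no cpu limit, or requests set below limits. A minimal sketch, again with placeholder values:

      containers:
      - name: prometheus           # one of the pods listed above
        resources:
          limits:
            memory: "256Mi"        # memory is capped, but requests are
          requests:                # lower and there is no cpu limit,
            memory: "128Mi"        # so the Pod is 'Burstable'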

* set those pods as 'Guaranteed' QoS:

frameworklauncher
hadoop-jobhistory
hadoop-name-node
hadoop-resource-manager
pylon
rest-server
webportal
zookeeper

* adjust services memory limits

* add k8s services resource limit

* seems 1G is not enough for launcher

* adjust hadoop-resource-manager limit

* adjust webportal memory limit

* adjust cpu limits

* rm yarn-exporter resource limits

* adjust prometheus limits

* adjust limits

* frameworklauncher: set JAVA_OPTS="-server -Xmx512m"

zookeeper: set JAVA_OPTS="-server -Xmx512m"

fix env name to JAVA_OPTS

fix zookeeper

* add heapsize limits for hadoop-data-node and hadoop-jobhistory

* add -Xmx for hadoop

* modify memory limits

* reserve 40G for singlebox, else reserve 12G

* using LAUNCHER_OPTS

* revert zookeeper dockerfile

* adjust node manager memory limit

* drivers would take more memory when installing

* increase memory for zookeeper and launcher

* set requests to a lower value

* comment it out, using the container env "YARN_RESOURCEMANAGER_HEAPSIZE"

* add comments
This commit is contained in:
Hao Yuan 2018-09-27 15:06:46 +08:00 committed by GitHub
Parent e10b0b4f88
Commit d9cf1d5f89
No key found matching this signature
GPG key ID: 4AEE18F83AFDEB23
28 changed files: 110 additions and 21 deletions

View file

@ -47,3 +47,7 @@ spec:
- --anonymous-auth=true
- --cors-allowed-origins
- .*
resources:
limits:
memory: "1Gi"
cpu: "1000m"

View file

@ -38,3 +38,7 @@ spec:
port: 10252
initialDelaySeconds: 15
timeoutSeconds: 1
resources:
limits:
memory: "1Gi"
cpu: "1000m"

View file

@ -46,7 +46,7 @@ spec:
resources:
# the ideal cpu setting will be 3.5, according to our experiment. If the server hosting k8s dashboard has enough resource, user can change this setting to a larger value.
limits:
cpu: "1"
cpu: "1000m"
memory: 3000Mi
requests:
cpu: "0.5"

View file

@ -52,6 +52,10 @@ spec:
volumeMounts:
- mountPath: /var/etcd
name: varetcd
resources:
limits:
memory: "1Gi"
cpu: "1000m"
volumes:
- hostPath:
path: {{ clusterconfig['etcd-data-path'] }}

View file

@ -63,7 +63,8 @@ docker run \
--allow-privileged=true \
--logtostderr=true \
--pod-infra-container-image {{ clusterconfig['dockerregistry'] }}/pause-amd64:3.0 \
--eviction-hard="memory.available<5%,nodefs.available<5%,imagefs.available<5%,nodefs.inodesFree<5%,imagefs.inodesFree<5%" \
--image-pull-progress-deadline=10m \
--docker-root=${DOCKER_ROOT_DIR_FOR_KUBELET} \
--system-reserved=memory=3Gi \
--eviction-hard="memory.available<1Gi,nodefs.available<5%,imagefs.available<5%,nodefs.inodesFree<5%,imagefs.inodesFree<5%" \
--v=2

View file

@ -36,3 +36,7 @@ spec:
port: 10251
initialDelaySeconds: 15
timeoutSeconds: 1
resources:
limits:
memory: "1Gi"
cpu: "1000m"

View file

@ -29,7 +29,7 @@ spec:
app: drivers-one-shot
spec:
hostNetwork: true
hostPID: false
hostPID: true
containers:
- name: nvidia-drivers
image: {{ clusterinfo['dockerregistryinfo']['prefix'] }}drivers:{{ clusterinfo['dockerregistryinfo']['docker_tag'] }}
@ -57,6 +57,11 @@ spec:
- /jobstatus/jobok
initialDelaySeconds: 5
periodSeconds: 3
resources:
limits:
memory: "2Gi"
requests:
memory: "256Mi"
imagePullSecrets:
- name: {{ clusterinfo['dockerregistryinfo']['secretname'] }}
volumes:

View file

@ -23,7 +23,7 @@ spec:
replicas: 1
selector:
matchLabels:
app: grafana
app: grafana
template:
metadata:
labels:
@ -50,5 +50,8 @@ spec:
value: {{ clusterinfo['grafanainfo']['grafana_url'] }}:{{ clusterinfo['grafanainfo']['grafana_port'] }}
- name: GF_AUTH_ANONYMOUS_ENABLED
value: "true"
resources:
limits:
memory: "256Mi"
imagePullSecrets:
- name: {{ clusterinfo['dockerregistryinfo']['secretname'] }}
- name: {{ clusterinfo['dockerregistryinfo']['secretname'] }}

View file

@ -52,7 +52,7 @@ fi
# and/or YARN_RESOURCEMANAGER_OPTS.
# If not specified, the default value will be picked from either YARN_HEAPMAX
# or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two.
export YARN_RESOURCEMANAGER_HEAPSIZE=8192
#export YARN_RESOURCEMANAGER_HEAPSIZE=8192
# Specify the max Heapsize for the timeline server using a numerical value
# in the scale of MB. For example, to specify an jvm option of -Xmx1000m, set
@ -77,7 +77,7 @@ export YARN_RESOURCEMANAGER_HEAPSIZE=8192
# and/or YARN_NODEMANAGER_OPTS.
# If not specified, the default value will be picked from either YARN_HEAPMAX
# or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two.
export YARN_NODEMANAGER_HEAPSIZE=4096
#export YARN_NODEMANAGER_HEAPSIZE=4096
# Specify the JVM options to be used when starting the NodeManager.
# These options will be appended to the options specified as YARN_OPTS

View file

@ -52,7 +52,7 @@ fi
# and/or YARN_RESOURCEMANAGER_OPTS.
# If not specified, the default value will be picked from either YARN_HEAPMAX
# or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two.
export YARN_RESOURCEMANAGER_HEAPSIZE=8192
#export YARN_RESOURCEMANAGER_HEAPSIZE=8192
# Specify the max Heapsize for the timeline server using a numerical value
# in the scale of MB. For example, to specify an jvm option of -Xmx1000m, set
@ -77,7 +77,7 @@ export YARN_RESOURCEMANAGER_HEAPSIZE=8192
# and/or YARN_NODEMANAGER_OPTS.
# If not specified, the default value will be picked from either YARN_HEAPMAX
# or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two.
export YARN_NODEMANAGER_HEAPSIZE=4096
#export YARN_NODEMANAGER_HEAPSIZE=4096
# Specify the JVM options to be used when starting the NodeManager.
# These options will be appended to the options specified as YARN_OPTS

View file

@ -50,7 +50,12 @@ spec:
- /jobstatus/jobok
initialDelaySeconds: 5
periodSeconds: 3
resources:
limits:
memory: "1Gi"
env:
- name: HADOOP_DATANODE_OPTS
value: "-Xmx512m"
- name: HDFS_ADDRESS
value: {{ clusterinfo[ 'hadoopinfo' ][ 'hadoop_vip' ] }}
- name: GENERATE_CONFIG

View file

@ -45,6 +45,10 @@ spec:
value: {{ clusterinfo[ 'hadoopinfo' ][ 'hadoop_vip' ] }}
- name: TIMELINE_SERVER_ADDRESS
value: {{ clusterinfo[ 'hadoopinfo' ][ 'hadoop_vip' ] }}
- name: YARN_TIMELINESERVER_HEAPSIZE
value: "512"
- name: HADOOP_JOB_HISTORYSERVER_HEAPSIZE
value: "512"
- name: GENERATE_CONFIG
value: jobhistory-generate-script.sh
- name: START_SERVICE
@ -61,6 +65,11 @@ spec:
- /jobstatus/jobok
initialDelaySeconds: 5
periodSeconds: 3
resources:
limits:
memory: "2Gi"
requests:
memory: "1Gi"
imagePullSecrets:
- name: {{ clusterinfo['dockerregistryinfo']['secretname'] }}
volumes:

View file

@ -51,10 +51,15 @@ spec:
env:
- name: HDFS_ADDRESS
value: {{ clusterinfo[ 'hadoopinfo' ][ 'hadoop_vip' ] }}
- name: HADOOP_NAMENODE_OPTS
value: "-Xmx6144m"
- name: GENERATE_CONFIG
value: namenode-generate-script.sh
- name: START_SERVICE
value: namenode-start-service.sh
resources:
limits:
memory: "8Gi"
imagePullSecrets:
- name: {{ clusterinfo['dockerregistryinfo']['secretname'] }}
volumes:

View file

@ -59,8 +59,16 @@ sed -i "s/{LOGSERVER_ADDRESS}/${LOGSERVER_ADDRESS}/g" $HADOOP_CONF_DIR/mapred-s
# set memory and cpu resource for nodemanager
mem_total=`cat /proc/meminfo | grep "MemTotal" | awk '{print $2}'`
# memory size to nodemanager is floor(mem_total * 0.8)
let mem_total=mem_total*8/10/1024/1024*1024
# memory size to nodemanager is (mem_total - mem_reserved)
if [ $(grep 'ip:' /host-configuration/host-configuration.yaml|wc -l) -gt 1 ]
then
echo "Node role is 'Worker'. Reserve 12G for os and k8s."
let mem_reserved=12*1024
else
echo "Node role is 'Master & Worker'. Reserve 40G for os and k8s."
let mem_reserved=40*1024
fi
let mem_total=(mem_total/1024/1024*1024)-mem_reserved
sed -i "s/{mem_total}/${mem_total}/g" $HADOOP_CONF_DIR/yarn-site.xml
cpu_vcores=`cat /proc/cpuinfo | grep "processor" | wc -l`

View file

@ -77,7 +77,7 @@ export YARN_RESOURCEMANAGER_HEAPSIZE=8192
# and/or YARN_NODEMANAGER_OPTS.
# If not specified, the default value will be picked from either YARN_HEAPMAX
# or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two.
export YARN_NODEMANAGER_HEAPSIZE=4096
#export YARN_NODEMANAGER_HEAPSIZE=4096
# Specify the JVM options to be used when starting the NodeManager.
# These options will be appended to the options specified as YARN_OPTS

View file

@ -62,6 +62,9 @@ spec:
- /jobstatus/jobok
initialDelaySeconds: 5
periodSeconds: 3
resources:
limits:
memory: "4Gi"
env:
- name: RESOURCEMANAGER_ADDRESS
value: {{ clusterinfo[ 'hadoopinfo' ][ 'hadoop_vip' ] }}
@ -83,6 +86,8 @@ spec:
value: {{ clusterinfo[ 'dataPath' ] }}/hadooptmp/nodemanager
- name: CURRENT_IMAGE_NAME
value: {{ clusterinfo['dockerregistryinfo']['prefix'] }}hadoop-run:{{ clusterinfo['dockerregistryinfo']['docker_tag'] }}
- name: YARN_NODEMANAGER_HEAPSIZE
value: "3072"
imagePullSecrets:
- name: {{ clusterinfo['dockerregistryinfo']['secretname'] }}
volumes:

View file

@ -52,7 +52,7 @@ fi
# and/or YARN_RESOURCEMANAGER_OPTS.
# If not specified, the default value will be picked from either YARN_HEAPMAX
# or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two.
export YARN_RESOURCEMANAGER_HEAPSIZE=8192
#export YARN_RESOURCEMANAGER_HEAPSIZE=8192
# Specify the max Heapsize for the timeline server using a numerical value
# in the scale of MB. For example, to specify an jvm option of -Xmx1000m, set

View file

@ -63,10 +63,15 @@ spec:
value: {{ clusterinfo[ 'hadoopinfo' ][ 'hadoop_vip' ] }}
- name: TIMELINE_SERVER_ADDRESS
value: {{ clusterinfo[ 'hadoopinfo' ][ 'hadoop_vip' ] }}
- name: YARN_RESOURCEMANAGER_HEAPSIZE
value: "6144"
- name: GENERATE_CONFIG
value: resourcemanager-generate-script.sh
- name: START_SERVICE
value: resourcemanager-start-service.sh
resources:
limits:
memory: "8Gi"
- name: yarn-exporter
image: {{ clusterinfo['dockerregistryinfo']['prefix'] }}yarn-exporter:{{ clusterinfo['dockerregistryinfo']['docker_tag'] }}
imagePullPolicy: Always
@ -74,9 +79,6 @@ spec:
- containerPort: {{ clusterinfo['prometheusinfo']['yarn_exporter_port'] }}
hostPort: {{ clusterinfo['prometheusinfo']['yarn_exporter_port'] }}
name: scrape
resources:
limits:
memory: "1Gi"
command:
- "python3"
- "/usr/local/yarn_exporter.py"

View file

@ -42,7 +42,7 @@ spec:
periodSeconds: 30
resources:
limits:
memory: "1Gi"
memory: "128Mi"
volumeMounts:
- mountPath: /datastorage/prometheus
name: collector-mount
@ -97,7 +97,7 @@ spec:
periodSeconds: 30
resources:
limits:
memory: "1Gi"
memory: "128Mi"
securityContext:
privileged: true # this is required by job-exporter
volumeMounts:

View file

@ -39,7 +39,7 @@ spec:
image: prom/prometheus:v2.1.0
resources:
limits:
memory: "10Gi"
memory: "256Mi"
args:
- '--config.file=/etc/prometheus/prometheus.yml'
- '--web.listen-address=0.0.0.0:{{prometheus_port}}'

View file

@ -62,5 +62,8 @@ spec:
port: pylon
initialDelaySeconds: 10
periodSeconds: 60
resources:
limits:
memory: "1Gi"
imagePullSecrets:
- name: {{ clusterinfo['dockerregistryinfo']['secretname'] }}

View file

@ -56,5 +56,8 @@ spec:
- name: rest-server
containerPort: 8080
hostPort: {{ clusterinfo['restserverinfo']['server_port'] }}
resources:
limits:
memory: "512Mi"
imagePullSecrets:
- name: {{ clusterinfo['dockerregistryinfo']['secretname'] }}
- name: {{ clusterinfo['dockerregistryinfo']['secretname'] }}

View file

@ -255,6 +255,7 @@ docker run --name $docker_name \
--rm \
--tty \
--privileged=false \
--oom-score-adj=1000 \
--cap-add=SYS_ADMIN \
--network=host \
--cpus={{{ taskData.cpuNumber }}} \

View file

@ -46,7 +46,7 @@ spec:
periodSeconds: 30
resources:
limits:
memory: "1Gi"
memory: "256Mi"
volumeMounts:
- mountPath: /datastorage/prometheus
name: collector-mount

View file

@ -56,5 +56,8 @@ spec:
- name: webportal
containerPort: 8080
hostPort: {{ clusterinfo['webportalinfo']['server_port'] }}
resources:
limits:
memory: "512Mi"
imagePullSecrets:
- name: {{ clusterinfo['dockerregistryinfo']['secretname'] }}

View file

@ -62,6 +62,15 @@ spec:
value: {{ clusterinfo[ 'frameworklauncher' ][ 'frameworklauncher_vip' ] }}
- name: FRAMEWORKLAUNCHER_PORT
value: "{{ clusterinfo[ 'frameworklauncher' ][ 'frameworklauncher_port' ] }}"
# Notes: default config '-Xmx3072m, memory limits: "4Gi"' could support about 60k jobs.
# You will need to increase the resources in order to run more jobs.
- name: LAUNCHER_OPTS
value: "-server -Xmx3072m -Djute.maxbuffer=49107800"
resources:
limits:
memory: "4Gi"
requests:
memory: "1Gi"
imagePullSecrets:
- name: {{ clusterinfo['dockerregistryinfo']['secretname'] }}
volumes:

View file

@ -17,6 +17,8 @@
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
sed -i "/^JAVA_OPTS.*/ s:.*:JVMFLAGS=\"${JAVA_OPTS}\":" /usr/share/zookeeper/bin/zkEnv.sh
HOST_NAME=`hostname`
/usr/local/host-configure.py -c /host-configuration/host-configuration.yaml -f /etc/zookeeper/conf/zoo.cfg -n $HOST_NAME
cp /myid /var/lib/zoodata/myid

View file

@ -46,6 +46,15 @@ spec:
- /jobstatus/jobok
initialDelaySeconds: 5
periodSeconds: 3
env:
- name: JAVA_OPTS
value: "-server -Xmx1536m"
resources:
limits:
memory: "2Gi"
cpu: "1000m"
requests:
memory: "1Gi"
imagePullSecrets:
- name: {{ clusterinfo['dockerregistryinfo']['secretname'] }}
volumes: