Mirror of https://github.com/microsoft/pai.git
Add memory limits for all PAI services, make them the 'Burstable' QoS class (#1384)
* Set the Kubernetes memory eviction threshold. To reach that capacity, either some pod is using more than its request, or the system is using more than 3Gi - 1Gi = 2Gi.
* Set these pods to the 'Guaranteed' QoS class: node-exporter, hadoop-node-manager, hadoop-data-node, drivers-one-shot
* Set '--oom-score-adj=1000' for the job container so it is OOM-killed first
* Set these pods to the 'Burstable' QoS class: prometheus, grafana
* Set these pods to the 'Guaranteed' QoS class: frameworklauncher, hadoop-jobhistory, hadoop-name-node, hadoop-resource-manager, pylon, rest-server, webportal, zookeeper
* Adjust services' memory limits
* Add resource limits for the Kubernetes services
* It seems 1G is not enough for the launcher
* Adjust the hadoop-resource-manager limit
* Adjust the webportal memory limit
* Adjust CPU limits
* Remove the yarn-exporter resource limits
* Adjust Prometheus limits
* Adjust limits
* frameworklauncher: set JAVA_OPTS="-server -Xmx512m"; zookeeper: set JAVA_OPTS="-server -Xmx512m"; fix the env name to JAVA_OPTS; fix zookeeper
* Add heap size limits for hadoop-data-node and hadoop-jobhistory
* Add -Xmx for Hadoop
* Modify memory limits
* Reserve 40G for a single-box deployment, otherwise reserve 12G
* Use LAUNCHER_OPTS
* Revert the zookeeper Dockerfile
* Adjust the node manager memory limit
* Drivers take more memory during installation
* Increase memory for zookeeper and the launcher
* Set requests to a lower value
* Comment it out; use the container env "YARN_RESOURCEMANAGER_HEAPSIZE" instead
* Add comments
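For reference, this is how Kubernetes derives the QoS class from the resource stanza; a minimal sketch with illustrative values, not taken verbatim from this change:

# 'Guaranteed': every container sets limits equal to requests
# (setting only limits also qualifies, because requests then default to the limits).
resources:
  limits:
    memory: "1Gi"
    cpu: "1000m"

# 'Burstable': requests are lower than limits, so the pod may use spare memory,
# but it is evicted/OOM-killed before 'Guaranteed' pods under memory pressure.
resources:
  limits:
    memory: "2Gi"
  requests:
    memory: "256Mi"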
Parent: e10b0b4f88
Commit: d9cf1d5f89
@@ -47,3 +47,7 @@ spec:
     - --anonymous-auth=true
     - --cors-allowed-origins
     - .*
+    resources:
+      limits:
+        memory: "1Gi"
+        cpu: "1000m"

@@ -38,3 +38,7 @@ spec:
         port: 10252
       initialDelaySeconds: 15
       timeoutSeconds: 1
+    resources:
+      limits:
+        memory: "1Gi"
+        cpu: "1000m"

@@ -46,7 +46,7 @@ spec:
         resources:
           # the ideal cpu setting will be 3.5, according to our experiment. If the server hosting k8s dashboard has enough resource, user can change this setting to a larger value.
           limits:
-            cpu: "1"
+            cpu: "1000m"
             memory: 3000Mi
           requests:
             cpu: "0.5"

@@ -52,6 +52,10 @@ spec:
     volumeMounts:
     - mountPath: /var/etcd
       name: varetcd
+    resources:
+      limits:
+        memory: "1Gi"
+        cpu: "1000m"
   volumes:
   - hostPath:
       path: {{ clusterconfig['etcd-data-path'] }}

@@ -63,7 +63,8 @@ docker run \
   --allow-privileged=true \
   --logtostderr=true \
   --pod-infra-container-image {{ clusterconfig['dockerregistry'] }}/pause-amd64:3.0 \
-  --eviction-hard="memory.available<5%,nodefs.available<5%,imagefs.available<5%,nodefs.inodesFree<5%,imagefs.inodesFree<5%" \
   --image-pull-progress-deadline=10m \
   --docker-root=${DOCKER_ROOT_DIR_FOR_KUBELET} \
+  --system-reserved=memory=3Gi \
+  --eviction-hard="memory.available<1Gi,nodefs.available<5%,imagefs.available<5%,nodefs.inodesFree<5%,imagefs.inodesFree<5%" \
   --v=2

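As a rough, illustrative calculation (not part of the repo), the two kubelet flags above interact like this on a hypothetical 64Gi node:

# Allocatable = capacity - system-reserved - hard eviction threshold
capacity=64          # Gi, total node memory (example value)
system_reserved=3    # Gi, from --system-reserved=memory=3Gi
eviction_hard=1      # Gi, from --eviction-hard="memory.available<1Gi"
echo "Allocatable reported to the scheduler: $((capacity - system_reserved - eviction_hard))Gi"   # 60Gi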
@@ -36,3 +36,7 @@ spec:
         port: 10251
       initialDelaySeconds: 15
       timeoutSeconds: 1
+    resources:
+      limits:
+        memory: "1Gi"
+        cpu: "1000m"

@@ -29,7 +29,7 @@ spec:
         app: drivers-one-shot
     spec:
       hostNetwork: true
-      hostPID: false
+      hostPID: true
       containers:
       - name: nvidia-drivers
         image: {{ clusterinfo['dockerregistryinfo']['prefix'] }}drivers:{{ clusterinfo['dockerregistryinfo']['docker_tag'] }}
@@ -57,6 +57,11 @@ spec:
             - /jobstatus/jobok
           initialDelaySeconds: 5
           periodSeconds: 3
+        resources:
+          limits:
+            memory: "2Gi"
+          requests:
+            memory: "256Mi"
       imagePullSecrets:
       - name: {{ clusterinfo['dockerregistryinfo']['secretname'] }}
       volumes:

@@ -23,7 +23,7 @@ spec:
   replicas: 1
   selector:
     matchLabels:
-      app: grafana
+      app: grafana
   template:
     metadata:
       labels:
@@ -50,5 +50,8 @@ spec:
           value: {{ clusterinfo['grafanainfo']['grafana_url'] }}:{{ clusterinfo['grafanainfo']['grafana_port'] }}
         - name: GF_AUTH_ANONYMOUS_ENABLED
           value: "true"
+        resources:
+          limits:
+            memory: "256Mi"
       imagePullSecrets:
-      - name: {{ clusterinfo['dockerregistryinfo']['secretname'] }}
+      - name: {{ clusterinfo['dockerregistryinfo']['secretname'] }}

@@ -52,7 +52,7 @@ fi
 # and/or YARN_RESOURCEMANAGER_OPTS.
 # If not specified, the default value will be picked from either YARN_HEAPMAX
 # or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two.
-export YARN_RESOURCEMANAGER_HEAPSIZE=8192
+#export YARN_RESOURCEMANAGER_HEAPSIZE=8192
 
 # Specify the max Heapsize for the timeline server using a numerical value
 # in the scale of MB. For example, to specify an jvm option of -Xmx1000m, set
@@ -77,7 +77,7 @@ export YARN_RESOURCEMANAGER_HEAPSIZE=8192
 # and/or YARN_NODEMANAGER_OPTS.
 # If not specified, the default value will be picked from either YARN_HEAPMAX
 # or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two.
-export YARN_NODEMANAGER_HEAPSIZE=4096
+#export YARN_NODEMANAGER_HEAPSIZE=4096
 
 # Specify the JVM options to be used when starting the NodeManager.
 # These options will be appended to the options specified as YARN_OPTS

@@ -52,7 +52,7 @@ fi
 # and/or YARN_RESOURCEMANAGER_OPTS.
 # If not specified, the default value will be picked from either YARN_HEAPMAX
 # or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two.
-export YARN_RESOURCEMANAGER_HEAPSIZE=8192
+#export YARN_RESOURCEMANAGER_HEAPSIZE=8192
 
 # Specify the max Heapsize for the timeline server using a numerical value
 # in the scale of MB. For example, to specify an jvm option of -Xmx1000m, set
@@ -77,7 +77,7 @@ export YARN_RESOURCEMANAGER_HEAPSIZE=8192
 # and/or YARN_NODEMANAGER_OPTS.
 # If not specified, the default value will be picked from either YARN_HEAPMAX
 # or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two.
-export YARN_NODEMANAGER_HEAPSIZE=4096
+#export YARN_NODEMANAGER_HEAPSIZE=4096
 
 # Specify the JVM options to be used when starting the NodeManager.
 # These options will be appended to the options specified as YARN_OPTS

@@ -50,7 +50,12 @@ spec:
             - /jobstatus/jobok
           initialDelaySeconds: 5
           periodSeconds: 3
+        resources:
+          limits:
+            memory: "1Gi"
         env:
+        - name: HADOOP_DATANODE_OPTS
+          value: "-Xmx512m"
         - name: HDFS_ADDRESS
           value: {{ clusterinfo[ 'hadoopinfo' ][ 'hadoop_vip' ] }}
         - name: GENERATE_CONFIG

@@ -45,6 +45,10 @@ spec:
           value: {{ clusterinfo[ 'hadoopinfo' ][ 'hadoop_vip' ] }}
         - name: TIMELINE_SERVER_ADDRESS
           value: {{ clusterinfo[ 'hadoopinfo' ][ 'hadoop_vip' ] }}
+        - name: YARN_TIMELINESERVER_HEAPSIZE
+          value: "512"
+        - name: HADOOP_JOB_HISTORYSERVER_HEAPSIZE
+          value: "512"
         - name: GENERATE_CONFIG
           value: jobhistory-generate-script.sh
         - name: START_SERVICE
@@ -61,6 +65,11 @@ spec:
             - /jobstatus/jobok
           initialDelaySeconds: 5
           periodSeconds: 3
+        resources:
+          limits:
+            memory: "2Gi"
+          requests:
+            memory: "1Gi"
       imagePullSecrets:
       - name: {{ clusterinfo['dockerregistryinfo']['secretname'] }}
       volumes:

@@ -51,10 +51,15 @@ spec:
         env:
         - name: HDFS_ADDRESS
           value: {{ clusterinfo[ 'hadoopinfo' ][ 'hadoop_vip' ] }}
+        - name: HADOOP_NAMENODE_OPTS
+          value: "-Xmx6144m"
         - name: GENERATE_CONFIG
           value: namenode-generate-script.sh
         - name: START_SERVICE
           value: namenode-start-service.sh
+        resources:
+          limits:
+            memory: "8Gi"
       imagePullSecrets:
       - name: {{ clusterinfo['dockerregistryinfo']['secretname'] }}
       volumes:

@@ -59,8 +59,16 @@ sed -i "s/{LOGSERVER_ADDRESS}/${LOGSERVER_ADDRESS}/g" $HADOOP_CONF_DIR/mapred-s
 
 # set memory and cpu resource for nodemanager
 mem_total=`cat /proc/meminfo | grep "MemTotal" | awk '{print $2}'`
-# memory size to nodemanager is floor(mem_total * 0.8)
-let mem_total=mem_total*8/10/1024/1024*1024
+# memory size to nodemanager is (mem_total - mem_reserved)
+if [ $(grep 'ip:' /host-configuration/host-configuration.yaml|wc -l) -gt 1 ]
+then
+    echo "Node role is 'Worker'. Reserve 12G for os and k8s."
+    let mem_reserved=12*1024
+else
+    echo "Node role is 'Master & Worker'. Reserve 40G for os and k8s."
+    let mem_reserved=40*1024
+fi
+let mem_total=(mem_total/1024/1024*1024)-mem_reserved
 sed -i "s/{mem_total}/${mem_total}/g" $HADOOP_CONF_DIR/yarn-site.xml
 
 cpu_vcores=`cat /proc/cpuinfo | grep "processor" | wc -l`

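As a worked example of the arithmetic above (hypothetical numbers, not from the repo): on a worker node whose /proc/meminfo reports MemTotal of 134217728 kB (128 GiB), the NodeManager is handed 128*1024 - 12*1024 MB:

mem_total=134217728                          # kB, as read from /proc/meminfo
let mem_reserved=12*1024                     # multi-node cluster => 'Worker' role, reserve 12G
let mem_total=(mem_total/1024/1024*1024)-mem_reserved
echo $mem_total                              # 118784, the MB value written into yarn-site.xml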
@@ -77,7 +77,7 @@ export YARN_RESOURCEMANAGER_HEAPSIZE=8192
 # and/or YARN_NODEMANAGER_OPTS.
 # If not specified, the default value will be picked from either YARN_HEAPMAX
 # or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two.
-export YARN_NODEMANAGER_HEAPSIZE=4096
+#export YARN_NODEMANAGER_HEAPSIZE=4096
 
 # Specify the JVM options to be used when starting the NodeManager.
 # These options will be appended to the options specified as YARN_OPTS

@@ -62,6 +62,9 @@ spec:
             - /jobstatus/jobok
           initialDelaySeconds: 5
           periodSeconds: 3
+        resources:
+          limits:
+            memory: "4Gi"
         env:
         - name: RESOURCEMANAGER_ADDRESS
           value: {{ clusterinfo[ 'hadoopinfo' ][ 'hadoop_vip' ] }}
@@ -83,6 +86,8 @@ spec:
           value: {{ clusterinfo[ 'dataPath' ] }}/hadooptmp/nodemanager
         - name: CURRENT_IMAGE_NAME
           value: {{ clusterinfo['dockerregistryinfo']['prefix'] }}hadoop-run:{{ clusterinfo['dockerregistryinfo']['docker_tag'] }}
+        - name: YARN_NODEMANAGER_HEAPSIZE
+          value: "3072"
       imagePullSecrets:
       - name: {{ clusterinfo['dockerregistryinfo']['secretname'] }}
       volumes:

@@ -52,7 +52,7 @@ fi
 # and/or YARN_RESOURCEMANAGER_OPTS.
 # If not specified, the default value will be picked from either YARN_HEAPMAX
 # or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two.
-export YARN_RESOURCEMANAGER_HEAPSIZE=8192
+#export YARN_RESOURCEMANAGER_HEAPSIZE=8192
 
 # Specify the max Heapsize for the timeline server using a numerical value
 # in the scale of MB. For example, to specify an jvm option of -Xmx1000m, set

@@ -63,10 +63,15 @@ spec:
           value: {{ clusterinfo[ 'hadoopinfo' ][ 'hadoop_vip' ] }}
         - name: TIMELINE_SERVER_ADDRESS
           value: {{ clusterinfo[ 'hadoopinfo' ][ 'hadoop_vip' ] }}
+        - name: YARN_RESOURCEMANAGER_HEAPSIZE
+          value: "6144"
         - name: GENERATE_CONFIG
           value: resourcemanager-generate-script.sh
         - name: START_SERVICE
           value: resourcemanager-start-service.sh
+        resources:
+          limits:
+            memory: "8Gi"
       - name: yarn-exporter
         image: {{ clusterinfo['dockerregistryinfo']['prefix'] }}yarn-exporter:{{ clusterinfo['dockerregistryinfo']['docker_tag'] }}
         imagePullPolicy: Always
@@ -74,9 +79,6 @@ spec:
         - containerPort: {{ clusterinfo['prometheusinfo']['yarn_exporter_port'] }}
          hostPort: {{ clusterinfo['prometheusinfo']['yarn_exporter_port'] }}
           name: scrape
-        resources:
-          limits:
-            memory: "1Gi"
         command:
         - "python3"
         - "/usr/local/yarn_exporter.py"

@@ -42,7 +42,7 @@ spec:
           periodSeconds: 30
         resources:
           limits:
-            memory: "1Gi"
+            memory: "128Mi"
         volumeMounts:
         - mountPath: /datastorage/prometheus
           name: collector-mount
@@ -97,7 +97,7 @@ spec:
           periodSeconds: 30
         resources:
           limits:
-            memory: "1Gi"
+            memory: "128Mi"
         securityContext:
           privileged: true # this is required by job-exporter
         volumeMounts:

@@ -39,7 +39,7 @@ spec:
         image: prom/prometheus:v2.1.0
         resources:
           limits:
-            memory: "10Gi"
+            memory: "256Mi"
         args:
           - '--config.file=/etc/prometheus/prometheus.yml'
           - '--web.listen-address=0.0.0.0:{{prometheus_port}}'

@@ -62,5 +62,8 @@ spec:
             port: pylon
           initialDelaySeconds: 10
           periodSeconds: 60
+        resources:
+          limits:
+            memory: "1Gi"
       imagePullSecrets:
       - name: {{ clusterinfo['dockerregistryinfo']['secretname'] }}

@@ -56,5 +56,8 @@ spec:
         - name: rest-server
           containerPort: 8080
           hostPort: {{ clusterinfo['restserverinfo']['server_port'] }}
+        resources:
+          limits:
+            memory: "512Mi"
       imagePullSecrets:
-      - name: {{ clusterinfo['dockerregistryinfo']['secretname'] }}
+      - name: {{ clusterinfo['dockerregistryinfo']['secretname'] }}

@@ -255,6 +255,7 @@ docker run --name $docker_name \
   --rm \
   --tty \
   --privileged=false \
+  --oom-score-adj=1000 \
   --cap-add=SYS_ADMIN \
   --network=host \
   --cpus={{{ taskData.cpuNumber }}} \

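A higher oom_score_adj makes the job container the kernel's first OOM-kill candidate, ahead of the PAI services. To confirm the flag took effect, the adjusted score is visible under /proc; a quick sketch assuming a hypothetical running job container named job-abc:

pid=$(docker inspect --format '{{.State.Pid}}' job-abc)
cat /proc/${pid}/oom_score_adj    # prints 1000, i.e. the first candidate for the kernel OOM killer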
@@ -46,7 +46,7 @@ spec:
           periodSeconds: 30
         resources:
           limits:
-            memory: "1Gi"
+            memory: "256Mi"
         volumeMounts:
         - mountPath: /datastorage/prometheus
           name: collector-mount

@@ -56,5 +56,8 @@ spec:
         - name: webportal
           containerPort: 8080
           hostPort: {{ clusterinfo['webportalinfo']['server_port'] }}
+        resources:
+          limits:
+            memory: "512Mi"
       imagePullSecrets:
       - name: {{ clusterinfo['dockerregistryinfo']['secretname'] }}

@@ -62,6 +62,15 @@ spec:
           value: {{ clusterinfo[ 'frameworklauncher' ][ 'frameworklauncher_vip' ] }}
         - name: FRAMEWORKLAUNCHER_PORT
           value: "{{ clusterinfo[ 'frameworklauncher' ][ 'frameworklauncher_port' ] }}"
+        # Notes: default config '-Xmx3072m, memory limits: "4Gi"' could support about 60k jobs.
+        # You will need increase the resource in order to run more jobs.
+        - name: LAUNCHER_OPTS
+          value: "-server -Xmx3072m -Djute.maxbuffer=49107800"
+        resources:
+          limits:
+            memory: "4Gi"
+          requests:
+            memory: "1Gi"
       imagePullSecrets:
       - name: {{ clusterinfo['dockerregistryinfo']['secretname'] }}
       volumes:

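The launcher comment above implies a rule of thumb: keep -Xmx comfortably below the container memory limit so the JVM, rather than the kubelet, handles memory pressure. An illustrative (not prescriptive) way to scale the launcher beyond roughly 60k jobs would be to raise both values together:

# Illustrative values only; keep about 1Gi of headroom between -Xmx and the limit.
- name: LAUNCHER_OPTS
  value: "-server -Xmx6144m -Djute.maxbuffer=49107800"
resources:
  limits:
    memory: "8Gi"
  requests:
    memory: "2Gi"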
@@ -17,6 +17,8 @@
 # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
+sed -i "/^JAVA_OPTS.*/ s:.*:JVMFLAGS=\"${JAVA_OPTS}\":" /usr/share/zookeeper/bin/zkEnv.sh
+
 HOST_NAME=`hostname`
 /usr/local/host-configure.py -c /host-configuration/host-configuration.yaml -f /etc/zookeeper/conf/zoo.cfg -n $HOST_NAME
 cp /myid /var/lib/zoodata/myid

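The sed call above rewrites the JAVA_OPTS line in zkEnv.sh into the JVMFLAGS variable that ZooKeeper reads for its JVM options. A self-contained sketch of the effect, assuming JAVA_OPTS is "-server -Xmx1536m" (as set in zookeeper.yaml below) and using a stand-in file instead of the real zkEnv.sh:

export JAVA_OPTS="-server -Xmx1536m"
printf 'JAVA_OPTS=""\n' > /tmp/zkEnv.sh                                # stand-in for /usr/share/zookeeper/bin/zkEnv.sh
sed -i "/^JAVA_OPTS.*/ s:.*:JVMFLAGS=\"${JAVA_OPTS}\":" /tmp/zkEnv.sh
cat /tmp/zkEnv.sh                                                      # JVMFLAGS="-server -Xmx1536m"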
@@ -46,6 +46,15 @@ spec:
             - /jobstatus/jobok
           initialDelaySeconds: 5
           periodSeconds: 3
+        env:
+        - name: JAVA_OPTS
+          value: "-server -Xmx1536m"
+        resources:
+          limits:
+            memory: "2Gi"
+            cpu: "1000m"
+          requests:
+            memory: "1Gi"
       imagePullSecrets:
       - name: {{ clusterinfo['dockerregistryinfo']['secretname'] }}
       volumes: