From 44f27bb66678f5db81ac92281ba0b3d713a0cce0 Mon Sep 17 00:00:00 2001 From: Yuqi Wang Date: Fri, 20 Dec 2019 12:22:14 +0800 Subject: [PATCH] [Hived]: Per VC queuing to avoid cross VC starvation (#4041) --- src/hivedscheduler/config/hivedscheduler.py | 5 +- .../hivedscheduler-config.yaml.template | 16 +--- .../hivedscheduler-service.yaml.template | 2 +- .../deploy/hivedscheduler.yaml.template | 75 ++++++++++++++----- src/hivedscheduler/deploy/service.yaml | 3 - src/hivedscheduler/deploy/start.sh.template | 2 +- src/hivedscheduler/deploy/stop.sh.template | 7 +- src/rest-server/src/models/v2/job/k8s.js | 2 +- 8 files changed, 69 insertions(+), 43 deletions(-) diff --git a/src/hivedscheduler/config/hivedscheduler.py b/src/hivedscheduler/config/hivedscheduler.py index e13bb6a33..75eb1ec9a 100644 --- a/src/hivedscheduler/config/hivedscheduler.py +++ b/src/hivedscheduler/config/hivedscheduler.py @@ -15,6 +15,7 @@ # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +import yaml class Hivedscheduler: def __init__(self, cluster_conf, service_conf, default_service_conf): @@ -26,10 +27,12 @@ class Hivedscheduler: return False, 'webservice-port is missing in hivedscheduler service configuration' if 'config' not in self.service_conf: self.service_conf['config'] = '' - # return False, 'hived scheduler config is missing' return True, None def run(self): + self.service_conf['structured-config'] = {} + if self.service_conf['config'] != '': + self.service_conf['structured-config'] = yaml.load(self.service_conf['config'], yaml.SafeLoader) machine_list = self.cluster_conf['machine-list'] master_ip = [host['hostip'] for host in machine_list if host.get('pai-master') == 'true'][0] self.service_conf['webservice'] = 'http://{}:{}'.format(master_ip, self.service_conf['webservice-port']) diff --git a/src/hivedscheduler/deploy/hivedscheduler-config.yaml.template b/src/hivedscheduler/deploy/hivedscheduler-config.yaml.template index babd9c22b..5efe95f03 100644 --- a/src/hivedscheduler/deploy/hivedscheduler-config.yaml.template +++ b/src/hivedscheduler/deploy/hivedscheduler-config.yaml.template @@ -21,27 +21,13 @@ metadata: name: hivedscheduler-config namespace: default data: - config.yaml: | - apiVersion: kubescheduler.config.k8s.io/v1alpha1 - kind: KubeSchedulerConfiguration - schedulerName: hivedscheduler - disablePreemption: false - algorithmSource: - policy: - configMap: - name: hivedscheduler-config - namespace: default - leaderElection: - leaderElect: false - lockObjectName: hivedscheduler - lockObjectNamespace: default policy.cfg : | { "kind": "Policy", "apiVersion": "v1", "extenders": [ { - "urlPrefix": "http://localhost:30096/v1/extender", + "urlPrefix": "{{ cluster_cfg['hivedscheduler']['webservice'] }}/v1/extender", "filterVerb": "filter", "preemptVerb": "preempt", "bindVerb": "bind", diff --git a/src/hivedscheduler/deploy/hivedscheduler-service.yaml.template b/src/hivedscheduler/deploy/hivedscheduler-service.yaml.template index 1a13d85d2..9813bb1e6 100644 --- a/src/hivedscheduler/deploy/hivedscheduler-service.yaml.template +++ b/src/hivedscheduler/deploy/hivedscheduler-service.yaml.template @@ -21,7 +21,7 @@ metadata: name: hivedscheduler-service spec: selector: - app: hivedscheduler + app: hivedscheduler-hs type: NodePort ports: - protocol: TCP diff --git a/src/hivedscheduler/deploy/hivedscheduler.yaml.template b/src/hivedscheduler/deploy/hivedscheduler.yaml.template index 914b6327d..c5f774b07 100644 --- a/src/hivedscheduler/deploy/hivedscheduler.yaml.template +++ b/src/hivedscheduler/deploy/hivedscheduler.yaml.template @@ -18,35 +18,23 @@ apiVersion: apps/v1 kind: StatefulSet metadata: - name: hivedscheduler-sts + name: hivedscheduler-hs namespace: default spec: - serviceName: hivedscheduler + serviceName: hivedscheduler-hs selector: matchLabels: - app: hivedscheduler + app: hivedscheduler-hs replicas: 1 template: metadata: labels: - app: hivedscheduler + app: hivedscheduler-hs spec: + nodeSelector: + pai-master: "true" serviceAccountName: hivedscheduler-account containers: - - name: defaultscheduler - image: gcr.io/google_containers/kube-scheduler:v1.14.2 - command: [ - "/usr/local/bin/kube-scheduler", - {%- if cluster_cfg['cluster']['common']['k8s-rbac'] != 'true' %} - "--master={{ cluster_cfg['layout']['kubernetes']['api-servers-url'] }}", - {%- endif %} - "--config=/hivedscheduler-config/config.yaml", - "--feature-gates=PodPriority=true", - "--leader-elect=false", - "--v=4"] - volumeMounts: - - name: hivedscheduler-config - mountPath: /hivedscheduler-config - name: hivedscheduler image: hivedscheduler/hivedscheduler:v0.2.5 command: [ @@ -59,9 +47,56 @@ spec: value: "{{ cluster_cfg['layout']['kubernetes']['api-servers-url'] }}" {%- endif %} volumeMounts: - - name: hivedscheduler-config - mountPath: /hivedscheduler-config + - name: hivedscheduler-config + mountPath: /hivedscheduler-config volumes: - name: hivedscheduler-config configMap: name: hivedscheduler-config + +{%- for vc in cluster_cfg['hivedscheduler']['structured-config']['virtualClusters'] %} +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: hivedscheduler-ds-{{ vc }} + namespace: default +spec: + serviceName: hivedscheduler-ds-{{ vc }} + selector: + matchLabels: + app: hivedscheduler-ds-{{ vc }} + replicas: 1 + template: + metadata: + labels: + app: hivedscheduler-ds-{{ vc }} + spec: + nodeSelector: + pai-master: "true" + serviceAccountName: hivedscheduler-account + containers: + - name: defaultscheduler + image: gcr.io/google_containers/kube-scheduler:v1.14.2 + command: [ + "sh", "-c", + "echo \"apiVersion: kubescheduler.config.k8s.io/v1alpha1\" >> config.yaml && + echo \"kind: KubeSchedulerConfiguration\" >> config.yaml && + echo \"schedulerName: hivedscheduler-ds-{{ vc }}\" >> config.yaml && + echo \"disablePreemption: false\" >> config.yaml && + echo \"algorithmSource:\" >> config.yaml && + echo \" policy:\" >> config.yaml && + echo \" configMap:\" >> config.yaml && + echo \" name: hivedscheduler-config\" >> config.yaml && + echo \" namespace: default\" >> config.yaml && + echo \"leaderElection:\" >> config.yaml && + echo \" leaderElect: false\" >> config.yaml && + /usr/local/bin/kube-scheduler + {%- if cluster_cfg['cluster']['common']['k8s-rbac'] != 'true' %} + --master={{ cluster_cfg['layout']['kubernetes']['api-servers-url'] }} + {%- endif %} + --config=config.yaml + --feature-gates=PodPriority=true + --leader-elect=false + --v=4"] +{%- endfor %} \ No newline at end of file diff --git a/src/hivedscheduler/deploy/service.yaml b/src/hivedscheduler/deploy/service.yaml index e0e2771a8..4872b414d 100644 --- a/src/hivedscheduler/deploy/service.yaml +++ b/src/hivedscheduler/deploy/service.yaml @@ -35,6 +35,3 @@ stop-script: stop.sh delete-script: delete.sh refresh-script: refresh.sh upgraded-script: upgraded.sh - -deploy-rules: - - in: pai-master \ No newline at end of file diff --git a/src/hivedscheduler/deploy/start.sh.template b/src/hivedscheduler/deploy/start.sh.template index 036697cb4..4c64d6003 100644 --- a/src/hivedscheduler/deploy/start.sh.template +++ b/src/hivedscheduler/deploy/start.sh.template @@ -28,7 +28,7 @@ kubectl apply --overwrite=true -f hivedscheduler.yaml || exit $? sleep 10 # Wait until the service is ready. -PYTHONPATH="../../../deployment" python -m k8sPaiLibrary.monitorTool.check_pod_ready_status -w -k app -v hivedscheduler || exit $? +PYTHONPATH="../../../deployment" python -m k8sPaiLibrary.monitorTool.check_pod_ready_status -w -k app -v hivedscheduler-hs || exit $? {% endif %} diff --git a/src/hivedscheduler/deploy/stop.sh.template b/src/hivedscheduler/deploy/stop.sh.template index 4062afa11..3f1215b53 100644 --- a/src/hivedscheduler/deploy/stop.sh.template +++ b/src/hivedscheduler/deploy/stop.sh.template @@ -21,8 +21,13 @@ pushd $(dirname "$0") > /dev/null {% if cluster_cfg['hivedscheduler']['config']|length > 1 %} +{% for vc in cluster_cfg['hivedscheduler']['structured-config']['virtualClusters'] %} PYTHONPATH="../../../deployment" python -m k8sPaiLibrary.maintaintool.update_resource \ - --operation delete --resource statefulset --name hivedscheduler-sts + --operation delete --resource statefulset --name hivedscheduler-ds-{{ vc }} +{% endfor %} + +PYTHONPATH="../../../deployment" python -m k8sPaiLibrary.maintaintool.update_resource \ + --operation delete --resource statefulset --name hivedscheduler-hs if kubectl get service | grep -q "hivedscheduler-service"; then kubectl delete service hivedscheduler-service || exit $? diff --git a/src/rest-server/src/models/v2/job/k8s.js b/src/rest-server/src/models/v2/job/k8s.js index 6580b05b2..dfee86722 100644 --- a/src/rest-server/src/models/v2/job/k8s.js +++ b/src/rest-server/src/models/v2/job/k8s.js @@ -559,7 +559,7 @@ const generateTaskRole = (frameworkName, taskRole, labels, config, storageConfig }; // hived spec if (launcherConfig.enabledHived) { - frameworkTaskRole.task.pod.spec.schedulerName = launcherConfig.scheduler; + frameworkTaskRole.task.pod.spec.schedulerName = `${launcherConfig.scheduler}-ds-${config.taskRoles[taskRole].hivedPodSpec.virtualCluster}`; delete frameworkTaskRole.task.pod.spec.containers[0].resources.limits['nvidia.com/gpu']; frameworkTaskRole.task.pod.spec.containers[0]