This commit is contained in:
Zihao Chen 2018-07-11 17:14:06 +08:00
Родитель 327664457c
Коммит 43e4d75557
3 изменённых файлов: 31 добавлений и 13 удалений

Просмотреть файл

@ -12,6 +12,7 @@ coverage = "*"
requests = "*"
mesoshttp = "*"
typing = "*"
pytz = "*"
[requires]
python_version = "2.7"

27
Pipfile.lock сгенерированный
Просмотреть файл

@ -1,7 +1,7 @@
{
"_meta": {
"hash": {
"sha256": "3d9881f13941acef76267b1a7de710da562e6de6629fef6a66beb3fe3faac99f"
"sha256": "b712a037c9dd815f242b2961737d5eee88f3d5c298c5c8bf6c0290abf47974de"
},
"pipfile-spec": 6,
"requires": {
@ -51,6 +51,14 @@
"index": "pypi",
"version": "==0.2.13"
},
"pytz": {
"hashes": [
"sha256:a061aa0a9e06881eb8b3b2b43f05b9439d6583c206d0a6c340ff72a7b6669053",
"sha256:ffb9ef1de172603304d9d2819af6f5ece76f2e85ec10692a524dd876e72bf277"
],
"index": "pypi",
"version": "==2018.5"
},
"requests": {
"hashes": [
"sha256:63b52e3c866428a224f97cab011de738c36aec0185aa91cfacd418b5d58911d1",
@ -116,7 +124,6 @@
"sha256:a68ac5e15e76e7e5dd2b8f94007233e01effe3e50e8daddf69acfd81cb686baf",
"sha256:b5725a0bd4ba422ab0e66e89e030c806576753ea3ee08554382c14e685d117b5"
],
"markers": "python_version != '3.2.*' and python_version != '3.3.*' and python_version != '3.0.*' and python_version != '3.1.*' and python_version < '4' and python_version >= '2.6'",
"version": "==1.23"
}
},
@ -125,7 +132,10 @@
"hashes": [
"sha256:03481e81d558d30d230bc12999e3edffe392d244349a90f4ef9b88425fac74ba",
"sha256:0b136648de27201056c1869a6c0d4e23f464750fd9a9ba9750b8336a244429ed",
"sha256:104ab3934abaf5be871a583541e8829d6c19ce7bde2923b2751e0d3ca44db60a",
"sha256:15b111b6a0f46ee1a485414a52a7ad1d703bdf984e9ed3c288a4414d3871dcbd",
"sha256:198626739a79b09fa0a2f06e083ffd12eb55449b5f8bfdbeed1df4910b2ca640",
"sha256:1c383d2ef13ade2acc636556fd544dba6e14fa30755f26812f54300e401f98f2",
"sha256:28b2191e7283f4f3568962e373b47ef7f0392993bb6660d079c62bd50fe9d162",
"sha256:2eb564bbf7816a9d68dd3369a510be3327f1c618d2357fa6b1216994c2e3d508",
"sha256:337ded681dd2ef9ca04ef5d93cfc87e52e09db2594c296b4a0a3662cb1b41249",
@ -146,11 +156,16 @@
"sha256:7e1fe19bd6dce69d9fd159d8e4a80a8f52101380d5d3a4d374b6d3eae0e5de9c",
"sha256:8c3cb8c35ec4d9506979b4cf90ee9918bc2e49f84189d9bf5c36c0c1119c6558",
"sha256:9d6dd10d49e01571bf6e147d3b505141ffc093a06756c60b053a859cb2128b1f",
"sha256:9e112fcbe0148a6fa4f0a02e8d58e94470fc6cb82a5481618fea901699bf34c4",
"sha256:ac4fef68da01116a5c117eba4dd46f2e06847a497de5ed1d64bb99a5fda1ef91",
"sha256:b8815995e050764c8610dbc82641807d196927c3dbed207f0a079833ffcf588d",
"sha256:be6cfcd8053d13f5f5eeb284aa8a814220c3da1b0078fa859011c7fffd86dab9",
"sha256:c1bb572fab8208c400adaf06a8133ac0712179a334c09224fb11393e920abcdd",
"sha256:de4418dadaa1c01d497e539210cb6baa015965526ff5afc078c57ca69160108d",
"sha256:e05cb4d9aad6233d67e0541caa7e511fa4047ed7750ec2510d466e806e0255d6",
"sha256:f3f501f345f24383c0000395b26b726e46758b71393267aeae0bd36f8b3ade80"
"sha256:e4d96c07229f58cb686120f168276e434660e4358cc9cf3b0464210b04913e77",
"sha256:f3f501f345f24383c0000395b26b726e46758b71393267aeae0bd36f8b3ade80",
"sha256:f8a923a85cb099422ad5a2e345fe877bbc89a8a8b23235824a93488150e45f6e"
],
"index": "pypi",
"version": "==4.5.1"
@ -182,10 +197,10 @@
},
"pbr": {
"hashes": [
"sha256:3747c6f017f2dc099986c325239661948f9f5176f6880d9fdef164cb664cd665",
"sha256:a9c27eb8f0e24e786e544b2dbaedb729c9d8546342b5a6818d8eda098ad4340d"
"sha256:4f2b11d95917af76e936811be8361b2b19616e5ef3b55956a429ec7864378e0c",
"sha256:e0f23b61ec42473723b2fec2f33fb12558ff221ee551962f01dd4de9053c2055"
],
"version": "==4.0.4"
"version": "==4.1.0"
},
"six": {
"hashes": [

Просмотреть файл

@ -1,6 +1,7 @@
import threading
from datetime import datetime, timedelta
import pytz
from typing import Iterable, Callable, NamedTuple, Set, Dict, List, Tuple
import logging_aux
@ -25,7 +26,7 @@ def _check_node_health_unapproved(node_status):
def _find_missing_nodes(rq_nodes, res_nodes):
# type: (List[str], List[str]) -> List[str]
return [name for name in rq_nodes if name not in res_nodes]
return [name for name in _upper_strings(rq_nodes) if name not in _upper_strings(res_nodes)]
def _check_node_state(node_status, target_state):
@ -97,7 +98,7 @@ class HpcClusterManager(object):
# type: (Callable[[list[str]], ()]) -> ()
self._node_closed_callbacks.append(callback)
def add_slaveinfo(self, fqdn, agent_id, task_id, cpus, last_heartbeat=datetime.utcnow()):
def add_slaveinfo(self, fqdn, agent_id, task_id, cpus, last_heartbeat=datetime.now(pytz.utc)):
# type: (str, str, str, float, datetime) -> ()
u_fqdn = fqdn.upper()
hostname = _get_hostname_from_fqdn(u_fqdn)
@ -114,7 +115,7 @@ class HpcClusterManager(object):
self._heart_beat_table[hostname] = slaveinfo
self.logger.info("Heart beat entry added: {}".format(str(slaveinfo)))
def on_slave_heartbeat(self, hostname, now=datetime.utcnow()):
def on_slave_heartbeat(self, hostname, now=datetime.now(pytz.utc)):
# type: (str, datetime) -> ()
u_hostname = hostname.upper()
if u_hostname in self._heart_beat_table:
@ -123,7 +124,7 @@ class HpcClusterManager(object):
if self._heart_beat_table[u_hostname].state == HpcState.Provisioning:
with self._table_lock:
if self._heart_beat_table[u_hostname].state == HpcState.Provisioning:
self._set_nodes_configuring(u_hostname)
self._set_nodes_configuring([u_hostname])
else:
self.logger.error("Host {} is not recognized. Heartbeat ignored.".format(u_hostname))
self.logger.debug("_table {} ".format(self._heart_beat_table))
@ -165,7 +166,7 @@ class HpcClusterManager(object):
return True
return False
def _check_timeout(self, now=datetime.utcnow()):
def _check_timeout(self, now=datetime.now(pytz.utc)):
# type: (datetime) -> ([SlaveInfo], [SlaveInfo], [SlaveInfo])
# TODO: Check configuring timeout
provision_timeout_list = []
@ -276,8 +277,8 @@ class HpcClusterManager(object):
self.logger.info("Nodes configured: {}".format(configured_node_names))
self._set_nodes_running(configured_node_names)
if missing_nodes:
# Missing is valid state of nodes in configuring.
self.logger.info("Nodes missing when configuring: {}".format(missing_nodes))
self._set_nodes_closed(missing_nodes)
def _check_runaway_and_idle_compute_nodes(self):
# type: () -> ()
@ -312,7 +313,7 @@ class HpcClusterManager(object):
self.logger.info("Get idle_timeout_nodes:{}".format(str(idle_timeout_nodes)))
self._set_nodes_draining(idle_timeout_nodes)
def _check_node_idle_timeout(self, node_names, now=datetime.utcnow()):
def _check_node_idle_timeout(self, node_names, now=datetime.now(pytz.utc)):
# type: (Iterable[str], datetime) -> [str]
new_node_idle_check_table = {}
for u_node_name in _upper_strings(node_names):
@ -325,6 +326,7 @@ class HpcClusterManager(object):
else:
new_node_idle_check_table[u_node_name] = now
self._node_idle_check_table = new_node_idle_check_table
self.logger.info("_check_node_idle_timeout: now - " + str(now))
self.logger.info("_check_node_idle_timeout: " + str(self._node_idle_check_table))
return [name for name, value in self._node_idle_check_table.iteritems() if
(now - value) >= self._node_idle_timedelta]