зеркало из https://github.com/Azure/aztk.git
make it work with multiple nodes
This commit is contained in:
Родитель
abd50d246a
Коммит
43b5307784
|
@ -31,7 +31,7 @@ class ResourceMonitorPlugin(PluginConfiguration):
|
|||
PluginFile(".env", os.path.join(dir_path, ".env")),
|
||||
PluginFile("docker-compose.yml", os.path.join(dir_path, "docker-compose.yml")),
|
||||
PluginFile("nodestats.py", os.path.join(dir_path, "nodestats.py")),
|
||||
PluginFile("requrements.txt", os.path.join(dir_path, "requrements.txt")),
|
||||
PluginFile("requirements.txt", os.path.join(dir_path, "requirements.txt")),
|
||||
PluginFile("resource_monitor_dashboard.json",
|
||||
os.path.join(dir_path, "resource_monitor_dashboard.json")),
|
||||
],
|
||||
|
|
|
@ -143,7 +143,7 @@ class NodeStatsCollector:
|
|||
Node Stats Manager class
|
||||
"""
|
||||
|
||||
def __init__(self, pool_id, node_id, is_master, refresh_interval=_DEFAULT_STATS_UPDATE_INTERVAL):
|
||||
def __init__(self, host, pool_id, node_id, is_master, refresh_interval=_DEFAULT_STATS_UPDATE_INTERVAL):
|
||||
self.pool_id = pool_id
|
||||
self.node_id = node_id
|
||||
self.is_master = is_master
|
||||
|
@ -153,7 +153,7 @@ class NodeStatsCollector:
|
|||
|
||||
self.disk = IOThroughputAggregator()
|
||||
self.network = IOThroughputAggregator()
|
||||
self.telemetry_client = InfluxDBClient(HOST, PORT, USER, PASSWORD, DBNAME)
|
||||
self.telemetry_client = InfluxDBClient(host, PORT, USER, PASSWORD, DBNAME)
|
||||
|
||||
def init(self):
|
||||
"""
|
||||
|
@ -313,12 +313,21 @@ def main():
|
|||
logger.info("Operating system: %s", os_environment())
|
||||
logger.info("Cpu count: %s", psutil.cpu_count())
|
||||
|
||||
pool_id = os.environ.get('AZ_BATCH_POOL_ID', '_test-pool-1')
|
||||
node_id = os.environ.get('AZ_BATCH_NODE_ID', '_test-node-1')
|
||||
is_master = os.environ.get('AZTK_IS_MASTER', False)
|
||||
|
||||
host = HOST
|
||||
if len(sys.argv) > 0:
|
||||
HOST = sys.argv[1]
|
||||
host = sys.argv[1]
|
||||
is_master = sys.argv[2]
|
||||
pool_id = sys.argv[3]
|
||||
node_id = sys.argv[4]
|
||||
|
||||
if pool_id is None:
|
||||
pool_id = os.environ.get('AZ_BATCH_POOL_ID', '_test-pool-1')
|
||||
|
||||
if node_id is None:
|
||||
node_id = os.environ.get('AZ_BATCH_NODE_ID', '_test-node-1')
|
||||
|
||||
if is_master is None:
|
||||
is_master = os.environ.get('AZTK_IS_MASTER', "0")
|
||||
|
||||
logger.info('setting host to {}'.format(HOST))
|
||||
|
||||
|
@ -326,10 +335,11 @@ def main():
|
|||
logger.info('enabling event loop debug mode')
|
||||
logger.info('cluster_id {}'.format(pool_id))
|
||||
logger.info('node_id {}'.format(node_id))
|
||||
logger.info('is_master {}'.format(is_master))
|
||||
|
||||
|
||||
# create node stats manager
|
||||
collector = NodeStatsCollector(pool_id, node_id, is_master)
|
||||
collector = NodeStatsCollector(host, pool_id, node_id, is_master)
|
||||
collector.init()
|
||||
collector.run()
|
||||
|
||||
|
|
|
@ -15,7 +15,7 @@
|
|||
"editable": true,
|
||||
"gnetId": null,
|
||||
"graphTooltip": 0,
|
||||
"id": null,
|
||||
"id": 1,
|
||||
"links": [],
|
||||
"panels": [
|
||||
{
|
||||
|
@ -33,6 +33,7 @@
|
|||
},
|
||||
"id": 2,
|
||||
"legend": {
|
||||
"alignAsTable": true,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
|
@ -55,11 +56,11 @@
|
|||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"$$hashKey": "object:283",
|
||||
"$$hashKey": "object:272",
|
||||
"groupBy": [
|
||||
{
|
||||
"params": [
|
||||
"Cpu #"
|
||||
"hostName"
|
||||
],
|
||||
"type": "tag"
|
||||
}
|
||||
|
@ -101,7 +102,6 @@
|
|||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"$$hashKey": "object:937",
|
||||
"format": "percent",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
|
@ -110,7 +110,6 @@
|
|||
"show": true
|
||||
},
|
||||
{
|
||||
"$$hashKey": "object:938",
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
|
@ -159,7 +158,6 @@
|
|||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"$$hashKey": "object:1274",
|
||||
"groupBy": [],
|
||||
"measurement": "Network read",
|
||||
"orderByTime": "ASC",
|
||||
|
@ -179,7 +177,6 @@
|
|||
"tags": []
|
||||
},
|
||||
{
|
||||
"$$hashKey": "object:1338",
|
||||
"groupBy": [],
|
||||
"measurement": "Disk write",
|
||||
"orderByTime": "ASC",
|
||||
|
@ -218,7 +215,6 @@
|
|||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"$$hashKey": "object:1502",
|
||||
"format": "Bps",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
|
@ -227,7 +223,6 @@
|
|||
"show": true
|
||||
},
|
||||
{
|
||||
"$$hashKey": "object:1503",
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
|
@ -274,7 +269,6 @@
|
|||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"$$hashKey": "object:615",
|
||||
"groupBy": [
|
||||
{
|
||||
"params": [
|
||||
|
@ -320,7 +314,6 @@
|
|||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"$$hashKey": "object:652",
|
||||
"format": "decbytes",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
|
@ -329,7 +322,6 @@
|
|||
"show": true
|
||||
},
|
||||
{
|
||||
"$$hashKey": "object:653",
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
|
@ -378,6 +370,6 @@
|
|||
},
|
||||
"timezone": "",
|
||||
"title": "Perf Counters",
|
||||
"uid": null,
|
||||
"version": 0
|
||||
"uid": "h5zSugWmk",
|
||||
"version": 1
|
||||
}
|
||||
|
|
|
@ -4,8 +4,11 @@ DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
|
|||
sudo curl -L https://github.com/docker/compose/releases/download/1.21.0/docker-compose-$(uname -s)-$(uname -m) -o /usr/local/bin/docker-compose
|
||||
sudo chmod +x /usr/local/bin/docker-compose
|
||||
|
||||
cd $DIR
|
||||
|
||||
# Install pip requirements
|
||||
echo "Install pip requirements "
|
||||
sudo chmod 777 requirements.txt
|
||||
pip3 install -r requirements.txt
|
||||
|
||||
if [ "$AZTK_IS_MASTER" = "1" ]; then
|
||||
|
@ -13,9 +16,11 @@ if [ "$AZTK_IS_MASTER" = "1" ]; then
|
|||
sudo docker-compose up --no-start
|
||||
echo "Run the containers"
|
||||
sudo docker-compose start
|
||||
else
|
||||
AZTK_IS_MASTER=0
|
||||
fi
|
||||
|
||||
echo "Run nodestats in background"
|
||||
sudo touch nodestats.out
|
||||
sudo chmod 777 nodestats.out
|
||||
sudo python3 $DIR/nodestats.py > nodestats.out 2>&1 $AZTK_MASTER_IP &
|
||||
sudo python3 nodestats.py > nodestats.out 2>&1 $AZTK_MASTER_IP $AZTK_IS_MASTER $AZ_BATCH_POOL_ID $AZ_BATCH_NODE_ID &
|
||||
|
|
Загрузка…
Ссылка в новой задаче