зеркало из https://github.com/microsoft/lisa.git
Stabilize MPI tests for Azure Linux
This commit is contained in:
Родитель
5c8d0258a6
Коммит
81559cd0b8
|
@ -9,10 +9,11 @@ from assertpy import assert_that
|
|||
from retry import retry
|
||||
|
||||
from lisa.base_tools import Cat, Sed, Uname, Wget
|
||||
from lisa.tools.git import Git
|
||||
from lisa.feature import Feature
|
||||
from lisa.features import Disk
|
||||
from lisa.operating_system import CBLMariner, Oracle, Redhat, Ubuntu
|
||||
from lisa.tools import Firewall, Ls, Lspci, Make, Service
|
||||
from lisa.tools import Chmod, Find, Firewall, Ls, Lspci, Make, Service
|
||||
from lisa.tools.tar import Tar
|
||||
from lisa.util import (
|
||||
LisaException,
|
||||
|
@ -466,7 +467,6 @@ class Infiniband(Feature):
|
|||
|
||||
def install_open_mpi(self) -> None:
|
||||
node = self._node
|
||||
# Install Open MPI
|
||||
wget = node.tools[Wget]
|
||||
tar_file = (
|
||||
"https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-4.1.5.tar.gz"
|
||||
|
@ -497,6 +497,55 @@ class Infiniband(Feature):
|
|||
make.make("", cwd=openmpi_folder, sudo=True)
|
||||
make.make_install(cwd=openmpi_folder, sudo=True)
|
||||
|
||||
def install_intel_mpi_benchmarking_tool(
    self, tool_names: List[str] = ["IMB-MPI1"]
) -> None:
    """Build the Intel MPI Benchmarks (IMB) from source on CBL-Mariner.

    On any other distro this is a no-op, because the benchmark binaries
    are already included in that distro's packages.

    The method clones https://github.com/intel/mpi-benchmarks.git into the
    node's working path, locates the ``mpicc`` and ``mpicxx`` compiler
    wrappers anywhere on the filesystem, makes them executable, and then
    runs ``make <tool> CC=<mpicc> CXX=<mpicxx>`` for each requested tool.
    Each built binary is made executable afterwards.

    Args:
        tool_names: make targets / binary names to build, e.g.
            ``["IMB-MPI1", "IMB-RMA", "IMB-NBC"]``. The default list is
            shared between calls; it is only iterated here and must
            never be mutated.

    Raises:
        AssertionError: if ``mpicc`` or ``mpicxx`` cannot be found.
    """
    # Assumption: the required MPI package is already built and installed.
    node = self._node
    if not isinstance(node.os, CBLMariner):
        # These tools are included in other distro packages
        return

    # Clone Intel MPI Benchmarks: https://github.com/intel/mpi-benchmarks.git
    node.tools[Git].clone(
        url="https://github.com/intel/mpi-benchmarks.git",
        cwd=node.working_path,
    )
    imb_src_folder = node.get_pure_path(f"{node.working_path}/mpi-benchmarks")

    # Locate the MPI compiler wrappers provided by the installed MPI package.
    mpicc_path = self._find_mpi_compiler("mpicc")
    mpicxx_path = self._find_mpi_compiler("mpicxx")

    # Make sure both wrappers are executable before handing them to make.
    chmod = node.tools[Chmod]
    chmod.chmod(mpicc_path, "755", sudo=True)
    chmod.chmod(mpicxx_path, "755", sudo=True)

    make = node.tools[Make]
    for tool in tool_names:
        # Run make directly (no shell, no "yes ''" pipe): the build asks no
        # questions, and the CC/CXX overrides are passed as make arguments.
        make.make(
            f"{tool} CC={mpicc_path} CXX={mpicxx_path}",
            cwd=imb_src_folder,
            sudo=True,
            shell=False,
            sendYesCmd=False,
        )
        chmod.chmod(f"{imb_src_folder}/{tool}", "755", sudo=True)

def _find_mpi_compiler(self, name: str) -> str:
    """Return the first path named ``name`` found under ``/`` on the node.

    Args:
        name: file name of the compiler wrapper, e.g. ``mpicc``.

    Raises:
        AssertionError: if nothing is found or the first hit is empty.
    """
    node = self._node
    # The install prefix of the MPI package is not known here, so the
    # whole filesystem is searched.
    # NOTE(review): assumes find_files returns a list of path strings - confirm.
    find_results = node.tools[Find].find_files(
        node.get_pure_path("/"), name, sudo=True
    )
    assert_that(len(find_results)).described_as(
        f"Could not find location of {name} from MPI package"
    ).is_greater_than(0)
    compiler_path = find_results[0]
    assert_that(compiler_path).described_as(
        f"Could not find location of {name} from MPI package"
    ).is_not_empty()
    return compiler_path
|
||||
|
||||
|
||||
def install_ibm_mpi(self, platform_mpi_url: str) -> None:
|
||||
node = self._node
|
||||
if isinstance(node.os, Redhat):
|
||||
|
|
|
@ -74,6 +74,8 @@ class Make(Tool):
|
|||
thread_count: int = 0,
|
||||
update_envs: Optional[Dict[str, str]] = None,
|
||||
ignore_error: bool = False,
|
||||
shell: bool = True,
|
||||
sendYesCmd: bool = True
|
||||
) -> ExecutableResult:
|
||||
expected_exit_code: Optional[int] = 0
|
||||
if thread_count == 0:
|
||||
|
@ -95,13 +97,17 @@ class Make(Tool):
|
|||
|
||||
if ignore_error:
|
||||
expected_exit_code = None
|
||||
# yes '' answers all questions with default value.
|
||||
command = ""
|
||||
if sendYesCmd:
|
||||
# yes '' answers all questions with default value.
|
||||
command = "yes '' | "
|
||||
|
||||
result = self.node.execute(
|
||||
f"yes '' | make -j{thread_count} {arguments}",
|
||||
f"{command} make -j{thread_count} {arguments}",
|
||||
cwd=cwd,
|
||||
timeout=timeout,
|
||||
sudo=sudo,
|
||||
shell=True,
|
||||
shell=shell,
|
||||
update_envs=update_envs,
|
||||
expected_exit_code=expected_exit_code,
|
||||
expected_exit_code_failure_message="Failed to make",
|
||||
|
|
|
@ -15,7 +15,7 @@ from lisa import (
|
|||
simple_requirement,
|
||||
)
|
||||
from lisa.features import AvailabilitySetEnabled, Infiniband, Sriov
|
||||
from lisa.operating_system import BSD, Windows
|
||||
from lisa.operating_system import BSD, CBLMariner, Windows
|
||||
from lisa.sut_orchestrator.azure.tools import Waagent
|
||||
from lisa.tools import Find, KernelConfig, Ls, Modprobe, Ssh
|
||||
from lisa.util import (
|
||||
|
@ -286,6 +286,9 @@ class InfinibandSuite(TestSuite):
|
|||
client_ssh.enable_public_key(server_ssh.generate_key_pairs())
|
||||
server_ssh.add_known_host(client_ip)
|
||||
client_ssh.add_known_host(server_ip)
|
||||
sudo=False
|
||||
if isinstance(server_node.os, CBLMariner):
|
||||
sudo=True
|
||||
|
||||
# Note: Using bash because script is not supported by Dash
|
||||
# sh points to dash on Ubuntu
|
||||
|
@ -295,6 +298,7 @@ class InfinibandSuite(TestSuite):
|
|||
"-env I_MPI_FABRICS=shm:ofi -env SECS_PER_SAMPLE=600 "
|
||||
"-env FI_PROVIDER=mlx -env I_MPI_DEBUG=5 -env I_MPI_PIN_DOMAIN=numa "
|
||||
"/opt/intel/oneapi/mpi/2021.1.1/bin/IMB-MPI1 pingpong",
|
||||
sudo=sudo,
|
||||
expected_exit_code=0,
|
||||
expected_exit_code_failure_message="Failed intra-node pingpong test "
|
||||
"with intel mpi",
|
||||
|
@ -306,6 +310,7 @@ class InfinibandSuite(TestSuite):
|
|||
"-env I_MPI_FABRICS=shm:ofi -env SECS_PER_SAMPLE=600 "
|
||||
"-env FI_PROVIDER=mlx -env I_MPI_DEBUG=5 -env I_MPI_PIN_DOMAIN=numa "
|
||||
"/opt/intel/oneapi/mpi/2021.1.1/bin/IMB-MPI1 pingpong",
|
||||
sudo=sudo,
|
||||
expected_exit_code=0,
|
||||
expected_exit_code_failure_message="Failed inter-node pingpong test "
|
||||
"with intel mpi",
|
||||
|
@ -319,6 +324,7 @@ class InfinibandSuite(TestSuite):
|
|||
"-n 44 -env I_MPI_FABRICS=shm:ofi -env SECS_PER_SAMPLE=600 "
|
||||
"-env FI_PROVIDER=mlx -env I_MPI_DEBUG=5 -env I_MPI_PIN_DOMAIN=numa "
|
||||
f"/opt/intel/oneapi/mpi/2021.1.1/bin/{test}",
|
||||
sudo=sudo,
|
||||
expected_exit_code=0,
|
||||
expected_exit_code_failure_message=f"Failed {test} test with intel mpi",
|
||||
timeout=3000,
|
||||
|
@ -360,10 +366,13 @@ class InfinibandSuite(TestSuite):
|
|||
raise SkippedException(err)
|
||||
|
||||
run_in_parallel([server_ib.install_open_mpi, client_ib.install_open_mpi])
|
||||
|
||||
server_node.execute("ldconfig", sudo=True)
|
||||
client_node.execute("ldconfig", sudo=True)
|
||||
|
||||
# Only for mariner, we need to build intel benchmarking tools
|
||||
# as they are not included in our packages
|
||||
server_ib.install_intel_mpi_benchmarking_tool()
|
||||
|
||||
# Restart the ssh sessions for changes to /etc/security/limits.conf
|
||||
# to take effect
|
||||
server_node.close()
|
||||
|
@ -386,7 +395,7 @@ class InfinibandSuite(TestSuite):
|
|||
# Ping Pong test
|
||||
find = server_node.tools[Find]
|
||||
find_results = find.find_files(
|
||||
server_node.get_pure_path("/usr"), "IMB-MPI1", sudo=True
|
||||
server_node.get_pure_path("/"), "IMB-MPI1", sudo=True
|
||||
)
|
||||
assert_that(len(find_results)).described_as(
|
||||
"Could not find location of IMB-MPI1 for Open MPI"
|
||||
|
@ -407,7 +416,7 @@ class InfinibandSuite(TestSuite):
|
|||
|
||||
# IMB-MPI Tests
|
||||
find_results = find.find_files(
|
||||
server_node.get_pure_path("/usr"), "IMB-MPI1", sudo=True
|
||||
server_node.get_pure_path("/"), "IMB-MPI1", sudo=True
|
||||
)
|
||||
assert_that(len(find_results)).described_as(
|
||||
"Could not find location of Open MPI test: IMB-MPI1"
|
||||
|
@ -417,7 +426,7 @@ class InfinibandSuite(TestSuite):
|
|||
"Could not find location of Open MPI test: IMB-MPI1"
|
||||
).is_not_empty()
|
||||
server_node.execute(
|
||||
f"/usr/local/bin/mpirun --host {server_ip},{client_ip} "
|
||||
f"/usr/local/bin/mpirun -hosts {server_ip},{client_ip} "
|
||||
"-n 2 --mca btl self,vader,openib --mca btl_openib_cq_size 4096 "
|
||||
"--mca btl_openib_allow_ib 1 --mca "
|
||||
f"btl_openib_warn_no_device_params_found 0 {test_path}",
|
||||
|
@ -571,6 +580,12 @@ class InfinibandSuite(TestSuite):
|
|||
raise SkippedException(err)
|
||||
|
||||
run_in_parallel([server_ib.install_mvapich_mpi, client_ib.install_mvapich_mpi])
|
||||
test_names = ["IMB-MPI1", "IMB-RMA", "IMB-NBC"]
|
||||
# Only for mariner, we need to build intel benchmarking tools
|
||||
# as they are not included in our packages
|
||||
server_ib.install_intel_mpi_benchmarking_tool(tool_names=test_names)
|
||||
|
||||
server_node.execute("ldconfig", sudo=True)
|
||||
|
||||
# Restart the ssh sessions for changes to /etc/security/limits.conf
|
||||
# to take effect
|
||||
|
@ -590,13 +605,15 @@ class InfinibandSuite(TestSuite):
|
|||
client_ssh.enable_public_key(server_ssh.generate_key_pairs())
|
||||
server_ssh.add_known_host(client_ip)
|
||||
client_ssh.add_known_host(server_ip)
|
||||
sudo=False
|
||||
if isinstance(server_node.os, CBLMariner):
|
||||
sudo=True
|
||||
|
||||
# Run MPI tests
|
||||
find = server_node.tools[Find]
|
||||
test_names = ["IMB-MPI1", "IMB-RMA", "IMB-NBC"]
|
||||
for test in test_names:
|
||||
find_results = find.find_files(
|
||||
server_node.get_pure_path("/usr"), test, sudo=True
|
||||
server_node.get_pure_path("/"), test, sudo=True
|
||||
)
|
||||
assert_that(len(find_results)).described_as(
|
||||
f"Could not find location of MVAPICH MPI test: {test}"
|
||||
|
@ -611,6 +628,7 @@ class InfinibandSuite(TestSuite):
|
|||
expected_exit_code=0,
|
||||
expected_exit_code_failure_message=f"Failed {test} test "
|
||||
"with MVAPICH MPI",
|
||||
sudo=sudo
|
||||
)
|
||||
|
||||
def _check_nd_enabled(self, node: Node) -> None:
|
||||
|
|
Загрузка…
Ссылка в новой задаче