habanalabs: increase timeout if working with simulator
When there is a spike in CPU consumption, it may cause random failures in the C/I, because the KMD timeout for CPU and/or QMAN0 jobs expires and the KMD stops communicating with the simulator. This commit fixes the problem by increasing the timeout in the polling functions when working with the simulator. Signed-off-by: Dalit Ben Zoor <dbenzoor@habana.ai> Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
This commit is contained in:
Родитель
f0539fb0fb
Коммит
b1b537713e
|
@@ -1147,7 +1147,13 @@ int hl_poll_timeout_memory(struct hl_device *hdev, u64 addr,
|
||||||
* either by the direct access of the device or by another core
|
* either by the direct access of the device or by another core
|
||||||
*/
|
*/
|
||||||
u32 *paddr = (u32 *) (uintptr_t) addr;
|
u32 *paddr = (u32 *) (uintptr_t) addr;
|
||||||
ktime_t timeout = ktime_add_us(ktime_get(), timeout_us);
|
ktime_t timeout;
|
||||||
|
|
||||||
|
/* timeout should be longer when working with simulator */
|
||||||
|
if (!hdev->pdev)
|
||||||
|
timeout_us *= 10;
|
||||||
|
|
||||||
|
timeout = ktime_add_us(ktime_get(), timeout_us);
|
||||||
|
|
||||||
might_sleep();
|
might_sleep();
|
||||||
|
|
||||||
|
|
|
@@ -1042,7 +1042,12 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
|
||||||
|
|
||||||
#define hl_poll_timeout(hdev, addr, val, cond, sleep_us, timeout_us) \
|
#define hl_poll_timeout(hdev, addr, val, cond, sleep_us, timeout_us) \
|
||||||
({ \
|
({ \
|
||||||
ktime_t __timeout = ktime_add_us(ktime_get(), timeout_us); \
|
ktime_t __timeout; \
|
||||||
|
/* timeout should be longer when working with simulator */ \
|
||||||
|
if (hdev->pdev) \
|
||||||
|
__timeout = ktime_add_us(ktime_get(), timeout_us); \
|
||||||
|
else \
|
||||||
|
__timeout = ktime_add_us(ktime_get(), (timeout_us * 10)); \
|
||||||
might_sleep_if(sleep_us); \
|
might_sleep_if(sleep_us); \
|
||||||
for (;;) { \
|
for (;;) { \
|
||||||
(val) = RREG32(addr); \
|
(val) = RREG32(addr); \
|
||||||
|
|
Загрузка…
Ссылка в новой задаче