* rebase evaluation inputs to reduce precision errors
* add AB_DEBUG option for displaying output values
This commit is contained in:
ghostplant 2021-04-14 09:09:22 +00:00 коммит произвёл GitHub
Родитель e2219f6a2c
Коммит fe0098a855
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
4 изменённых файлов: 37 добавлений и 5 удалений

Просмотреть файл

@ -10,6 +10,7 @@ DEVICE_NAME ?=
HOST_MODE ?= 0
EXPECTED_TIMEOUT ?= inf
BATCH ?=
AB_DEBUG ?= 0
CPU_THREADS ?=
INNER_CMD = ./antares/run.sh
@ -18,7 +19,7 @@ BACKEND = $(shell ./antares/get_backend.sh)
PARAMS ?= docker run -v $(shell pwd):/antares -w /antares --privileged -v /:/host \
--shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 \
-v $(shell dirname `find /usr/lib/ -name libnvidia-ptxjitcompiler.so` 2>/dev/null | tail -n 1):/usr/local/nvidia/lib64 \
-v $(shell pwd)/public/roc_prof:/usr/local/bin/rp -e CPU_THREADS=$(CPU_THREADS) -e RECORD=$(RECORD) -e BATCH=$(BATCH) \
-v $(shell pwd)/public/roc_prof:/usr/local/bin/rp -e CPU_THREADS=$(CPU_THREADS) -e RECORD=$(RECORD) -e BATCH=$(BATCH) -e AB_DEBUG=$(AB_DEBUG) \
-e STEP=$(STEP) -e AGENT_URL=$(value AGENT_URL) -e TUNER=$(TUNER) -e CONFIG='$(value CONFIG)' -e BACKEND=$(BACKEND) -e COMPUTE_V1='$(value COMPUTE_V1)' \
-e COMMIT=$(COMMIT) -e HARDWARE_CONFIG=$(HARDWARE_CONFIG) -e DEVICE_NAME='$(value DEVICE_NAME)' -e EXPECTED_TIMEOUT=$(EXPECTED_TIMEOUT)

Просмотреть файл

@ -171,8 +171,11 @@ struct ExecutionModule {
std::string backend;
void *hModule;
bool debug_output;
ExecutionModule(std::string source) {
debug_output = getenv("AB_DEBUG") && *getenv("AB_DEBUG") ? atoi(getenv("AB_DEBUG")) : 0;
static const char file_proto[] = "file://";
if (0 == strncmp(source.c_str(), file_proto, sizeof(file_proto) - 1)) {
@ -264,7 +267,26 @@ struct ExecutionModule {
if (--tensor_used[it->in_args[i]] == 0) {
ab::release(tensor_memory[it->in_args[i]], local_tensors[it->in_args[i]].mem_size());
}
if (debug_output) {
for (auto &arg: it->out_args) {
char d[32];
ab::memcpyDtoH(d, tensor_memory[arg], sizeof(d));
ab::synchronize();
if (local_tensors[arg].dtype == "float32")
fprintf(stderr, "[DEBUG] %s(%s) = %g, %g, %g, %g ..\n", arg.c_str(), local_tensors[arg].dtype.c_str(), ((float*)d)[0], ((float*)d)[1], ((float*)d)[2], ((float*)d)[3]);
else if (local_tensors[arg].dtype == "float64")
fprintf(stderr, "[DEBUG] %s(%s) = %g, %g, %g, %g ..\n", arg.c_str(), local_tensors[arg].dtype.c_str(), ((double*)d)[0], ((double*)d)[1], ((double*)d)[2], ((double*)d)[3]);
else if (local_tensors[arg].dtype == "int32")
fprintf(stderr, "[DEBUG] %s(%s) = %d, %d, %d, %d ..\n", arg.c_str(), local_tensors[arg].dtype.c_str(), ((int*)d)[0], ((int*)d)[1], ((int*)d)[2], ((int*)d)[3]);
else
fprintf(stderr, "[DEBUG] %s(%s) = %016x, %016x, %016x, %016x ..\n", arg.c_str(), local_tensors[arg].dtype.c_str(), ((int*)d)[0], ((int*)d)[1], ((int*)d)[2], ((int*)d)[3]);
}
}
}
if (debug_output)
fprintf(stderr, "[DEBUG] =======================\n");
return 0;
}
};

Просмотреть файл

@ -40,7 +40,10 @@ int main(int argc, char** argv)
((int*)hptr.data())[x] = (x + i + 1) % 71;
} else if (it.dtype == "float32") {
for (size_t x = 0; x < size; ++x)
((float*)hptr.data())[x] = (x + i + 1) % 71;
((float*)hptr.data())[x] = ((x + i + 1) % 71 - 35.5) * 0.00001;
} else if (it.dtype == "float64") {
for (size_t x = 0; x < size; ++x)
((double*)hptr.data())[x] = ((x + i + 1) % 71 - 35.5) * 0.00001;
} else {
size_t byte_size = size * it.type_size();
for (size_t x = 0; x < byte_size / sizeof(int); ++x)
@ -71,6 +74,12 @@ int main(int argc, char** argv)
if (it.dtype == "int32") {
for (size_t x = 0; x < byte_size / sizeof(int); ++x)
digest += (x + 1) % 83 * ((int*)hptr.data())[x];
} else if (it.dtype == "float32") {
for (size_t x = 0; x < byte_size / sizeof(float); ++x)
digest += (x + 1) % 83 * ((float*)hptr.data())[x];
} else if (it.dtype == "float64") {
for (size_t x = 0; x < byte_size / sizeof(double); ++x)
digest += (x + 1) % 83 * ((double*)hptr.data())[x];
} else {
for (size_t x = 0; x < byte_size / sizeof(float); ++x)
digest += (x + 1) % 83 * ((float*)hptr.data())[x];

Просмотреть файл

@ -65,7 +65,7 @@ class OpTensor:
return self.cast(output_dtype)
if self._op == 'const' and self._value == 1:
return other.cast(output_dtype)
return OpTensor('op', {"name": "*", "inputs": [self, other]}, output_dtype)
return OpTensor('op', {"name": "*", "inputs": [self.cast(output_dtype), other.cast(output_dtype)]}, output_dtype)
def __rmul__(self, other):
other = OpTensor.parse(other)
@ -114,7 +114,7 @@ class OpTensor:
return self.cast(output_dtype)
if self._op == 'const' and self._value == 0:
return other.cast(output_dtype)
return OpTensor('op', {"name": "+", "inputs": [self, other]}, output_dtype)
return OpTensor('op', {"name": "+", "inputs": [self.cast(output_dtype), other.cast(output_dtype)]}, output_dtype)
def __radd__(self, other):
other = OpTensor.parse(other)
@ -125,7 +125,7 @@ class OpTensor:
output_dtype = OpTensor.merge_dtype(self, other)
if other._op == 'const' and other._value == 0:
return self.cast(output_dtype)
return OpTensor('op', {"name": "-", "inputs": [self, other]}, output_dtype)
return OpTensor('op', {"name": "-", "inputs": [self.cast(output_dtype), other.cast(output_dtype)]}, output_dtype)
def __rsub__(self, other):
other = OpTensor.parse(other)