зеркало из https://github.com/microsoft/antares.git
Ab debug (#235)
* rebase evaluation inputs to reduce precision errors * add AB_DEBUG option for displaying output values
This commit is contained in:
Родитель
e2219f6a2c
Коммит
fe0098a855
3
Makefile
3
Makefile
|
@ -10,6 +10,7 @@ DEVICE_NAME ?=
|
|||
HOST_MODE ?= 0
|
||||
EXPECTED_TIMEOUT ?= inf
|
||||
BATCH ?=
|
||||
AB_DEBUG ?= 0
|
||||
|
||||
CPU_THREADS ?=
|
||||
INNER_CMD = ./antares/run.sh
|
||||
|
@ -18,7 +19,7 @@ BACKEND = $(shell ./antares/get_backend.sh)
|
|||
PARAMS ?= docker run -v $(shell pwd):/antares -w /antares --privileged -v /:/host \
|
||||
--shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 \
|
||||
-v $(shell dirname `find /usr/lib/ -name libnvidia-ptxjitcompiler.so` 2>/dev/null | tail -n 1):/usr/local/nvidia/lib64 \
|
||||
-v $(shell pwd)/public/roc_prof:/usr/local/bin/rp -e CPU_THREADS=$(CPU_THREADS) -e RECORD=$(RECORD) -e BATCH=$(BATCH) \
|
||||
-v $(shell pwd)/public/roc_prof:/usr/local/bin/rp -e CPU_THREADS=$(CPU_THREADS) -e RECORD=$(RECORD) -e BATCH=$(BATCH) -e AB_DEBUG=$(AB_DEBUG) \
|
||||
-e STEP=$(STEP) -e AGENT_URL=$(value AGENT_URL) -e TUNER=$(TUNER) -e CONFIG='$(value CONFIG)' -e BACKEND=$(BACKEND) -e COMPUTE_V1='$(value COMPUTE_V1)' \
|
||||
-e COMMIT=$(COMMIT) -e HARDWARE_CONFIG=$(HARDWARE_CONFIG) -e DEVICE_NAME='$(value DEVICE_NAME)' -e EXPECTED_TIMEOUT=$(EXPECTED_TIMEOUT)
|
||||
|
||||
|
|
|
@ -171,8 +171,11 @@ struct ExecutionModule {
|
|||
std::string backend;
|
||||
|
||||
void *hModule;
|
||||
bool debug_output;
|
||||
|
||||
ExecutionModule(std::string source) {
|
||||
debug_output = getenv("AB_DEBUG") && *getenv("AB_DEBUG") ? atoi(getenv("AB_DEBUG")) : 0;
|
||||
|
||||
static const char file_proto[] = "file://";
|
||||
|
||||
if (0 == strncmp(source.c_str(), file_proto, sizeof(file_proto) - 1)) {
|
||||
|
@ -264,7 +267,26 @@ struct ExecutionModule {
|
|||
if (--tensor_used[it->in_args[i]] == 0) {
|
||||
ab::release(tensor_memory[it->in_args[i]], local_tensors[it->in_args[i]].mem_size());
|
||||
}
|
||||
|
||||
if (debug_output) {
|
||||
for (auto &arg: it->out_args) {
|
||||
char d[32];
|
||||
ab::memcpyDtoH(d, tensor_memory[arg], sizeof(d));
|
||||
ab::synchronize();
|
||||
if (local_tensors[arg].dtype == "float32")
|
||||
fprintf(stderr, "[DEBUG] %s(%s) = %g, %g, %g, %g ..\n", arg.c_str(), local_tensors[arg].dtype.c_str(), ((float*)d)[0], ((float*)d)[1], ((float*)d)[2], ((float*)d)[3]);
|
||||
else if (local_tensors[arg].dtype == "float64")
|
||||
fprintf(stderr, "[DEBUG] %s(%s) = %g, %g, %g, %g ..\n", arg.c_str(), local_tensors[arg].dtype.c_str(), ((double*)d)[0], ((double*)d)[1], ((double*)d)[2], ((double*)d)[3]);
|
||||
else if (local_tensors[arg].dtype == "int32")
|
||||
fprintf(stderr, "[DEBUG] %s(%s) = %d, %d, %d, %d ..\n", arg.c_str(), local_tensors[arg].dtype.c_str(), ((int*)d)[0], ((int*)d)[1], ((int*)d)[2], ((int*)d)[3]);
|
||||
else
|
||||
fprintf(stderr, "[DEBUG] %s(%s) = %016x, %016x, %016x, %016x ..\n", arg.c_str(), local_tensors[arg].dtype.c_str(), ((int*)d)[0], ((int*)d)[1], ((int*)d)[2], ((int*)d)[3]);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (debug_output)
|
||||
fprintf(stderr, "[DEBUG] =======================\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
|
|
@ -40,7 +40,10 @@ int main(int argc, char** argv)
|
|||
((int*)hptr.data())[x] = (x + i + 1) % 71;
|
||||
} else if (it.dtype == "float32") {
|
||||
for (size_t x = 0; x < size; ++x)
|
||||
((float*)hptr.data())[x] = (x + i + 1) % 71;
|
||||
((float*)hptr.data())[x] = ((x + i + 1) % 71 - 35.5) * 0.00001;
|
||||
} else if (it.dtype == "float64") {
|
||||
for (size_t x = 0; x < size; ++x)
|
||||
((double*)hptr.data())[x] = ((x + i + 1) % 71 - 35.5) * 0.00001;
|
||||
} else {
|
||||
size_t byte_size = size * it.type_size();
|
||||
for (size_t x = 0; x < byte_size / sizeof(int); ++x)
|
||||
|
@ -71,6 +74,12 @@ int main(int argc, char** argv)
|
|||
if (it.dtype == "int32") {
|
||||
for (size_t x = 0; x < byte_size / sizeof(int); ++x)
|
||||
digest += (x + 1) % 83 * ((int*)hptr.data())[x];
|
||||
} else if (it.dtype == "float32") {
|
||||
for (size_t x = 0; x < byte_size / sizeof(float); ++x)
|
||||
digest += (x + 1) % 83 * ((float*)hptr.data())[x];
|
||||
} else if (it.dtype == "float64") {
|
||||
for (size_t x = 0; x < byte_size / sizeof(double); ++x)
|
||||
digest += (x + 1) % 83 * ((double*)hptr.data())[x];
|
||||
} else {
|
||||
for (size_t x = 0; x < byte_size / sizeof(float); ++x)
|
||||
digest += (x + 1) % 83 * ((float*)hptr.data())[x];
|
||||
|
|
|
@ -65,7 +65,7 @@ class OpTensor:
|
|||
return self.cast(output_dtype)
|
||||
if self._op == 'const' and self._value == 1:
|
||||
return other.cast(output_dtype)
|
||||
return OpTensor('op', {"name": "*", "inputs": [self, other]}, output_dtype)
|
||||
return OpTensor('op', {"name": "*", "inputs": [self.cast(output_dtype), other.cast(output_dtype)]}, output_dtype)
|
||||
|
||||
def __rmul__(self, other):
|
||||
other = OpTensor.parse(other)
|
||||
|
@ -114,7 +114,7 @@ class OpTensor:
|
|||
return self.cast(output_dtype)
|
||||
if self._op == 'const' and self._value == 0:
|
||||
return other.cast(output_dtype)
|
||||
return OpTensor('op', {"name": "+", "inputs": [self, other]}, output_dtype)
|
||||
return OpTensor('op', {"name": "+", "inputs": [self.cast(output_dtype), other.cast(output_dtype)]}, output_dtype)
|
||||
|
||||
def __radd__(self, other):
|
||||
other = OpTensor.parse(other)
|
||||
|
@ -125,7 +125,7 @@ class OpTensor:
|
|||
output_dtype = OpTensor.merge_dtype(self, other)
|
||||
if other._op == 'const' and other._value == 0:
|
||||
return self.cast(output_dtype)
|
||||
return OpTensor('op', {"name": "-", "inputs": [self, other]}, output_dtype)
|
||||
return OpTensor('op', {"name": "-", "inputs": [self.cast(output_dtype), other.cast(output_dtype)]}, output_dtype)
|
||||
|
||||
def __rsub__(self, other):
|
||||
other = OpTensor.parse(other)
|
||||
|
|
Загрузка…
Ссылка в новой задаче