зеркало из https://github.com/microsoft/DeepSpeed.git
Update clang-format version from 16 to 18. (#5839)
We previously used a slightly old version of clang-format; this caused issues when folks installed the latest version via apt or similar, rather than via Python, to try to fix their formatting issues. Plus, installing older versions is a pain, and the formatting style of the newer version seems better.
This commit is contained in:
Родитель
0584689d43
Коммит
297a6840e1
|
@ -301,9 +301,8 @@ int regular_read(const char* filename, std::vector<char>& buffer)
|
|||
} while (r > 0);
|
||||
|
||||
if (read_bytes != num_bytes) {
|
||||
std::cerr << "read error "
|
||||
<< " read_bytes (read) = " << read_bytes << " num_bytes (fstat) = " << num_bytes
|
||||
<< std::endl;
|
||||
std::cerr << "read error " << " read_bytes (read) = " << read_bytes
|
||||
<< " num_bytes (fstat) = " << num_bytes << std::endl;
|
||||
}
|
||||
assert(read_bytes == num_bytes);
|
||||
close(fd);
|
||||
|
|
|
@ -72,9 +72,8 @@ int deepspeed_py_aio_write(const torch::Tensor& buffer,
|
|||
|
||||
const std::chrono::duration<double> fn_time =
|
||||
std::chrono::high_resolution_clock::now() - start_time;
|
||||
std::cout << "Elapsed time(usec): "
|
||||
<< "aio = " << aio_time.count() * 1e6 << " call = " << fn_time.count() * 1e6
|
||||
<< std::endl;
|
||||
std::cout << "Elapsed time(usec): " << "aio = " << aio_time.count() * 1e6
|
||||
<< " call = " << fn_time.count() * 1e6 << std::endl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -118,8 +117,7 @@ int deepspeed_py_aio_read(torch::Tensor& buffer,
|
|||
|
||||
const std::chrono::duration<double> fn_time =
|
||||
std::chrono::high_resolution_clock::now() - start_time;
|
||||
std::cout << "Elapsed time(usec): "
|
||||
<< "aio = " << aio_time.count() * 1e6 << " call = " << fn_time.count() * 1e6
|
||||
<< std::endl;
|
||||
std::cout << "Elapsed time(usec): " << "aio = " << aio_time.count() * 1e6
|
||||
<< " call = " << fn_time.count() * 1e6 << std::endl;
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -93,9 +93,8 @@ int deepspeed_aio_handle_t::read(torch::Tensor& buffer, const char* filename, co
|
|||
if (validate) { validate_aio_operation(true, filename, read_buffer, num_file_bytes); }
|
||||
const std::chrono::duration<double> fn_time =
|
||||
std::chrono::high_resolution_clock::now() - start_time;
|
||||
std::cout << "Elapsed time(usec): "
|
||||
<< "aio = " << aio_time.count() * 1e6 << " call = " << fn_time.count() * 1e6
|
||||
<< std::endl;
|
||||
std::cout << "Elapsed time(usec): " << "aio = " << aio_time.count() * 1e6
|
||||
<< " call = " << fn_time.count() * 1e6 << std::endl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -128,9 +127,8 @@ int deepspeed_aio_handle_t::write(const torch::Tensor& buffer,
|
|||
|
||||
const std::chrono::duration<double> fn_time =
|
||||
std::chrono::high_resolution_clock::now() - start_time;
|
||||
std::cout << "Elapsed time(usec): "
|
||||
<< "aio = " << aio_time.count() * 1e6 << " call = " << fn_time.count() * 1e6
|
||||
<< std::endl;
|
||||
std::cout << "Elapsed time(usec): " << "aio = " << aio_time.count() * 1e6
|
||||
<< " call = " << fn_time.count() * 1e6 << std::endl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -10,7 +10,7 @@ Functionality for swapping optimizer tensors to/from (NVMe) storage devices.
|
|||
#include "deepspeed_py_copy.h"
|
||||
#include <omp.h>
|
||||
|
||||
#define ROUND_DOWN(size, step) ((size) & ~((step)-1))
|
||||
#define ROUND_DOWN(size, step) ((size) & ~((step) - 1))
|
||||
|
||||
#if defined(__AVX512__) or defined(__AVX256__)
|
||||
union AVX_Data {
|
||||
|
|
|
@ -125,11 +125,10 @@ struct CheckArch {
|
|||
std::cerr << #PTR " is not correctly aligned\n"; \
|
||||
return false; \
|
||||
}
|
||||
#define EVOFORMER_CHECK(COND, ERR) \
|
||||
if (!(COND)) { \
|
||||
std::cerr << "[Evoformer Attention]" \
|
||||
<< "'" #COND "' failed: " << ERR << "\n"; \
|
||||
return false; \
|
||||
#define EVOFORMER_CHECK(COND, ERR) \
|
||||
if (!(COND)) { \
|
||||
std::cerr << "[Evoformer Attention]" << "'" #COND "' failed: " << ERR << "\n"; \
|
||||
return false; \
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
|
@ -27,7 +27,7 @@ inline void writeAs(void* dst, const T& val)
|
|||
std::memcpy(dst, &val, sizeof(T));
|
||||
}
|
||||
|
||||
#define ROUND_DOWN(size, step) ((size) & ~((step)-1))
|
||||
#define ROUND_DOWN(size, step) ((size) & ~((step) - 1))
|
||||
|
||||
#if defined(__AVX512__)
|
||||
#define SIMD_STORE(a, d) _mm512_storeu_ps(a, d)
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
#define TILE (128 * 1024 * 1024)
|
||||
#if defined(__AVX512__) or defined(__AVX256__)
|
||||
|
||||
#define ROUND_DOWN(size, step) ((size) & ~((step)-1))
|
||||
#define ROUND_DOWN(size, step) ((size) & ~((step) - 1))
|
||||
|
||||
#if defined(__AVX512__)
|
||||
#define SIMD_STORE(a, d) _mm512_storeu_ps(a, d)
|
||||
|
|
|
@ -82,11 +82,11 @@
|
|||
}
|
||||
|
||||
template <typename T>
|
||||
__inline__ __attribute__((always_inline)) T reduce_block_into_lanes(
|
||||
T* x,
|
||||
T val,
|
||||
int lanes = 1,
|
||||
bool share_result = false) // lanes is intended to be <= 32.
|
||||
__inline__ __attribute__((always_inline)) T
|
||||
reduce_block_into_lanes(T* x,
|
||||
T val,
|
||||
int lanes = 1,
|
||||
bool share_result = false) // lanes is intended to be <= 32.
|
||||
{
|
||||
auto item_ct1 = sycl::ext::oneapi::experimental::this_nd_item<3>();
|
||||
int tid = item_ct1.get_local_id(2) + item_ct1.get_local_id(1) * item_ct1.get_local_range(2);
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
accelerate
|
||||
clang-format==16.0.2
|
||||
clang-format==18.1.3
|
||||
comet_ml>=3.41.0
|
||||
deepspeed-kernels ; sys_platform == 'linux'
|
||||
docutils<0.18
|
||||
|
|
Загрузка…
Ссылка в новой задаче