[ruby/prism] Faster pm_integer_parse pm_integer_string using karatsuba algorithm

https://github.com/ruby/prism/commit/ae4fb6b988
This commit is contained in:
tompng 2024-02-26 22:05:30 +09:00 коммит произвёл Kevin Newton
Родитель 4186609d88
Коммит 5113d6b059
1 изменённых файлов: 290 добавлений и 120 удалений

Просмотреть файл

@ -1,117 +1,139 @@
#include "prism/util/pm_integer.h" #include "prism/util/pm_integer.h"
/** /**
* Create a new node for an integer in the linked list. * Bigint with arbitary base. In practice, base is 1<<32 or 10**9.
* When base is 10**9, it acts as bigdecimal.
*/ */
static pm_integer_word_t * typedef struct {
pm_integer_node_create(pm_integer_t *integer, uint32_t value) { size_t length;
integer->length++; uint32_t *values;
} bigint_t;
pm_integer_word_t *node = xmalloc(sizeof(pm_integer_word_t));
if (node == NULL) return NULL;
*node = (pm_integer_word_t) { .next = NULL, .value = value };
return node;
}
/** /**
* Copy one integer onto another. * Adds two bigint_t with the given base.
*/ */
static void static bigint_t
pm_integer_copy(pm_integer_t *dest, const pm_integer_t *src) { big_add(bigint_t left, bigint_t right, uint64_t base) {
dest->negative = src->negative; size_t length = (left.length < right.length ? right.length : left.length);
dest->length = 0; uint32_t *values = (uint32_t*) malloc(sizeof(uint32_t) * (length + 1));
uint64_t carry = 0;
dest->head.value = src->head.value; for (size_t i = 0; i < length; i++) {
dest->head.next = NULL; uint64_t sum = carry + (i < left.length ? left.values[i] : 0) + (i < right.length ? right.values[i] : 0);
values[i] = (uint32_t) (sum % base);
pm_integer_word_t *dest_current = &dest->head; carry = sum / base;
const pm_integer_word_t *src_current = src->head.next;
while (src_current != NULL) {
dest_current->next = pm_integer_node_create(dest, src_current->value);
if (dest_current->next == NULL) return;
dest_current = dest_current->next;
src_current = src_current->next;
} }
if (carry > 0) {
dest_current->next = NULL; values[length] = (uint32_t) carry;
length++;
}
return (bigint_t) { length, values };
} }
/** /**
* Add a 32-bit integer to an integer. * Calculates `a - b - c` with the given base.
* Result is assumed to be positive value. Internal use for karatsuba_multiply.
*/ */
static void static bigint_t
pm_integer_add(pm_integer_t *integer, uint32_t addend) { big_sub2(bigint_t a, bigint_t b, bigint_t c, uint64_t base) {
uint32_t carry = addend; size_t length = a.length;
pm_integer_word_t *current = &integer->head; uint32_t *values = (uint32_t*) malloc(sizeof(uint32_t) * length);
int64_t carry = 0;
for (size_t i = 0; i < length; i++) {
int64_t sub = carry + a.values[i] - (i < b.length ? b.values[i] : 0) - (i < c.length ? c.values[i] : 0);
if (sub >= 0) {
values[i] = (uint32_t) sub;
carry = 0;
} else {
sub += 2 * (int64_t) base;
values[i] = (uint32_t) ((uint64_t) sub % base);
carry = sub / (int64_t) base - 2;
}
}
while (length > 1 && values[length - 1] == 0) length--;
return (bigint_t) { length, values };
}
while (carry > 0) { /**
uint64_t result = (uint64_t) current->value + carry; * Multiply two bigint_t with the given base using karatsuba algorithm.
carry = (uint32_t) (result >> 32); */
current->value = (uint32_t) result; static bigint_t
karatsuba_multiply(bigint_t left, bigint_t right, uint64_t base) {
if (carry > 0) { if (left.length > right.length) {
if (current->next == NULL) { bigint_t temp = left;
current->next = pm_integer_node_create(integer, carry); left = right;
break; right = temp;
}
if (left.length <= 10) {
size_t length = left.length + right.length;
uint32_t *values = (uint32_t*) calloc(length, sizeof(uint32_t));
for (size_t i = 0; i < left.length; i++) {
uint32_t carry = 0;
for (size_t j = 0; j < right.length; j++) {
uint64_t product = (uint64_t) left.values[i] * right.values[j] + values[i + j] + carry;
values[i + j] = (uint32_t) (product % base);
carry = (uint32_t) (product / base);
} }
values[i + right.length] = carry;
current = current->next;
} }
while (length > 1 && values[length - 1] == 0) length--;
return (bigint_t) { length, values };
} }
} if (left.length * 2 <= right.length) {
uint32_t *values = (uint32_t*) calloc(left.length + right.length, sizeof(uint32_t));
for (size_t start_offset = 0; start_offset < right.length; start_offset += left.length) {
size_t end_offset = start_offset + left.length;
if (end_offset > right.length) end_offset = right.length;
bigint_t sliced_right = { end_offset - start_offset, right.values + start_offset };
bigint_t v = karatsuba_multiply(left, sliced_right, base);
uint32_t carry = 0;
for (size_t i = 0; i < v.length; i++) {
uint64_t sum = (uint64_t) values[start_offset + i] + v.values[i] + carry;
values[start_offset + i] = (uint32_t) (sum % base);
carry = (uint32_t) (sum / base);
}
free(v.values);
values[start_offset + v.length] += carry;
}
return (bigint_t) { left.length + right.length, values };
}
size_t half = left.length / 2;
bigint_t x0 = { half, left.values };
bigint_t x1 = { left.length - half, left.values + half };
bigint_t y0 = { half, right.values };
bigint_t y1 = { right.length - half, right.values + half };
bigint_t z0 = karatsuba_multiply(x0, y0, base);
bigint_t z2 = karatsuba_multiply(x1, y1, base);
/** // For simplicity to avoid considering negative values,
* Multiple an integer by a 32-bit integer. In practice, the multiplier is the // use `z1 = (x0 + x1) * (y0 + y1) - z0 - z2` instead of original karatsuba algorithm.
* base of the integer, so this is 2, 8, 10, or 16. bigint_t x01 = big_add(x0, x1, base);
*/ bigint_t y01 = big_add(y0, y1, base);
static void bigint_t xy = karatsuba_multiply(x01, y01, base);
pm_integer_multiply(pm_integer_t *integer, uint32_t multiplier) { bigint_t z1 = big_sub2(xy, z0, z2, base);
size_t length = left.length + right.length;
uint32_t *values = (uint32_t*) calloc(length, sizeof(uint32_t));
memcpy(values, z0.values, sizeof(uint32_t) * z0.length);
memcpy(values + 2 * half, z2.values, sizeof(uint32_t) * z2.length);
uint32_t carry = 0; uint32_t carry = 0;
for(size_t i = 0; i < z1.length; i++) {
for (pm_integer_word_t *current = &integer->head; current != NULL; current = current->next) { uint64_t sum = (uint64_t) carry + values[i + half] + z1.values[i];
uint64_t result = (uint64_t) current->value * multiplier + carry; values[i + half] = (uint32_t) (sum % base);
carry = (uint32_t) (result >> 32); carry = (uint32_t) (sum / base);
current->value = (uint32_t) result;
if (carry > 0 && current->next == NULL) {
current->next = pm_integer_node_create(integer, carry);
break;
}
} }
} for(size_t i = half + z1.length; carry > 0; i++) {
uint64_t sum = (uint64_t) carry + values[i];
/** values[i] = (uint32_t) (sum % base);
* Divide an individual word by a 32-bit integer. This will recursively divide carry = (uint32_t) (sum / base);
* any subsequent nodes in the linked list.
*/
static uint32_t
pm_integer_divide_word(pm_integer_t *integer, pm_integer_word_t *word, uint32_t dividend) {
uint32_t remainder = 0;
if (word->next != NULL) {
remainder = pm_integer_divide_word(integer, word->next, dividend);
if (integer->length > 0 && word->next->value == 0) {
xfree(word->next);
word->next = NULL;
integer->length--;
}
} }
while (length > 1 && values[length - 1] == 0) length--;
uint64_t value = ((uint64_t) remainder << 32) | word->value; free(z0.values);
word->value = (uint32_t) (value / dividend); free(z1.values);
return (uint32_t) (value % dividend); free(z2.values);
} free(x01.values);
free(y01.values);
/** free(xy.values);
* Divide an integer by a 32-bit integer. In practice, this is only 10 so that return (bigint_t) { length, values };
* we can format it as a string. It returns the remainder of the division.
*/
static uint32_t
pm_integer_divide(pm_integer_t *integer, uint32_t dividend) {
return pm_integer_divide_word(integer, &integer->head, dividend);
} }
/** /**
@ -140,6 +162,140 @@ pm_integer_parse_digit(const uint8_t character) {
} }
} }
/**
* Create a bigint_t from uint64_t with the given base.
*/
static bigint_t
uint64_to_bigint(uint64_t value, uint64_t base) {
uint64_t v = value;
size_t len = 0;
while (value > 0) { len++; value /= base; }
if (len == 0) len = 1;
uint32_t *values = (uint32_t*) malloc(sizeof(uint32_t) * len);
for (size_t i = 0; i < len; i++) {
values[i] = (uint32_t) (v % base);
v /= base;
}
return (bigint_t) { len, values };
}
/**
* Convert base of bigint.
* In practice, it converts 10**9 to 1<<32 or 1<<32 to 10**9.
*/
static bigint_t
karatsuba_convert_base(bigint_t source, uint64_t base_from, uint64_t base_to) {
size_t bigints_length = (source.length + 1) / 2;
bigint_t *bigints = (bigint_t*) malloc(sizeof(bigint_t) * bigints_length);
for (size_t i = 0; i < source.length; i += 2) {
uint64_t v = source.values[i] + base_from * (i + 1 < source.length ? source.values[i + 1] : 0);
bigints[i / 2] = uint64_to_bigint(v, base_to);
}
bigint_t base = uint64_to_bigint(base_from, base_to);
while (bigints_length > 1) {
size_t new_length = (bigints_length + 1) / 2;
bigint_t new_base = karatsuba_multiply(base, base, base_to);
free(base.values);
base = new_base;
bigint_t *new_bigints = (bigint_t*) malloc(sizeof(bigint_t) * new_length);
for (size_t i = 0; i < bigints_length; i += 2) {
if (i + 1 == bigints_length) {
new_bigints[i / 2] = bigints[i];
} else {
bigint_t multiplied = karatsuba_multiply(base, bigints[i + 1], base_to);
new_bigints[i / 2] = big_add(bigints[i], multiplied, base_to);
free(bigints[i].values);
free(bigints[i + 1].values);
free(multiplied.values);
}
}
free(bigints);
bigints = new_bigints;
bigints_length = new_length;
}
free(base.values);
bigint_t result = bigints[0];
free(bigints);
return result;
}
/**
* Convert digits to bigint_t with the given power-of-two base.
*/
static bigint_t
big_parse_powof2(uint32_t base, const uint8_t *digits, size_t digits_length) {
size_t bit = 1;
while (base > (uint32_t) (1 << bit)) bit++;
size_t length = (digits_length * bit + 31) / 32;
uint32_t *values = (uint32_t*) calloc(length, sizeof(uint32_t));
for (size_t i = 0; i < digits_length; i++) {
size_t bit_position = bit * (digits_length - i - 1);
uint32_t value = digits[i];
size_t index = bit_position / 32;
size_t shift = bit_position % 32;
values[index] |= value << shift;
if (32 - shift < bit) values[index + 1] |= value >> (32 - shift);
}
while (length > 1 && values[length - 1] == 0) length--;
return (bigint_t) { length, values };
}
/**
* Convert decimal digits to bigint.
*/
static bigint_t
big_parse_decimal(const uint8_t *digits, size_t digits_length) {
// Construct a bigdecimal from the digits.
const size_t batch = 9;
const uint64_t batch_base = 1000000000;
size_t values_length = (digits_length + batch - 1) / batch;
bigint_t bigint = { values_length, (uint32_t*) calloc(values_length, sizeof(uint32_t)) };
uint32_t v = 0;
for (size_t i = 0; i < digits_length; i++) {
v = v * 10 + digits[i];
size_t reverse_index = digits_length - i - 1;
if (reverse_index % batch == 0) {
bigint.values[reverse_index / batch] = v;
v = 0;
}
}
// Convert bigint base from 10**9 to 1<<32.
bigint_t converted = karatsuba_convert_base(bigint, batch_base, ((uint64_t) 1 << 32));
free(bigint.values);
return converted;
}
/**
* Parse a large integer from a string that does not fit into uint32_t.
*/
static void
pm_integer_parse_big(pm_integer_t *integer, uint32_t multiplier, const uint8_t *start, const uint8_t *end) {
// Allocate an array to store digits.
uint8_t *digits = malloc(sizeof(uint8_t) * (size_t) (end - start));
size_t digits_length = 0;
for (; start < end; start++) {
if (*start == '_') continue;
digits[digits_length++] = (uint8_t) pm_integer_parse_digit(*start);
}
// Construct bigint_t from the digits.
bigint_t bigint =
multiplier == 10 ? big_parse_decimal(digits, digits_length) : big_parse_powof2(multiplier, digits, digits_length);
// Pack bigint_t to pm_integer_t.
integer->length = bigint.length - 1;
integer->head.value = bigint.values[0];
pm_integer_word_t *current = &integer->head;
for (size_t i = 1; i < bigint.length; i++) {
current->next = malloc(sizeof(pm_integer_word_t));
current = current->next;
current->value = bigint.values[i];
current->next = NULL;
}
free(bigint.values);
free(digits);
}
/** /**
* Parse an integer from a string. This assumes that the format of the integer * Parse an integer from a string. This assumes that the format of the integer
* has already been validated, as internal validation checks are not performed * has already been validated, as internal validation checks are not performed
@ -189,15 +345,19 @@ pm_integer_parse(pm_integer_t *integer, pm_integer_base_t base, const uint8_t *s
// invalid integer. If this is the case, we'll just return 0. // invalid integer. If this is the case, we'll just return 0.
if (start >= end) return; if (start >= end) return;
// Add the first digit to the integer. const uint8_t *ptr = start;
pm_integer_add(integer, pm_integer_parse_digit(*start++)); uint64_t value = pm_integer_parse_digit(*ptr++);
for (; ptr < end; ptr++) {
// Add the subsequent digits to the integer. if (*ptr == '_') continue;
for (; start < end; start++) { value = value * multiplier + pm_integer_parse_digit(*ptr);
if (*start == '_') continue; if (value > UINT32_MAX) {
pm_integer_multiply(integer, multiplier); // If the integer is too large to fit into a single node, then we'll
pm_integer_add(integer, pm_integer_parse_digit(*start)); // parse it as a big integer.
pm_integer_parse_big(integer, multiplier, start, end);
return;
}
} }
integer->head.value = (uint32_t) value;
} }
/** /**
@ -254,29 +414,39 @@ pm_integer_string(pm_buffer_t *buffer, const pm_integer_t *integer) {
return; return;
} }
default: { default: {
// First, allocate a buffer that we'll copy the decimal digits into. // Pack pm_integer_t to bigint_t.
size_t length = (integer->length + 1) * 10; size_t length = integer->length + 1;
char *digits = xcalloc(length, sizeof(char)); uint32_t *values = calloc(length, sizeof(uint32_t));
const pm_integer_word_t *current = &(integer->head);
for (size_t i = 0; i < length; i++) {
values[i] = current->value;
current = current->next;
}
bigint_t bigint = { length, values };
// Convert bigint base from 1<<32 to 10**9.
bigint_t converted = karatsuba_convert_base(bigint, (uint64_t) 1 << 32, 1000000000);
free(values);
// Allocate a buffer that we'll copy the decimal digits into.
size_t char_length = converted.length * 9;
char *digits = calloc(char_length, sizeof(char));
if (digits == NULL) return; if (digits == NULL) return;
// Next, create a new integer that we'll use to store the result of // Pack bigdecimal to digits.
// the division and modulo operations. for (size_t i = 0; i < converted.length; i++) {
pm_integer_t copy; uint32_t v = converted.values[i];
pm_integer_copy(&copy, integer); for (size_t j = 0; j < 9; j++) {
digits[char_length - 9 * i - j - 1] = (char) ('0' + v % 10);
// Then, iterate through the integer, dividing by 10 and storing the v /= 10;
// result in the buffer. }
char *ending = digits + length - 1;
char *current = ending;
while (copy.length > 0 || copy.head.value > 0) {
uint32_t remainder = pm_integer_divide(&copy, 10);
*current-- = (char) ('0' + remainder);
} }
size_t start_offset = 0;
while (start_offset < char_length - 1 && digits[start_offset] == '0') start_offset++;
// Finally, append the string to the buffer and free the digits. // Finally, append the string to the buffer and free the digits.
pm_buffer_append_string(buffer, current + 1, (size_t) (ending - current)); pm_buffer_append_string(buffer, digits + start_offset, char_length - start_offset);
xfree(digits); free(digits);
free(converted.values);
return; return;
} }
} }