/* addons-server/lib/recommend/_recommend.c
 *
 * 155 lines
 * 4.0 KiB
 * C
 */

#include <Python.h>
/* Module-level references filled in by init_recommend(): `array` receives
 * the result of importing the "array" module and `ArrayType` receives that
 * module's "array" attribute.  _symmetric_diff_count() passes ArrayType to
 * PyObject_IsInstance() to choose between its two code paths.
 */
PyObject *array, *ArrayType;
/* Specialized _symmetric_diff_count for array.array() objects.
 * We get to skip all the GC and rich comparison functions since array()
 * stores C types directly.
 *
 * Counts the elements that are matched by no equal element in the other
 * sequence.  For inputs without duplicates this equals
 * len(set(xs).symmetric_difference(ys)), computed without the extra data
 * structures in between.  xs and ys must be sorted in ascending order for
 * the algorithm to work.
 *
 * NOTE(review): the arguments are typed as PyListObject so that ob_item can
 * be indexed in pointer-sized slots, and each slot is then reinterpreted as
 * a long.  This presumably relies on the arrays using a long-sized item
 * type and on sizeof(long) == sizeof(void *) -- confirm against the callers.
 *
 * Returns the (non-negative) count; this function cannot fail.
 */
static Py_ssize_t
array_symmetric_diff_count(PyListObject *a, PyListObject *b) {
    /* No Python code runs in here, so the sizes cannot change under us. */
    const Py_ssize_t a_len = Py_SIZE(a);
    const Py_ssize_t b_len = Py_SIZE(b);
    Py_ssize_t i = 0, j = 0, rv = 0;
    long x, y;

    /* Exploit the ascending sort of both inputs and walk them together,
     * advancing whichever side currently holds the smaller value.
     */
    while (i < a_len && j < b_len) {
        /* Pointer-to-integer conversion must be spelled out; it is not an
         * implicit conversion in C and newer compilers reject it.
         */
        x = (long) a->ob_item[i];
        y = (long) b->ob_item[j];
        if (x < y) {
            rv++;
            i++;
        } else if (x > y) {
            rv++;
            j++;
        } else {
            i++;
            j++;
        }
    }

    /* The loop stops once one side is exhausted, so at most one of these
     * two terms is non-zero: everything left over is unmatched.
     */
    rv += (a_len - i) + (b_len - j);
    return rv;
}
/* Generalized version of array_symmetric_diff_count for Python objects.
 *
 * `args` is the argument tuple of the calling method and must hold two
 * objects.  When both are instances of ArrayType the work is delegated to
 * array_symmetric_diff_count(); otherwise both are iterated and their items
 * compared with PyObject_Compare().  Both inputs must be sorted ascending.
 *
 * Returns the count, or -1 with a Python exception set on failure (bad
 * arguments, non-iterable input, or an error raised while iterating or
 * comparing).
 */
static Py_ssize_t
_symmetric_diff_count(PyObject *self, PyObject *args) {
    PyObject *a, *b;
    PyObject *xs = NULL, *ys = NULL, *x = NULL, *y = NULL;
    Py_ssize_t rv = 0;
    int cmp, is_array;

    if (!PyArg_ParseTuple(args, "OO", &a, &b))
        return -1;

    /* Fast path for two array objects.  PyObject_IsInstance() returns -1
     * on error, which must not be mistaken for "true".  If ArrayType was
     * never set up, fall through to the generic path.
     */
    if (ArrayType != NULL) {
        is_array = PyObject_IsInstance(a, ArrayType);
        if (is_array > 0)
            is_array = PyObject_IsInstance(b, ArrayType);
        if (is_array < 0)
            return -1;
        if (is_array)
            return array_symmetric_diff_count((PyListObject *) a,
                                              (PyListObject *) b);
    }

    xs = PyObject_GetIter(a);
    if (xs == NULL)
        goto error;
    ys = PyObject_GetIter(b);
    if (ys == NULL)
        goto error;

    /* PyIter_Next() returns NULL both on exhaustion and on error, so every
     * NULL result is followed by a PyErr_Occurred() check.
     */
    x = PyIter_Next(xs);
    if (x == NULL && PyErr_Occurred())
        goto error;
    y = PyIter_Next(ys);
    if (y == NULL && PyErr_Occurred())
        goto error;

    /* Walk both iterators together, advancing the side with the smaller
     * item; unequal items are counted, equal items are skipped in pairs.
     */
    while (x != NULL && y != NULL) {
        cmp = PyObject_Compare(x, y);
        /* The return value is unspecified on error; ask the interpreter. */
        if (PyErr_Occurred())
            goto error;

        if (cmp < 0) {
            rv++;
            Py_DECREF(x);
            x = PyIter_Next(xs);
        } else if (cmp > 0) {
            rv++;
            Py_DECREF(y);
            y = PyIter_Next(ys);
        } else {
            Py_DECREF(x);
            x = PyIter_Next(xs);
            /* y is still owned here; the error path releases it. */
            if (x == NULL && PyErr_Occurred())
                goto error;
            Py_DECREF(y);
            y = PyIter_Next(ys);
        }
        if ((x == NULL || y == NULL) && PyErr_Occurred())
            goto error;
    }

    /* At most one side still has a pending item; count it and whatever
     * remains in its iterator.  An exhausted iterator is never advanced
     * again.
     */
    while (x != NULL) {
        rv++;
        Py_DECREF(x);
        x = PyIter_Next(xs);
    }
    if (PyErr_Occurred())
        goto error;
    while (y != NULL) {
        rv++;
        Py_DECREF(y);
        y = PyIter_Next(ys);
    }
    if (PyErr_Occurred())
        goto error;

    Py_DECREF(xs);
    Py_DECREF(ys);
    return rv;

error:
    Py_XDECREF(x);
    Py_XDECREF(y);
    Py_XDECREF(xs);
    Py_XDECREF(ys);
    return -1;
}
/* A wrapper around _symmetric_diff_count so this is available for
 * testing.
 *
 * Returns a new Python int holding the count, or NULL when
 * _symmetric_diff_count() reports failure with -1.
 */
static PyObject *
symmetric_diff_count(PyObject *self, PyObject *args) {
    Py_ssize_t rv = _symmetric_diff_count(self, args);
    /* -1 is the error sentinel; a method signals failure by returning NULL,
     * not by returning the sentinel reinterpreted as a pointer.
     */
    if (rv == -1)
        return NULL;
    return PyInt_FromSsize_t(rv);
}
/* Calculate a similarity score from the symmetric difference count:
 * 1 / (1 + count), so identical inputs score 1.0 and the score shrinks
 * towards 0 as the inputs diverge.
 *
 * Returns a new Python float, or NULL when _symmetric_diff_count() reports
 * failure with -1.
 */
static PyObject *
similarity(PyObject *self, PyObject *args) {
    Py_ssize_t diff = _symmetric_diff_count(self, args);
    /* Without this check the error sentinel would yield 1 / 0. */
    if (diff == -1)
        return NULL;
    return PyFloat_FromDouble(1. / (1. + (double) diff));
}
/* Method table passed to Py_InitModule() in init_recommend().  Each entry
 * is {Python-visible name, C implementation, calling convention, docstring};
 * the all-NULL entry terminates the table.
 *
 * The docstrings are runtime text and are reproduced verbatim (including
 * the unbalanced parenthesis in the similarity formula).
 */
static PyMethodDef recommend_methods[] = {
    {
        "symmetric_diff_count",
        symmetric_diff_count,
        METH_VARARGS,
        "symmetric_diff_count(list1, list2)\n"
        "\n"
        "Count the number of items that are in exactly one of the lists.\n"
        "Both lists are expected to be sorted in ascending order.\n"
    },
    {
        "similarity",
        similarity,
        METH_VARARGS,
        "similarity(list1, list2)\n"
        "\n"
        "Get a correlation coefficient between the two lists, calculated as\n"
        "1. / (1. + symmetric_diff_count(list1, list2)\n"
    },
    {NULL, NULL, 0, NULL}
};
PyMODINIT_FUNC
init_recommend(void) {
(void) Py_InitModule("_recommend", recommend_methods);
array = PyImport_ImportModule("array");
ArrayType = PyObject_GetAttrString(array, "array");
}