зеркало из https://github.com/mozilla/kitsune.git
377 строки
13 KiB
Plaintext
Executable File
377 строки
13 KiB
Plaintext
Executable File
#!/usr/bin/env python26
|
|
|
|
# Exponent substituted for ``{n}`` in the SQL literal ``10e{n}`` that the
# "questions" source uses to build document ids
# (question id * 10e<n> + answer id).
# NOTE: ``10e6`` is scientific notation for 10 * 10**6, i.e. 10**7.
ID_FACTOR = 6
|
|
|
|
# Pull the deployment settings into the module namespace.  ``localsettings``
# is preferred; when it cannot be imported, ``localsettings_django`` is used
# instead.  The star import is kept because the rest of this script refers to
# the settings by bare name (presumably MYSQL_*, ROOT_PATH, ... -- none of
# them are defined in this file).
try:
    from localsettings import *
except ImportError:
    from localsettings_django import *
|
|
|
|
#############################################################################
## data source definition
#############################################################################
|
|
|
|
# Connection settings reused by every Sphinx ``source`` section via the
# ``{mysql}`` placeholder.  MYSQL_HOST, MYSQL_USER, MYSQL_PASS and MYSQL_NAME
# are not defined in this file; presumably they come from the star-imported
# settings module.
# The two ``sql_query_pre`` statements switch the connection to utf8 and turn
# the MySQL query cache off for the indexing session.
MYSQL = """
type = mysql
sql_host = {sql_host}
sql_user = {sql_user}
sql_pass = {sql_pass}
sql_db = {sql_db}

sql_query_pre = SET NAMES utf8
sql_query_pre = SET SESSION query_cache_type = OFF
""".format(sql_host=MYSQL_HOST, sql_user=MYSQL_USER,
           sql_pass=MYSQL_PASS, sql_db=MYSQL_NAME)
|
|
|
|
# Sphinx ``charset_table`` directive shared by all indexes (inserted through
# the ``{charset}`` placeholder).
# Every physical line ends in a backslash, which Python removes together with
# the newline, so the directive reaches the Sphinx config as ONE logical line.
# Continuation lines are indented so the joined fragments stay separated by
# whitespace.
CHARSET_TABLE = """
charset_table = U+FF10..U+FF19->0..9, 0..9, U+FF41..U+FF5A->a..z, U+FF21..U+FF3A->a..z,\
    A..Z->a..z, a..z, U+0149, U+017F, U+0138, U+00DF, U+00FF, U+00C0..U+00D6->U+00E0..U+00F6,\
    U+00E0..U+00F6, U+00D8..U+00DE->U+00F8..U+00FE, U+00F8..U+00FE, U+0100->U+0101, U+0101,\
    U+0102->U+0103, U+0103, U+0104->U+0105, U+0105, U+0106->U+0107, U+0107, U+0108->U+0109,\
    U+0109, U+010A->U+010B, U+010B, U+010C->U+010D, U+010D, U+010E->U+010F, U+010F,\
    U+0110->U+0111, U+0111, U+0112->U+0113, U+0113, U+0114->U+0115, U+0115, U+0116->U+0117,\
    U+0117, U+0118->U+0119, U+0119, U+011A->U+011B, U+011B, U+011C->U+011D, U+011D,\
    U+011E->U+011F, U+011F, U+0130->U+0131, U+0131, U+0132->U+0133, U+0133, U+0134->U+0135,\
    U+0135, U+0136->U+0137, U+0137, U+0139->U+013A, U+013A, U+013B->U+013C, U+013C,\
    U+013D->U+013E, U+013E, U+013F->U+0140, U+0140, U+0141->U+0142, U+0142, U+0143->U+0144,\
    U+0144, U+0145->U+0146, U+0146, U+0147->U+0148, U+0148, U+014A->U+014B, U+014B,\
    U+014C->U+014D, U+014D, U+014E->U+014F, U+014F, U+0150->U+0151, U+0151, U+0152->U+0153,\
    U+0153, U+0154->U+0155, U+0155, U+0156->U+0157, U+0157, U+0158->U+0159, U+0159,\
    U+015A->U+015B, U+015B, U+015C->U+015D, U+015D, U+015E->U+015F, U+015F, U+0160->U+0161,\
    U+0161, U+0162->U+0163, U+0163, U+0164->U+0165, U+0165, U+0166->U+0167, U+0167,\
    U+0168->U+0169, U+0169, U+016A->U+016B, U+016B, U+016C->U+016D, U+016D, U+016E->U+016F,\
    U+016F, U+0170->U+0171, U+0171, U+0172->U+0173, U+0173, U+0174->U+0175, U+0175,\
    U+0176->U+0177, U+0177, U+0178->U+00FF, U+00FF, U+0179->U+017A, U+017A, U+017B->U+017C,\
    U+017C, U+017D->U+017E, U+017E, U+0410..U+042F->U+0430..U+044F, U+0430..U+044F,\
    U+05D0..U+05EA, U+0531..U+0556->U+0561..U+0586, U+0561..U+0587, U+0621..U+063A, U+01B9,\
    U+01BF, U+0640..U+064A, U+0660..U+0669, U+066E, U+066F, U+0671..U+06D3, U+06F0..U+06FF,\
    U+0904..U+0939, U+0958..U+095F, U+0960..U+0963, U+0966..U+096F, U+097B..U+097F,\
    U+0985..U+09B9, U+09CE, U+09DC..U+09E3, U+09E6..U+09EF, U+0A05..U+0A39, U+0A59..U+0A5E,\
    U+0A66..U+0A6F, U+0A85..U+0AB9, U+0AE0..U+0AE3, U+0AE6..U+0AEF, U+0B05..U+0B39,\
    U+0B5C..U+0B61, U+0B66..U+0B6F, U+0B71, U+0B85..U+0BB9, U+0BE6..U+0BF2, U+0C05..U+0C39,\
    U+0C66..U+0C6F, U+0C85..U+0CB9, U+0CDE..U+0CE3, U+0CE6..U+0CEF, U+0D05..U+0D39, U+0D60,\
    U+0D61, U+0D66..U+0D6F, U+0D85..U+0DC6, U+1900..U+1938, U+1946..U+194F, U+A800..U+A805,\
    U+A807..U+A822, U+0386->U+03B1, U+03AC->U+03B1, U+0388->U+03B5, U+03AD->U+03B5,\
    U+0389->U+03B7, U+03AE->U+03B7, U+038A->U+03B9, U+0390->U+03B9, U+03AA->U+03B9,\
    U+03AF->U+03B9, U+03CA->U+03B9, U+038C->U+03BF, U+03CC->U+03BF, U+038E->U+03C5,\
    U+03AB->U+03C5, U+03B0->U+03C5, U+03CB->U+03C5, U+03CD->U+03C5, U+038F->U+03C9,\
    U+03CE->U+03C9, U+03C2->U+03C3, U+0391..U+03A1->U+03B1..U+03C1,\
    U+03A3..U+03A9->U+03C3..U+03C9, U+03B1..U+03C1, U+03C3..U+03C9, U+0E01..U+0E2E,\
    U+0E30..U+0E3A, U+0E40..U+0E45, U+0E47, U+0E50..U+0E59, U+A000..U+A48F, U+4E00..U+9FBF,\
    U+3400..U+4DBF, U+20000..U+2A6DF, U+F900..U+FAFF, U+2F800..U+2FA1F, U+2E80..U+2EFF,\
    U+2F00..U+2FDF, U+3100..U+312F, U+31A0..U+31BF, U+3040..U+309F, U+30A0..U+30FF,\
    U+31F0..U+31FF, U+AC00..U+D7AF, U+1100..U+11FF, U+3130..U+318F, U+A000..U+A48F,\
    U+A490..U+A4CF

"""
|
|
|
|
# Sphinx n-gram settings shared by all indexes (inserted through the
# ``{ngram}`` placeholder): ``ngram_len = 1`` plus the ``ngram_chars`` list of
# code points it applies to.
# The ``ngram_chars`` value is written across several physical lines; the
# trailing backslashes make Python join them into one logical line, and the
# continuation lines are indented to keep the fragments whitespace-separated.
NGRAM_CHARS = """
ngram_len = 1
ngram_chars = U+4E00..U+9FBB, U+3400..U+4DB5, U+20000..U+2A6D6, U+FA0E, \
    U+FA0F, U+FA11, U+FA13, U+FA14, U+FA1F, U+FA21, U+FA23, U+FA24, U+FA27, \
    U+FA28, U+FA29, U+3105..U+312C, U+31A0..U+31B7, U+3041, U+3043, U+3045, \
    U+3047, U+3049, U+304B, U+304D, U+304F, U+3051, U+3053, U+3055, U+3057, \
    U+3059, U+305B, U+305D, U+305F, U+3061, U+3063, U+3066, U+3068, \
    U+306A..U+306F, U+3072, U+3075, U+3078, U+307B, U+307E..U+3083, U+3085, \
    U+3087, U+3089..U+308E, U+3090..U+3093, U+30A1, U+30A3, U+30A5, U+30A7, \
    U+30A9, U+30AD, U+30AF, U+30B3, U+30B5, U+30BB, U+30BD, U+30BF, U+30C1, \
    U+30C3, U+30C4, U+30C6, U+30CA, U+30CB, U+30CD, U+30CE, U+30DE, U+30DF, \
    U+30E1, U+30E2, U+30E3, U+30E5, U+30E7, U+30EE, U+30F0..U+30F3, U+30F5, \
    U+30F6, U+31F0, U+31F1, U+31F2, U+31F3, U+31F4, U+31F5, U+31F6, U+31F7, \
    U+31F8, U+31F9, U+31FA, U+31FB, U+31FC, U+31FD, U+31FE, U+31FF, \
    U+AC00..U+D7A3, U+1100..U+1159, U+1161..U+11A2, U+11A8..U+11F9, \
    U+A000..U+A48C, U+A492..U+A4C6

"""
|
|
|
|
# Sphinx ``source questions`` section; this statement starts the ``config``
# string that later statements append to.
#
# * The main query LEFT JOINs questions to their answers, so a question
#   without answers still produces one row.
# * The document id is ``q.id * 10e<ID_FACTOR> + a.id`` (or just the first
#   term when there is no answer); the ``tag`` multi-value query builds the
#   same id so the two line up.
# * ``age`` is the time since ``q.updated`` divided by AGE_DIVISOR.  The
#   column is converted with UNIX_TIMESTAMP() first -- exactly as the
#   ``updated`` attribute already does -- so that both operands of the
#   subtraction are epoch seconds.
# * Backslash-newline pairs are removed by Python, so each directive reaches
#   the Sphinx config as a single line; continuation lines are indented (and
#   ``av`` is followed by a space) so joined fragments never run together.
config = """
source questions
{{
{mysql}

sql_query = \
    SELECT \
        IF(a.id, q.id * 10e{n} + a.id, q.id * 10e{n}) AS id, \
        q.id AS question_id, \
        q.title AS title, \
        q.content AS question_content, \
        a.content AS answer_content, \
        q.num_answers AS replies, \
        IF(q.num_answers, 1, 0) AS has_answers, \
        IF(\
            (SELECT \
                COUNT(helpful) \
            FROM \
                questions_answervote av \
            WHERE \
                av.answer_id = a.id \
                AND helpful = 1), \
            1, 0) AS has_helpful, \
        q.status AS status, \
        IF(q.solution_id, 1, 0) AS is_solved, \
        q.is_locked AS is_locked, \
        UNIX_TIMESTAMP(q.created) AS created, \
        UNIX_TIMESTAMP(q.updated) AS updated, \
        (\
            SELECT \
                CRC32(username) \
            FROM \
                auth_user \
            WHERE \
                q.creator_id = auth_user.id \
        ) AS question_creator, \
        (\
            SELECT \
                CRC32(username) \
            FROM \
                auth_user \
            WHERE \
                a.creator_id = auth_user.id \
        ) AS answer_creator, \
        q.num_votes_past_week AS question_votes, \
        a.upvotes AS answer_votes, \
        (UNIX_TIMESTAMP() - UNIX_TIMESTAMP(q.updated))/{age_unit} AS age \
    FROM \
        questions_question q \
    LEFT JOIN \
        questions_answer a ON a.question_id = q.id


sql_attr_uint = question_id
sql_attr_uint = replies
sql_attr_uint = status
sql_attr_bool = is_solved
sql_attr_bool = is_locked
sql_attr_bool = has_answers
sql_attr_bool = has_helpful
sql_attr_timestamp = created
sql_attr_timestamp = updated
sql_attr_uint = question_creator
sql_attr_uint = answer_creator
sql_attr_uint = question_votes
sql_attr_uint = answer_votes
sql_attr_uint = age

sql_attr_multi = uint tag from query; SELECT \
        IF(a.id, q.id * 10e{n} + a.id, q.id * 10e{n}) AS id, \
        CRC32(t.name) \
    FROM \
        questions_answer a \
    RIGHT JOIN \
        questions_question q \
        ON q.id = a.question_id \
    INNER JOIN \
        taggit_taggeditem ti \
        ON q.id = ti.object_id AND \
        ti.content_type_id = (\
            SELECT \
                id \
            FROM \
                django_content_type \
            WHERE \
                app_label = 'questions' AND \
                model = 'question'\
        ) \
    LEFT JOIN \
        taggit_tag t \
        ON t.id = ti.tag_id
}}
""".format(mysql=MYSQL, age_unit=AGE_DIVISOR, n=ID_FACTOR)
|
|
|
|
# Append the Sphinx ``index questions`` section, built from the ``questions``
# source.  Catalog, stopword and wordform paths are derived from ROOT_PATH /
# CATALOG_PATH; the shared character table and n-gram settings are spliced
# in through the ``{charset}`` and ``{ngram}`` placeholders.
config = config + """
index questions
{{
source = questions
path = {root_path}{catalog_path}/questions-catalog
charset_type = utf-8
morphology = stem_en
min_stemming_len = 4
stopwords = {root_path}/stopwords.txt
wordforms = {root_path}/wordforms.txt
{charset}
{ngram}
}}
""".format(root_path=ROOT_PATH, catalog_path=CATALOG_PATH,
           charset=CHARSET_TABLE, ngram=NGRAM_CHARS)
|
|
|
|
# Append the Sphinx ``source discussion_forums`` section: one document per
# forum post, joined to its thread and to the author's auth_user row.
#
# * ``updated`` is the creation time of the thread's last post.
# * ``age`` is the time since ``post.updated`` divided by AGE_DIVISOR.  The
#   column is converted with UNIX_TIMESTAMP() before the subtraction so both
#   operands are epoch seconds (assumes ``post.updated`` is a date/time
#   column like the ``created`` columns converted above -- confirm against
#   the schema).
# * Backslash-newline pairs are removed by Python; continuation lines are
#   indented so the joined SQL fragments stay whitespace-separated.
config = config + """
source discussion_forums
{{
{mysql}

sql_query = \
    SELECT \
        post.id, \
        post.thread_id AS thread_id, \
        thread.forum_id AS forum_id, \
        thread.title AS title, \
        thread.is_sticky AS is_sticky, \
        thread.is_locked AS is_locked, \
        post.author_id AS author_id, \
        CRC32(author.username) AS author_ord, \
        post.content AS content, \
        UNIX_TIMESTAMP(thread.created) AS created, \
        ( \
            SELECT \
                UNIX_TIMESTAMP(forums_post.created) \
            FROM \
                forums_post \
            WHERE \
                forums_post.id = thread.last_post_id \
        ) AS updated, \
        (UNIX_TIMESTAMP() - UNIX_TIMESTAMP(post.updated))/{age_unit} AS age, \
        thread.replies AS replies \
    FROM \
        forums_post AS post \
    INNER JOIN \
        forums_thread AS thread ON (post.thread_id = thread.id) \
    INNER JOIN \
        auth_user AS author ON (post.author_id = author.id)

sql_attr_uint = thread_id
sql_attr_uint = forum_id
sql_attr_bool = is_sticky
sql_attr_bool = is_locked
sql_attr_uint = author_id
sql_attr_uint = author_ord
sql_attr_timestamp = created
sql_attr_timestamp = updated
sql_attr_uint = age
sql_attr_uint = replies

}}
""".format(mysql=MYSQL, age_unit=AGE_DIVISOR)
|
|
|
|
# Append the Sphinx ``index discussion_forums`` section, built from the
# ``discussion_forums`` source.  It uses the same stemming, stopword,
# wordform, character-table and n-gram settings as the other indexes and
# only differs in its source name and catalog path.
config = config + """
index discussion_forums
{{
source = discussion_forums
path = {root_path}{catalog_path}/discussion-forums-catalog
charset_type = utf-8
morphology = stem_en
min_stemming_len = 4
stopwords = {root_path}/stopwords.txt
wordforms = {root_path}/wordforms.txt
{charset}
{ngram}
}}
""".format(root_path=ROOT_PATH, catalog_path=CATALOG_PATH,
           charset=CHARSET_TABLE, ngram=NGRAM_CHARS)
|
|
|
|
# Append the Sphinx ``source wiki_pages`` section: one document per wiki
# document, joined to its current revision, restricted to approved revisions
# whose content does not start with 'REDIRECT ['.
#
# * NOTE(review): the unqualified ``content`` in the WHERE clause cannot be
#   the ``d.html AS content`` alias (MySQL does not resolve select aliases
#   in WHERE), so it presumably refers to a ``content`` column of
#   wiki_revision -- confirm against the schema.
# * Three multi-value attributes are filled by separate queries: ``tag``
#   (CRC32 of tag names), ``fx`` and ``os`` (item ids from
#   wiki_firefoxversion / wiki_operatingsystem).  For the latter two a
#   document with a parent is matched through its parent's id.
# * Backslash-newline pairs are removed by Python; continuation lines are
#   indented so the joined SQL fragments stay whitespace-separated.
config = config + """
source wiki_pages
{{
{mysql}

sql_query = \
    SELECT \
        d.id, \
        d.title, \
        CRC32(d.locale) AS locale, \
        d.current_revision_id AS current, \
        d.parent_id, \
        d.html AS content, \
        d.category, \
        d.slug, \
        r.summary, \
        r.keywords, \
        UNIX_TIMESTAMP(r.created) AS updated \
    FROM \
        wiki_document d \
    INNER JOIN \
        wiki_revision r \
        ON r.id = d.current_revision_id \
    WHERE \
        r.is_approved = 1 AND \
        content NOT LIKE 'REDIRECT [%'

sql_attr_timestamp = updated
sql_attr_uint = locale
sql_attr_uint = category

sql_attr_multi = uint tag from query; SELECT \
        d.id, \
        CRC32(t.name) \
    FROM \
        wiki_document d \
    INNER JOIN \
        taggit_taggeditem ti \
        ON d.id = ti.object_id AND \
        ti.content_type_id = (\
            SELECT \
                id \
            FROM \
                django_content_type \
            WHERE \
                app_label = 'wiki' AND \
                model = 'document'\
        ) \
    LEFT JOIN \
        taggit_tag t \
        ON t.id = ti.tag_id

sql_attr_multi = uint fx from query; SELECT \
        d.id, \
        item_id \
    FROM ( \
        SELECT \
            id, \
            IF(parent_id, parent_id, id) AS joiner \
        FROM \
            wiki_document \
        ) d \
    JOIN \
        wiki_firefoxversion f \
    ON \
        d.joiner = f.document_id;

sql_attr_multi = uint os from query; SELECT \
        d.id, \
        item_id \
    FROM ( \
        SELECT \
            id, \
            IF(parent_id, parent_id, id) AS joiner \
        FROM \
            wiki_document \
        ) d \
    JOIN \
        wiki_operatingsystem f \
    ON \
        d.joiner = f.document_id;
}}
""".format(mysql=MYSQL)
|
|
|
|
# Append the Sphinx ``index wiki_pages`` section, built from the
# ``wiki_pages`` source.  Unlike the other indexes it enables HTML stripping:
# markup is removed, the listed elements are dropped entirely, and selected
# attributes are still indexed.
# ``html_index_attrs`` names the HTML element ``img`` (the tag is ``img``,
# not ``image``) so that image alt/title text is actually picked up.
config = config + """
index wiki_pages
{{
source = wiki_pages
path = {root_path}{catalog_path}/wiki-page-catalog
charset_type = utf-8
morphology = stem_en
min_stemming_len = 4
stopwords = {root_path}/stopwords.txt
wordforms = {root_path}/wordforms.txt
html_strip = 1
html_index_attrs = img=alt,title; a=title;
html_remove_elements = script, style, object, embed, param
{charset}
{ngram}
}}
""".format(root_path=ROOT_PATH, catalog_path=CATALOG_PATH,
           charset=CHARSET_TABLE, ngram=NGRAM_CHARS)
|
|
|
|
# Append the ``searchd`` daemon section: the plain listener on LISTEN_PORT,
# a second listener speaking the ``mysql41`` protocol on
# LISTEN_SQL_HOST:LISTEN_SQL_PORT, and the log / pid file locations under
# ROOT_PATH.
config = config + """
searchd
{{
listen = {listen_port}
listen = {listen_sql_host}:{listen_sql_port}:mysql41
log = {root_path}{log_path}/searchd.log
pid_file = {root_path}{etc_path}/searchd.pid
}}
""".format(listen_port=LISTEN_PORT, root_path=ROOT_PATH, log_path=LOG_PATH,
           etc_path=ETC_PATH, listen_sql_host=LISTEN_SQL_HOST,
           listen_sql_port=LISTEN_SQL_PORT)
|
|
|
|
# Emit the assembled Sphinx configuration on stdout.  The parenthesised form
# prints the same text under Python 2.6 (single argument) and is also valid
# Python 3 syntax.
print(config)
|