Merge pull request #194 from edx/revert-es-model
Revert "Replaced Tire with elasticsearch-model"
Commit 3b9d16738f
@@ -23,6 +23,4 @@ before_install:
- mkdir -p ./mongo/log
- mongod --fork --dbpath ./mongo/db --logpath ./mongo/log/mongodb.log

script:
- bin/rake search:initialize
- bin/rspec
script: bundle exec rspec
Gemfile (5 changed lines)

@@ -31,8 +31,8 @@ gem 'will_paginate_mongoid', "~>2.0"
gem 'rdiscount'
gem 'nokogiri', "~>1.6.8"

gem 'elasticsearch', '~> 0.4'
gem 'elasticsearch-model', '~> 0.1.8'
gem 'tire', "0.6.2"
gem 'tire-contrib'

gem 'dalli'

@@ -47,7 +47,6 @@ group :test do
  gem 'guard-unicorn'
  gem 'rack-test', :require => 'rack/test'
  gem 'rspec', '~> 2.11.0'
  gem 'webmock', '~> 1.22'
end

gem 'newrelic_rpm'
Gemfile.lock (42 changed lines)

@@ -32,7 +32,7 @@ GEM
      minitest (~> 5.1)
      thread_safe (~> 0.3, >= 0.3.4)
      tzinfo (~> 1.1)
    addressable (2.4.0)
    ansi (1.5.0)
    bson (3.2.4)
    bson_ext (1.5.1)
    builder (3.2.2)

@@ -41,8 +41,6 @@ GEM
      simplecov
      url
    coderay (1.0.7)
    crack (0.4.3)
      safe_yaml (~> 1.0.0)
    dalli (2.1.0)
    database_cleaner (1.5.1)
    delayed_job (4.1.1)

@@ -55,33 +53,18 @@ GEM
    docile (1.1.5)
    domain_name (0.5.24)
      unf (>= 0.0.5, < 1.0.0)
    elasticsearch (0.4.11)
      elasticsearch-api (= 0.4.11)
      elasticsearch-transport (= 0.4.11)
    elasticsearch-api (0.4.11)
      multi_json
    elasticsearch-model (0.1.8)
      activesupport (> 3)
      elasticsearch (> 0.4)
      hashie
    elasticsearch-transport (0.4.11)
      faraday
      multi_json
    enumerize (0.11.0)
      activesupport (>= 3.2)
    factory_girl (4.5.0)
      activesupport (>= 3.0.0)
    faker (1.6.1)
      i18n (~> 0.5)
    faraday (0.9.2)
      multipart-post (>= 1.2, < 3)
    guard (1.3.2)
      listen (>= 0.4.2)
      thor (>= 0.14.6)
    guard-unicorn (0.0.7)
      guard (>= 1.1)
    hashdiff (0.2.3)
    hashie (3.4.3)
    hashr (0.0.22)
    http-cookie (1.0.2)
      domain_name (~> 0.5)
    i18n (0.7.0)

@@ -106,7 +89,6 @@ GEM
      mongoid
      rake
    multi_json (1.11.2)
    multipart-post (2.0.0)
    netrc (0.10.3)
    newrelic_rpm (3.16.0.318)
    nokogiri (1.6.8)

@@ -143,7 +125,6 @@ GEM
    rspec-expectations (2.11.2)
      diff-lcs (~> 1.1.3)
    rspec-mocks (2.11.2)
    safe_yaml (1.0.4)
    simplecov (0.11.1)
      docile (~> 1.1.0)
      json (~> 1.8)

@@ -157,6 +138,16 @@ GEM
    thor (0.16.0)
    thread_safe (0.3.5)
    tilt (1.3.3)
    tire (0.6.2)
      activemodel (>= 3.0)
      activesupport
      ansi
      hashr (~> 0.0.19)
      multi_json (~> 1.3)
      rake
      rest-client (~> 1.6)
    tire-contrib (0.1.1)
      tire
    tzinfo (1.2.2)
      thread_safe (~> 0.1)
    unf (0.1.4)

@@ -167,10 +158,6 @@ GEM
      rack
      raindrops (~> 0.7)
    url (0.3.2)
    webmock (1.22.3)
      addressable (>= 2.3.6)
      crack (>= 0.3.2)
      hashdiff
    will_paginate (3.0.7)
    will_paginate_mongoid (2.0.1)
      mongoid

@@ -189,8 +176,6 @@ DEPENDENCIES
  database_cleaner (~> 1.5.1)
  delayed_job
  delayed_job_mongoid
  elasticsearch (~> 0.4)
  elasticsearch-model (~> 0.1.8)
  enumerize
  factory_girl (~> 4.0)
  faker (~> 1.6)

@@ -214,7 +199,8 @@ DEPENDENCIES
  rs_voteable_mongo!
  rspec (~> 2.11.0)
  sinatra
  tire (= 0.6.2)
  tire-contrib
  unicorn
  webmock (~> 1.22)
  will_paginate_mongoid (~> 2.0)
  yajl-ruby
README.rst (53 changed lines)

@@ -14,8 +14,9 @@ An independent comment system which supports voting and nested comments. It
also supports features including instructor endorsement for education-aimed
discussion platforms.

Getting Started
---------------

Running the Server
------------------
If you are running cs_comments_service as part of edx-platform__ development under
devstack, it is strongly recommended to read `those setup documents`__ first. Note that
devstack will take care of just about all of the installation, configuration, and

@@ -29,49 +30,15 @@ This service relies on Elasticsearch and MongoDB. By default the service will us
however, if you wish to change these values, refer to `config/application.yml` and `config/mongoid.yml` for the
environment variables that can be set to override the defaults.

Install the requisite gems:
Before the server is first run, ensure gems are installed by doing ``bundle install``.

.. code-block:: bash

    $ bundle install

Setup the search index. Note that the command below creates an alias with a unique name (e.g. content_20160101), and
assigns it a known alias: content. If you choose not to use the command below, you should still opt to reference your
index by an alias rather than the actual index name. This will enable you to swap out indices (e.g. reindex) without
having to take downtime or modify code with a new index name.

.. code-block:: bash

    $ bin/rake search:initialize

Run the server:

.. code-block::

    $ ruby app.rb

By default Sinatra runs on port `4567`. If you'd like to use a different port pass the `-p` parameter:

.. code-block::

    $ ruby app.rb -p 5678
To run the server, do ``ruby app.rb [-p PORT]`` where PORT defaults to 4567.


Running Tests
-------------
Tests are built using the rspec__ framework, and can be run with the command below:

.. code-block::

    $ bin/rspec

If you'd like to view additional options for the command, append the `--help` option:

.. code-block::

    $ bin/rspec --help

__ http://rspec.info/
To run tests, do ``bundle exec rspec``. Append ``--help`` or see rspec documentation
for additional options to this command.

Internationalization (i18n) and Localization (l10n)
---------------------------------------------------

@@ -95,12 +62,12 @@ follow the instructions here__ to set up your ``.transifexrc`` file.
__ http://support.transifex.com/customer/portal/articles/1000855-configuring-the-client

To upload strings to Transifex for translation when you change the set
of translatable strings: ``bin/rake i18n:push``
of translatable strings: ``bundle exec rake i18n:push``

To fetch the latest translations from Transifex: ``bin/rake i18n:pull``
To fetch the latest translations from Transifex: ``bundle exec rake i18n:pull``

The repository includes some translations so they will be available
upon deployment. To commit an update to these: ``bin/rake i18n:commit``
upon deployment. To commit an update to these: ``bundle exec rake i18n:commit``

License
-------
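Editorial aside: the index-alias workflow in the README passage removed above boils down to a few Elasticsearch client calls. A minimal sketch, assuming a local cluster and the elasticsearch-ruby gem (the `client` variable and timestamped name are illustrative; the alias-move logic mirrors the `move_alias` helper deleted later in this diff):

    require 'elasticsearch'

    client = Elasticsearch::Client.new(host: 'http://localhost:9200')

    # Create a uniquely named physical index, e.g. content_20160101120000...
    index_name = "content_#{Time.now.strftime('%Y%m%d%H%M%S')}"
    client.indices.create(index: index_name)

    # ...then move the well-known 'content' alias onto it in one atomic call.
    actions = [{add: {index: index_name, alias: 'content'}}]
    begin
      existing = client.indices.get_alias(name: 'content')
      actions.unshift({remove: {index: existing.keys.join(','), alias: 'content'}})
    rescue Elasticsearch::Transport::Transport::Errors::NotFound
      # first run: no alias to move yet
    end
    client.indices.update_aliases(body: {actions: actions})

Because application code only ever queries the alias, a later reindex repeats the same swap against a fresh index with no downtime.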
Rakefile (16 changed lines)

@@ -17,23 +17,21 @@ rescue LoadError
  # no rspec available
end

Tire.configure do
  url YAML.load(application_yaml)['elasticsearch_server']
end

LOG = Logger.new(STDERR)

desc 'Load the environment'
task :environment do
  environment = ENV['SINATRA_ENV'] || 'development'
  Sinatra::Base.environment = environment

  Mongoid.load!('config/mongoid.yml')
  Mongoid.logger.level = Logger::INFO

  module CommentService
    class << self;
      attr_accessor :config

      def search_enabled?
        self.config[:enable_search]
      end
      attr_accessor :config;
    end
  end

@@ -43,8 +41,8 @@ task :environment do
  Dir[File.dirname(__FILE__) + '/models/*.rb'].each { |file| require file }
end

Dir.glob('lib/tasks/*.rake').each { |r| import r }

task :console => :environment do
  binding.pry
end

Dir.glob('lib/tasks/*.rake').each { |r| import r }
@@ -1,22 +1,22 @@
get "#{APIPREFIX}/threads" do # retrieve threads by course


  threads = Content.where({"_type" => "CommentThread", "course_id" => params["course_id"]})
  if params[:commentable_ids]
    threads = threads.in({"commentable_id" => params[:commentable_ids].split(",")})
  end

  handle_threads_query(
      threads,
      params['user_id'],
      params['course_id'],
      get_group_ids_from_params(params),
      value_to_boolean(params['flagged']),
      value_to_boolean(params['unread']),
      value_to_boolean(params['unanswered']),
      params['sort_key'],
      params['sort_order'],
      params['page'],
      params['per_page']
    threads,
    params["user_id"],
    params["course_id"],
    get_group_ids_from_params(params),
    value_to_boolean(params["flagged"]),
    value_to_boolean(params["unread"]),
    value_to_boolean(params["unanswered"]),
    params["sort_key"],
    params["sort_order"],
    params["page"],
    params["per_page"]
  ).to_json
end

@@ -83,12 +83,6 @@ post "#{APIPREFIX}/threads/:thread_id/comments" do |thread_id|
end

delete "#{APIPREFIX}/threads/:thread_id" do |thread_id|
  begin
    thread.destroy
  rescue Elasticsearch::Transport::Transport::Errors::NotFound
    # If the thread is not in the index, that's actually a good thing given that we just removed it.
    # Note that this exception will probably only be encountered for tests that don't wait for the index
    # to be refreshed before attempting to destroy a newly-recreated thread.
  end
  thread.destroy
  thread.to_hash.to_json
end
@@ -60,4 +60,5 @@ delete "#{APIPREFIX}/comments/:comment_id" do |comment_id|
      pass
    end
  end
  comment.to_hash.to_json
end
api/search.rb (224 changed lines)

@@ -1,141 +1,105 @@
def get_thread_ids(context, group_ids, local_params, search_text)
  filters = []
  filters.push({term: {commentable_id: local_params['commentable_id']}}) if local_params['commentable_id']
  filters.push({terms: {commentable_id: local_params['commentable_ids'].split(',')}}) if local_params['commentable_ids']
  filters.push({term: {course_id: local_params['course_id']}}) if local_params['course_id']

  filters.push({or: [
    {not: {exists: {field: :context}}},
    {term: {context: context}}
  ]})

  unless group_ids.empty?
    filters.push(
      {
        or: [
          {:not => {:exists => {:field => :group_id}}},
          {:terms => {:group_id => group_ids}}
        ]
      }
    )
  end

  self.class.trace_execution_scoped(['Custom/get_search_threads/es_search']) do
    body = {
      size: CommentService.config['max_deep_search_comment_count'].to_i,
      sort: [
        {updated_at: :desc}
      ],
      query: {
        multi_match: {
          query: search_text,
          fields: [:title, :body],
          operator: :AND
        },
        filtered: {
          filter: {
            and: filters
          }
        }
      }
    }

    response = Elasticsearch::Model.client.search(index: Content::ES_INDEX_NAME, body: body)

    thread_ids = Set.new
    response['hits']['hits'].each do |hit|
      case hit['_type']
      when CommentThread.document_type
        thread_ids.add(hit['_id'])
      when Comment.document_type
        thread_ids.add(hit['_source']['comment_thread_id'])
      else
        # There shouldn't be any other document types. Nevertheless, ignore them, if they are present.
        next
      end
    end
    thread_ids
  end
end

def get_suggested_text(search_text)
  body = {
    suggestions: {
      text: search_text,
      phrase: {
        field: :_all
      }
    }
  }
  response = Elasticsearch::Model.client.suggest(index: Content::ES_INDEX_NAME, body: body)
  suggestions = response.fetch('suggestions', [])
  if suggestions.length > 0
    options = suggestions[0]['options']
    if options.length > 0
      return options[0]['text']
    end
  end

  nil
end

def get_threads(context, group_ids, local_params, search_text)
  # Because threads and comments are currently separate unrelated documents in
  # Elasticsearch, we must first query for all matching documents, then
  # extract the set of thread ids, and then sort the threads by the specified
  # criteria and paginate. For performance reasons, we currently limit the
  # number of documents considered (ordered by update recency), which means
  # that matching threads can be missed if the search terms are very common.
  thread_ids = get_thread_ids(context, group_ids, local_params, search_text)
  corrected_text = nil

  if thread_ids.empty?
    # Sadly, Elasticsearch does not have a facility for computing suggestions
    # with respect to a filter. It would be expensive to determine the best
    # suggestion with respect to our filter parameters, so we simply re-query
    # with the top suggestion. If that has no results, then we return no results
    # and no correction.
    corrected_text = get_suggested_text(search_text)
    thread_ids = get_thread_ids(context, group_ids, local_params, corrected_text) if corrected_text
    corrected_text = nil if thread_ids.empty?
  end

  result_obj = handle_threads_query(
    CommentThread.in({_id: thread_ids.to_a}),
    local_params['user_id'],
    local_params['course_id'],
    group_ids,
    value_to_boolean(local_params['flagged']),
    value_to_boolean(local_params['unread']),
    value_to_boolean(local_params['unanswered']),
    local_params['sort_key'],
    local_params['sort_order'],
    local_params['page'],
    local_params['per_page'],
    context
  )

  unless result_obj.empty?
    result_obj[:corrected_text] = corrected_text
    # NOTE this reflects the total results from ES, but does not consider
    # any post-filtering that might happen (e.g. unread, flagged...) before
    # results are shown to the user.
    result_obj[:total_results] = thread_ids.size
  end

  result_obj.to_json
end

get "#{APIPREFIX}/search/threads" do
  local_params = params # Necessary for params to be available inside blocks
  group_ids = get_group_ids_from_params(local_params)
  context = local_params["context"] ? local_params["context"] : "course"
  search_text = local_params["text"]
  if !search_text
    '{}'
    {}.to_json
  else
    # Because threads and comments are currently separate unrelated documents in
    # Elasticsearch, we must first query for all matching documents, then
    # extract the set of thread ids, and then sort the threads by the specified
    # criteria and paginate. For performance reasons, we currently limit the
    # number of documents considered (ordered by update recency), which means
    # that matching threads can be missed if the search terms are very common.

    get_matching_thread_ids = lambda do |search_text|
      self.class.trace_execution_scoped(["Custom/get_search_threads/es_search"]) do
        search = Tire.search Content::ES_INDEX_NAME do
          query do
            match [:title, :body], search_text, :operator => "AND"
            filtered do
              filter :term, :commentable_id => local_params["commentable_id"] if local_params["commentable_id"]
              filter :terms, :commentable_id => local_params["commentable_ids"].split(",") if local_params["commentable_ids"]
              filter :term, :course_id => local_params["course_id"] if local_params["course_id"]
              filter :or, [
                {:not => {:exists => {:field => :context}}},
                {:term => {:context => context}}
              ]

    get_threads(context, group_ids, local_params, search_text)
              if not group_ids.empty?
                if group_ids.length > 1
                  group_id_criteria = {:terms => {:group_id => group_ids}}
                else
                  group_id_criteria = {:term => {:group_id => group_ids[0]}}
                end

                filter :or, [
                  {:not => {:exists => {:field => :group_id}}},
                  group_id_criteria
                ]
              end

            end
          end
          sort do
            by "updated_at", "desc"
          end
          size CommentService.config["max_deep_search_comment_count"].to_i
        end
        thread_ids = Set.new
        search.results.each do |content|
          case content.type
          when "comment_thread"
            thread_ids.add(content.id)
          when "comment"
            thread_ids.add(content.comment_thread_id)
          end
        end
        thread_ids
      end
    end

    # Sadly, Elasticsearch does not have a facility for computing suggestions
    # with respect to a filter. It would be expensive to determine the best
    # suggestion with respect to our filter parameters, so we simply re-query
    # with the top suggestion. If that has no results, then we return no results
    # and no correction.
    thread_ids = get_matching_thread_ids.call(search_text)
    corrected_text = nil
    if thread_ids.empty?
      suggest = Tire.suggest Content::ES_INDEX_NAME do
        suggestion "" do
          text search_text
          phrase :_all
        end
      end
      corrected_text = suggest.results.texts.first
      thread_ids = get_matching_thread_ids.call(corrected_text) if corrected_text
      corrected_text = nil if thread_ids.empty?
    end

    result_obj = handle_threads_query(
      CommentThread.in({"_id" => thread_ids.to_a}),
      local_params["user_id"],
      local_params["course_id"],
      group_ids,
      value_to_boolean(local_params["flagged"]),
      value_to_boolean(local_params["unread"]),
      value_to_boolean(local_params["unanswered"]),
      local_params["sort_key"],
      local_params["sort_order"],
      local_params["page"],
      local_params["per_page"],
      context
    )
    if !result_obj.empty?
      result_obj[:corrected_text] = corrected_text
      # NOTE this reflects the total results from ES, but does not consider
      # any post-filtering that might happen (e.g. unread, flagged...) before
      # results are shown to the user.
      result_obj[:total_results] = thread_ids.size
    end
    result_obj.to_json
  end
end
app.rb (90 changed lines)

@@ -14,10 +14,6 @@ module CommentService
  class << self
    attr_accessor :config
    attr_accessor :blocked_hashes

    def search_enabled?
      self.config[:enable_search]
    end
  end
  API_VERSION = 'v1'
  API_PREFIX = "/api/#{API_VERSION}"

@@ -30,6 +26,11 @@ end
application_yaml = ERB.new(File.read("config/application.yml")).result()
CommentService.config = YAML.load(application_yaml).with_indifferent_access

Tire.configure do
  url CommentService.config[:elasticsearch_server]
  logger STDERR if ENV["ENABLE_ELASTICSEARCH_DEBUGGING"]
end

Mongoid.load!("config/mongoid.yml", environment)
Mongoid.logger.level = Logger::INFO
Mongo::Logger.logger.level = ENV["ENABLE_MONGO_DEBUGGING"] ? Logger::DEBUG : Logger::INFO

@@ -47,13 +48,11 @@ helpers do
  end
end

Dir[File.dirname(__FILE__) + '/lib/**/*.rb'].each { |file| require file }
Dir[File.dirname(__FILE__) + '/models/*.rb'].each { |file| require file }
Dir[File.dirname(__FILE__) + '/presenters/*.rb'].each { |file| require file }
Dir[File.dirname(__FILE__) + '/lib/**/*.rb'].each {|file| require file}
Dir[File.dirname(__FILE__) + '/models/*.rb'].each {|file| require file}
Dir[File.dirname(__FILE__) + '/presenters/*.rb'].each {|file| require file}

Elasticsearch::Model.client = Elasticsearch::Client.new(host: CommentService.config[:elasticsearch_server], log: false)

# Ensure Elasticsearch index mappings exist.
# Ensure elasticsearch index mappings exist.
Comment.put_search_index_mapping
CommentThread.put_search_index_mapping


@@ -107,6 +106,7 @@ class Time
end



# these files must be required in order
require './api/search'
require './api/commentables'

@@ -138,61 +138,55 @@ error ArgumentError do
  error 400, [env['sinatra.error'].message].to_json
end

CommentService.blocked_hashes = Content.mongo_client[:blocked_hash].find(nil, projection: {hash: 1}).map { |d| d["hash"] }
CommentService.blocked_hashes = Content.mongo_client[:blocked_hash].find(nil, projection: {hash: 1}).map {|d| d["hash"]}

def get_db_is_master
  Mongoid::Clients.default.command(isMaster: 1)
end

def elasticsearch_health
  Elasticsearch::Model.client.cluster.health
end


def is_mongo_available?
  begin
    response = get_db_is_master
    return response.ok? && (response.documents.first['ismaster'] == true)
  rescue
    # ignored
  end

  false
end

def is_elasticsearch_available?
  begin
    health = elasticsearch_health
    return !health['timed_out'] && %w(yellow green).include?(health['status'])
  rescue
    # ignored
  end

  false
def get_es_status
  res = Tire::Configuration.client.get Tire::Configuration.url
  JSON.parse res.body
end

get '/heartbeat' do
  error 500, JSON.generate({OK: false, check: :db}) unless is_mongo_available?
  error 500, JSON.generate({OK: false, check: :es}) unless is_elasticsearch_available?
  JSON.generate({OK: true})
  # mongo is reachable and ready to handle requests
  db_ok = false
  begin
    res = get_db_is_master
    db_ok = res.ok? && res.documents.first['ismaster'] == true
  rescue
  end
  error 500, JSON.generate({"OK" => false, "check" => "db"}) unless db_ok

  # ES is reachable and ready to handle requests
  es_ok = false
  begin
    es_status = get_es_status
    es_ok = es_status["status"] == 200
  rescue
  end
  error 500, JSON.generate({"OK" => false, "check" => "es"}) unless es_ok

  JSON.generate({"OK" => true})
end

get '/selftest' do
  begin
    t1 = Time.now
    status = {
      db: get_db_is_master,
      es: elasticsearch_health,
      last_post_created: (Content.last.created_at rescue nil),
      total_posts: Content.count,
      total_users: User.count,
      elapsed_time: Time.now - t1
      "db" => get_db_is_master,
      "es" => get_es_status,
      "last_post_created" => (Content.last.created_at rescue nil),
      "total_posts" => Content.count,
      "total_users" => User.count,
      "elapsed_time" => Time.now - t1
    }
    JSON.generate(status)
  rescue => ex
    [500,
     {'Content-Type' => 'text/plain'},
     "#{ex.backtrace.first}: #{ex.message} (#{ex.class})\n\t#{ex.backtrace[1..-1].join("\n\t")}"
    [ 500,
      {'Content-Type' => 'text/plain'},
      "#{ex.backtrace.first}: #{ex.message} (#{ex.class})\n\t#{ex.backtrace[1..-1].join("\n\t")}"
    ]
  end
end
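Editorial aside: both versions of the heartbeat handler above honor the same contract, which is worth stating once. A hedged usage sketch (host and port are assumptions; Sinatra defaults to 4567, and `/heartbeat` is exempt from the API key check, as the specs later in this diff confirm):

    require 'net/http'
    require 'json'

    res = Net::HTTP.get_response(URI('http://localhost:4567/heartbeat'))
    body = JSON.parse(res.body)

    # Expect 200 with {"OK"=>true} when MongoDB and Elasticsearch both respond;
    # otherwise 500 with {"OK"=>false, "check"=>"db"} or {"check"=>"es"}.
    puts "#{res.code} #{body.inspect}"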
@@ -4,4 +4,3 @@ elasticsearch_server: <%= ENV['SEARCH_SERVER'] || 'http://localhost:9200' %>
max_deep_search_comment_count: 5000
default_locale: <%= ENV['SERVICE_LANGUAGE'] || 'en-US' %>
manual_pagination_batch_size: <%= ENV['MANUAL_PAGINATION_BATCH_SIZE'] || 500 %>
enable_search: true
@@ -1,60 +1,32 @@
require 'elasticsearch'

module TaskHelpers
  module ElasticsearchHelper
    LOG = Logger.new(STDERR)

    def self.create_index(name=nil)
      name ||= "#{Content::ES_INDEX_NAME}_#{Time.now.strftime('%Y%m%d%H%M%S')}"
      index = Tire.index(name)

      mappings = {}
      [Comment, CommentThread].each do |model|
        mappings.merge! model.mappings.to_hash
      LOG.info "Creating new index: #{name}..."
      index.create

      [CommentThread, Comment].each do |model|
        LOG.info "Applying index mappings for #{model.name}"
        model.put_search_index_mapping(index)
      end
      LOG.info '...done!'

      Elasticsearch::Model.client.indices.create(index: name, body: {mappings: mappings})
      LOG.info "Created new index: #{name}."
      name
      index
    end

    def self.delete_index(name)
      begin
        Elasticsearch::Model.client.indices.delete(index: name)
        LOG.info "Deleted index: #{name}."
      rescue Elasticsearch::Transport::Transport::Errors::NotFound
        # NOTE (CCB): Future versions of the Elasticsearch client support the ignore parameter,
        # that can be used to ignore 404 errors.
        LOG.info "Unable to delete non-existent index: #{name}."
      end
      Tire.index(name).delete
    end

    def self.get_index
      CommentThread.tire.index
    end

    def self.get_index_shard_count(name)
      settings = Elasticsearch::Model.client.indices.get_settings(index: name)
      settings[name]['settings']['index']['number_of_shards']
    end

    def self.move_alias(alias_name, index_name)
      actions = [
        {add: {index: index_name, alias: alias_name}}
      ]

      begin
        response = Elasticsearch::Model.client.indices.get_alias(name: alias_name)
        if response.length
          actions.unshift({remove: {index: response.keys.join(','), alias: alias_name}})
        end
      rescue Elasticsearch::Transport::Transport::Errors::NotFound
        # NOTE (CCB): Future versions of the Elasticsearch client support the ignore parameter,
        # that can be used to ignore 404 errors.
      end

      body = {actions: actions}
      Elasticsearch::Model.client.indices.update_aliases(body: body)
      LOG.info "Alias [#{alias_name}] now points to index [#{index_name}]."
    end

    def self.refresh_index(name)
      Elasticsearch::Model.client.indices.refresh(index: name)
      settings = Tire.index(name)
      settings['index.number_of_shards']
    end
  end
end
@@ -91,6 +91,11 @@ namespace :db do
  end

  task :seed => [:environment, :clean] do
    Tire.index 'comment_threads' do
      delete
    end
    CommentThread.create_elasticsearch_index

    beginning_time = Time.now

    (1..10).map { |id| create_test_user(id) }
@@ -1,36 +1,170 @@
require 'task_helpers'

namespace :search do
  desc 'Indexes content updated in the last N minutes.'
  task :catchup, [:minutes] => :environment do |t, args|
    start_time = Time.now - (args[:minutes].to_i * 60)
  def import_from_cursor(cursor, index, opts)
    tot = cursor.count
    cnt = 0
    t = Time.now
    index.import cursor, {:method => :paginate, :per_page => opts[:batch_size]} do |documents|
      if cnt % opts[:batch_size] == 0 then
        elapsed_secs = (Time.now - t).round(2)
        pct_complete = (100 * (cnt/tot.to_f)).round(2)
        LOG.info "#{index.name}: imported #{cnt} of #{tot} (#{pct_complete}% complete after #{elapsed_secs} seconds)"
      end
      cnt += documents.length
      sleep opts[:sleep_time]
      documents
    end
    LOG.info "#{index.name}: finished importing #{cnt} documents"
    cnt
  end

    [Comment, CommentThread].each do |model|
      model.where(:updated_at.gte => start_time).import(index: Content::ES_INDEX_NAME)
  def move_alias_to(name, index)
    # if there was a previous index, switch over the alias to point to the new index
    alias_ = Tire::Alias.find name
    if alias_
      # does the alias already point to this index?
      if alias_.indices.include? index.name
        return false
      end
      # remove the alias from wherever it points to now
      LOG.info "alias already exists (will move): #{alias_.indices.to_ary.join(',')}"
      alias_.indices.each do |old_index_name|
        alias_.indices.delete old_index_name unless old_index_name == name
      end
    else
      # create the alias
      LOG.info "alias \"#{name}\" does not yet exist - creating."
      alias_ = Tire::Alias.new :name => name
    end
    # point the alias at our new index
    alias_.indices.add index.name
    alias_.save
    LOG.info "alias \"#{name}\" now points to index #{index.name}."
    true
  end

  def do_reindex (opts, in_place=false)
    start_time = Time.now

    # create the new index with a unique name
    new_index = TaskHelpers::ElasticsearchHelper.create_index

    # unless the user is forcing a rebuild, or the index does not yet exist, we
    # can do a Tire api reindex which is much faster than reimporting documents
    # from mongo.
    #
    # Checking if the index exists is tricky. Tire automatically created an index
    # for the model class when the app loaded if one did not already exist. However,
    # it won't create an alias, which is what our app uses. So if the index exists
    # but not the alias, we know that it's auto-created.
    old_index = TaskHelpers::ElasticsearchHelper.get_index
    alias_name = old_index.name
    alias_ = Tire::Alias.find alias_name
    if alias_.nil?
      # edge case.
      # the alias doesn't exist, so we know the index was auto-created.
      # We will delete it and replace it with an alias.
      raise RuntimeError, 'Cannot reindex in-place, no valid source index' if in_place
      LOG.warn 'deleting auto-created index to make room for the alias'
      old_index.delete
      # NOTE on the small chance that another process re-auto-creates the index
      # we just deleted before we have a chance to create the alias, this next
      # call will fail.
      move_alias_to(Content::ES_INDEX_NAME, new_index_name)
    end

    op = in_place ? 'reindex' : '(re)build index'
    LOG.info "preparing to #{op}"

    content_types = %w(Comment CommentThread)
    if in_place
      # reindex, moving source documents directly from old index to new
      LOG.info 'copying documents from original index (this may take a while!)'
      old_index.reindex new_index.name
      LOG.info 'done copying!'
    else
      # fetch all the documents ever, up til start_time
      cursor = Content.where(:_type.in => content_types, :updated_at.lte => start_time)
      # import them to the new index
      import_from_cursor(cursor, new_index, opts)
    end

    # move the alias if necessary
    did_alias_move = move_alias_to(Content::ES_INDEX_NAME, new_index)

    if did_alias_move
      # Reimport any source documents that got updated since start_time,
      # while the alias still pointed to the old index.
      # Elasticsearch understands our document ids, so re-indexing the same
      # document won't create duplicates.
      LOG.info "importing any documents that changed between #{start_time} and now"
      cursor = Content.where(:_type.in => content_types, :updated_at.gte => start_time)
      import_from_cursor(cursor, new_index, opts)
    end
  end

  desc 'Reindex all data from the database'
  task :reindex, [:index] => :environment do |t, args|
    args.with_defaults(:index => Content::ES_INDEX_NAME)
    [Comment, CommentThread].each do |model|
      model.import(index: args[:index])
  desc 'Copies contents of MongoDB into Elasticsearch if updated in the last N minutes.'
  task :catchup, [:minutes, :batch_size, :sleep_time] => :environment do |t, args|
    opts = batch_opts args
    the_index = TaskHelpers::ElasticsearchHelper.get_index
    alias_ = Tire::Alias.find the_index.name
    # this check makes sure we are working with the index to which
    # the desired model's alias presently points.
    raise RuntimeError, "could not find live index" if alias_.nil?
    start_time = Time.now - (args[:minutes].to_i * 60)
    cursor = Content.where(:_type.in => %w(Comment CommentThread), :updated_at.gte => start_time)
    import_from_cursor(cursor, the_index, opts)
  end

  def batch_opts(args)
    args = args.to_hash
    {:batch_size => args[:batch_size].nil? ? 500 : args[:batch_size].to_i,
     :sleep_time => args[:sleep_time].nil? ? 0 : args[:sleep_time].to_i}
  end

  desc 'Removes any data from Elasticsearch that no longer exists in MongoDB.'
  task :prune, [:batch_size, :sleep_time] => :environment do |t, args|
    opts = batch_opts args
    the_index = TaskHelpers::ElasticsearchHelper.get_index
    puts "pruning #{the_index.name}"
    alias_ = Tire::Alias.find the_index.name
    raise RuntimeError, 'could not find live index' if alias_.nil?
    scan_size = opts[:batch_size] / TaskHelpers::ElasticsearchHelper.get_index_shard_count(the_index.name)
    cnt = 0
    [CommentThread, Comment].each do |klass|
      doc_type = klass.document_type
      # this check makes sure we are working with the index to which
      # the desired model's alias presently points.
      search = Tire::Search::Scan.new the_index.name, {size: scan_size, type: doc_type}
      search.each do |results|
        es_ids = results.map(&:id)
        mongo_ids = klass.where(:id.in => es_ids).map { |d| d.id.to_s }
        to_delete = es_ids - mongo_ids
        if to_delete.size > 0
          cnt += to_delete.size
          puts "deleting #{to_delete.size} orphaned #{doc_type} documents from elasticsearch"
          the_index.bulk_delete (to_delete).map { |v| {"type" => doc_type, "id" => v} }
        end
        puts "#{the_index.name}/#{doc_type}: processed #{search.seen} of #{search.total}"
        sleep opts[:sleep_time]
      end
    end
    puts "done pruning #{the_index.name}, deleted a total of #{cnt} orphaned documents"
  end

  desc 'Rebuild the content index from MongoDB data.'
  task :rebuild, [:batch_size, :sleep_time] => :environment do |t, args|
    do_reindex(batch_opts(args))
  end

  desc 'Rebuild the content index from already-indexed data (in place).'
  task :reindex, [:batch_size, :sleep_time] => :environment do |t, args|
    do_reindex(batch_opts(args), true)
  end

  desc 'Generate a new, empty physical index, without bringing it online.'
  task :create_index => :environment do
    TaskHelpers::ElasticsearchHelper.create_index
  end

  desc 'Creates a new search index and points the "content" alias to it'
  task :initialize => :environment do
    index = TaskHelpers::ElasticsearchHelper.create_index
    TaskHelpers::ElasticsearchHelper.move_alias(Content::ES_INDEX_NAME, index)
  end

  desc 'Sets/moves an alias to the specified index'
  task :move_alias, [:alias, :index] => :environment do |t, args|
    TaskHelpers::ElasticsearchHelper.move_alias(args[:alias], args[:index])
  end
end
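Editorial aside: the restored tasks take positional rake arguments for batch size and inter-batch sleep, defaulting to 500 and 0 via `batch_opts`. A sketch of typical invocations; the concrete values are assumptions, not prescribed by the diff:

    # From a shell (bracket syntax passes positional task arguments):
    #   bundle exec rake search:rebuild[500,2]
    #   bundle exec rake search:catchup[60,500,0]
    #   bundle exec rake search:prune[500,0]
    #
    # The same invocation from Ruby, e.g. inside another rake task:
    Rake::Task['search:rebuild'].invoke(500, 2)  # batch_size=500, sleep 2s between batches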
@@ -1,4 +1,3 @@
require_relative 'concerns/searchable'
require_relative 'content'
require_relative 'constants'

@@ -7,7 +6,8 @@ class Comment < Content
  include Mongoid::Timestamps
  include Mongoid::MagicCounterCache
  include ActiveModel::MassAssignmentSecurity
  include Searchable
  include Tire::Model::Search
  include Tire::Model::Callbacks

  voteable self, :up => +1, :down => -1
@@ -1,5 +1,4 @@
require 'new_relic/agent/method_tracer'
require_relative 'concerns/searchable'
require_relative 'content'
require_relative 'constants'

@@ -7,7 +6,8 @@ class CommentThread < Content
  include Mongoid::Timestamps
  include Mongoid::Attributes::Dynamic
  include ActiveModel::MassAssignmentSecurity
  include Searchable
  include Tire::Model::Search
  include Tire::Model::Callbacks
  extend Enumerize

  voteable self, :up => +1, :down => -1

@@ -31,6 +31,7 @@ class CommentThread < Content

  index({author_id: 1, course_id: 1})


  index_name Content::ES_INDEX_NAME

  mapping do

@@ -39,8 +40,10 @@ class CommentThread < Content
    indexes :created_at, type: :date, included_in_all: false
    indexes :updated_at, type: :date, included_in_all: false
    indexes :last_activity_at, type: :date, included_in_all: false

    indexes :comment_count, type: :integer, included_in_all: false
    indexes :votes_point, type: :integer, as: 'votes_point', included_in_all: false

    indexes :context, type: :string, index: :not_analyzed, included_in_all: false
    indexes :course_id, type: :string, index: :not_analyzed, included_in_all: false
    indexes :commentable_id, type: :string, index: :not_analyzed, included_in_all: false
@@ -1,40 +0,0 @@
module Searchable
  extend ActiveSupport::Concern

  included do
    include Elasticsearch::Model

    # We specify our own callbacks, instead of using Elasticsearch::Model::Callbacks, so that we can disable
    # indexing for tests where search functionality is not needed. This should improve test execution times.
    after_create :index_document
    after_update :update_indexed_document
    after_destroy :delete_document

    def self.put_search_index_mapping(index=nil)
      index ||= self.index_name
      success = self.__elasticsearch__.client.indices.put_mapping(index: index, type: self.document_type, body: self.mappings.to_hash)
      unless success
        logger.warn "WARNING! could not apply search index mapping for #{self.name}"
      end
    end

    def as_indexed_json(options={})
      # TODO: Play with the `MyModel.indexes` method -- reject non-mapped attributes, `:as` options, etc
      self.as_json(options.merge root: false)
    end

    private

    def index_document
      __elasticsearch__.index_document if CommentService.search_enabled?
    end

    # This is named in this manner to prevent collisions with Mongoid's update_document method.
    def update_indexed_document
      __elasticsearch__.update_document if CommentService.search_enabled?
    end

    def delete_document
      __elasticsearch__.delete_document if CommentService.search_enabled?
    end
  end
end
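Editorial aside: the point of the deleted concern above is the `CommentService.search_enabled?` guard on each callback, which lets tests leave search off and skip Elasticsearch round trips entirely. A minimal sketch of that behavior under stated assumptions (the `Note` model is hypothetical, and the concern plus CommentService are assumed loaded):

    class Note
      include Mongoid::Document
      include Searchable  # the concern removed by this revert

      field :body, type: String
    end

    CommentService.config[:enable_search] = false
    Note.create!(body: 'saved to MongoDB only; index_document is a no-op')

    CommentService.config[:enable_search] = true
    Note.create!(body: 'saved and pushed to the Elasticsearch index')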
@@ -1,10 +1,8 @@
class Content

  include Mongoid::Document
  include Mongo::Voteable

  ES_INDEX_NAME = 'content'

  field :visible, type: Boolean, default: true
  field :abuse_flaggers, type: Array, default: []
  field :historical_abuse_flaggers, type: Array, default: [] #preserve abuse flaggers after a moderator unflags

@@ -18,6 +16,16 @@ class Content
  index({comment_thread_id: 1, endorsed: 1}, {sparse: true})
  index({commentable_id: 1}, {sparse: true, background: true})

  ES_INDEX_NAME = 'content'

  def self.put_search_index_mapping(idx=nil)
    idx ||= self.tire.index
    success = idx.mapping(self.tire.document_type, {:properties => self.tire.mapping})
    unless success
      logger.warn "WARNING! could not apply search index mapping for #{self.name}"
    end
  end

  before_save :set_username
@@ -3,7 +3,6 @@ require 'faker'


describe 'app' do
  include_context 'search_enabled'
  before(:each) { set_api_key_header }
  let(:body) { Faker::Lorem.word }
@@ -3,14 +3,15 @@ require 'unicode_shared_examples'

describe "app" do
  describe "search" do
    include_context 'search_enabled'

    before (:each) { set_api_key_header }

    let(:author) { create_test_user(42) }

    let(:course_id) { "test/course/id" }

    def get_result_ids(result)
      result["collection"].map { |t| t["id"] }
      result["collection"].map {|t| t["id"]}
    end

    describe "GET /api/v1/search/threads" do

@@ -20,17 +21,17 @@ describe "app" do
      result.should == {}
    end

    it "returns an empty result if text parameter is missing" do
    it "returns an empty reuslt if text parameter is missing" do
      get "/api/v1/search/threads", course_id: course_id
      assert_empty_response
    end

    it "returns an empty result if sort key is invalid" do
    it "returns an empty reuslt if sort key is invalid" do
      get "/api/v1/search/threads", course_id: course_id, text: "foobar", sort_key: "invalid", sort_order: "desc"
      assert_empty_response
    end

    it "returns an empty result if sort order is invalid" do
    it "returns an empty reuslt if sort order is invalid" do
      get "/api/v1/search/threads", course_id: course_id, text: "foobar", sort_key: "date", sort_order: "invalid"
      assert_empty_response
    end

@@ -68,13 +69,13 @@ describe "app" do
      last_response.should be_ok
      result = parse(last_response.body)
      actual_ids = Set.new get_result_ids(result)
      expected_ids = Set.new expected_thread_indexes.map { |i| threads[i].id.to_s }
      expected_ids = Set.new expected_thread_indexes.map {|i| threads[i].id.to_s}
      actual_ids.should == expected_ids
    end

    it "by course_id" do
      get "/api/v1/search/threads", text: "text", course_id: "test/course/id0"
      assert_response_contains((0..29).find_all { |i| i % 2 == 0 })
      assert_response_contains((0..29).find_all {|i| i % 2 == 0})
    end

    it "by context" do

@@ -86,7 +87,7 @@ describe "app" do
      user = create_test_user(Random.new)
      user.mark_as_read(threads[0])
      get "/api/v1/search/threads", text: "text", course_id: "test/course/id0", user_id: user.id, unread: true
      assert_response_contains((1..29).find_all { |i| i % 2 == 0 })
      assert_response_contains((1..29).find_all {|i| i % 2 == 0})
    end

    it "with flagged filter" do

@@ -120,22 +121,22 @@ describe "app" do

    it "by commentable_id" do
      get "/api/v1/search/threads", text: "text", commentable_id: "commentable0"
      assert_response_contains((0..29).find_all { |i| i % 3 == 0 })
      assert_response_contains((0..29).find_all {|i| i % 3 == 0})
    end

    it "by commentable_ids" do
      get "/api/v1/search/threads", text: "text", commentable_ids: "commentable0,commentable1"
      assert_response_contains((0..29).find_all { |i| i % 3 == 0 || i % 3 == 1 })
      assert_response_contains((0..29).find_all {|i| i % 3 == 0 || i % 3 == 1})
    end

    it "by group_id" do
      get "/api/v1/search/threads", text: "text", group_id: "1"
      assert_response_contains((0..29).find_all { |i| i % 5 == 0 || i % 5 == 1 })
      assert_response_contains((0..29).find_all {|i| i % 5 == 0 || i % 5 == 1})
    end

    it "by group_ids" do
      get "/api/v1/search/threads", text: "text", group_ids: "1,2"
      expected_ids = (0..29).find_all { |i| i % 5 == 0 || i % 5 == 1 || i % 5 == 2 }
      expected_ids = (0..29).find_all {|i| i % 5 == 0 || i % 5 == 1 || i % 5 == 2}
      assert_response_contains(expected_ids)
    end

@@ -147,8 +148,8 @@ describe "app" do

    describe "sorting works" do
      let!(:threads) do
        threads = (0..5).map { |i| make_thread(author, "text", course_id, "dummy") }
        [1, 2].map { |i| author.vote(threads[i], :up) }
        threads = (0..5).map {|i| make_thread(author, "text", course_id, "dummy")}
        [1, 2].map {|i| author.vote(threads[i], :up)}
        [1, 3].map do |i|
          threads[i].comment_count = 5
          threads[i].save!

@@ -163,7 +164,7 @@ describe "app" do
        last_response.should be_ok
        result = parse(last_response.body)
        actual_ids = get_result_ids(result)
        expected_ids = expected_thread_indexes.map { |i| threads[i].id.to_s }
        expected_ids = expected_thread_indexes.map {|i| threads[i].id.to_s}
        actual_ids.should == expected_ids
      end

@@ -196,7 +197,7 @@ describe "app" do

    describe "pagination" do
      let!(:threads) do
        threads = (1..50).map { |i| make_thread(author, "text", course_id, "dummy") }
        threads = (1..50).map {|i| make_thread(author, "text", course_id, "dummy")}
        refresh_es_index
        threads
      end

@@ -209,7 +210,7 @@ describe "app" do
        result = parse(last_response.body)
        result_ids += get_result_ids(result)
      end
      result_ids.should == threads.reverse.map { |t| t.id.to_s }
      result_ids.should == threads.reverse.map {|t| t.id.to_s}
    end

    it "works correctly with page size 1" do

@@ -226,7 +227,7 @@ describe "app" do
    end

    describe "spelling correction" do
      let(:commentable_id) { "test_commentable" }
      let(:commentable_id) {"test_commentable"}

      def check_correction(original_text, corrected_text)
        get "/api/v1/search/threads", text: original_text

@@ -291,8 +292,8 @@ describe "app" do
      end
    end

    it 'returns the correct values for total_results and num_pages' do
      course_id = 'test/course/id'
    it "returns the correct values for total_results and num_pages" do
      course_id = "test/course/id"
      for i in 1..100 do
        text = "all"
        text += " half" if i % 2 == 0

@@ -301,14 +302,15 @@ describe "app" do
        text += " one" if i == 100
        # There is currently a bug that causes only 10 threads with matching
        # titles/bodies to be considered, so this test case uses comments.
        create(:comment, course_id: course_id, body: text)
        thread = make_thread(author, "dummy text", course_id, "dummy_commentable")
        make_comment(author, thread, text)
      end
      # Elasticsearch does not necessarily make newly indexed content
      # available immediately, so we must explicitly refresh the index
      refresh_es_index

      test_text = lambda do |text, expected_total_results, expected_num_pages|
        get '/api/v1/search/threads', course_id: course_id, text: text, per_page: '10'
        get "/api/v1/search/threads", course_id: course_id, text: text, per_page: "10"
        last_response.should be_ok
        result = parse(last_response.body)
        result["total_results"].should == expected_total_results
spec/app_spec.rb (210 changed lines)

@@ -1,174 +1,174 @@
require 'spec_helper'

describe 'app' do
  describe 'access control' do
describe "app" do

  describe "access control" do
    let(:user) { create_test_user(42) }
    # all routes (even nonexistent ones) are covered by the api key
    # /heartbeat is the only exception, covered in the heartbeat tests below
    let(:urls) do
      {
        '/' => 404,
        "/api/v1/users/#{user.id}" => 200,
        '/api/v1/users/doesnotexist' => 404,
        '/selftest' => 200
    let(:urls) { {
      "/" => 404,
      "/api/v1/users/#{user.id}" => 200,
      "/api/v1/users/doesnotexist" => 404,
      "/selftest" => 200
      }
    end
    }

    it 'returns 401 when api key header is not set' do
      urls.keys.each do |url|
    it "returns 401 when api key header is unset" do
      urls.each do |url, _|
        get url
        expect(last_response.status).to eq 401
        last_response.status.should == 401
      end
    end

    it 'returns 401 when api key value is incorrect' do
      urls.keys.each do |url|
        get url, {}, {'HTTP_X_EDX_API_KEY' => "incorrect-#{TEST_API_KEY}"}
        expect(last_response.status).to eq 401
    it "returns 401 when api key value is incorrect" do
      urls.each do |url, _|
        get url, {}, {"HTTP_X_EDX_API_KEY" => "incorrect-#{TEST_API_KEY}"}
        last_response.status.should == 401
      end
    end

    it 'allows requests when api key value is correct' do
    it "allows requests when api key value is correct" do
      urls.each do |url, status|
        get url, {}, {'HTTP_X_EDX_API_KEY' => TEST_API_KEY}
        expect(last_response.status).to eq status
        get url, {}, {"HTTP_X_EDX_API_KEY" => TEST_API_KEY}
        last_response.status.should == status
      end
    end
  end

  describe 'heartbeat monitoring' do
    subject do
      get '/heartbeat'
      last_response
  describe "heartbeat monitoring" do
    it "does not require api key" do
      get "/heartbeat"
      last_response.status.should == 200
    end

    it 'does not require api key' do
      get '/heartbeat'
      expect(subject.status).to eq 200
    end

    context 'db check' do
    context "db check" do
      def test_db_check(response, is_success)
        db = double("db")
        stub_const('Mongoid::Clients', Class.new).stub(:default).and_return(db)
        stub_const("Mongoid::Clients", Class.new).stub(:default).and_return(db)
        result = double('result')
        result.stub(:ok?).and_return(response['ok'] == 1)
        result.stub(:documents).and_return([response])
        db.should_receive(:command).with({:isMaster => 1}).and_return(result)

        body = parse(subject.body)
        get "/heartbeat"
        if is_success
          expect(subject.status).to eq 200
          expect(body).to eq({'OK' => true})
          last_response.status.should == 200
          parse(last_response.body).should == {"OK" => true}
        else
          expect(subject.status).to eq 500
          expect(body).to eq({'OK' => false, 'check' => 'db'})
          last_response.status.should == 500
          parse(last_response.body).should == {"OK" => false, "check" => "db"}
        end
      end

      it 'reports success when mongo is ready' do
        test_db_check({'ismaster' => true, 'ok' => 1}, true)
      it "reports success when mongo is ready" do
        test_db_check({"ismaster" => true, "ok" => 1}, true)
      end

      it 'reports failure when mongo is not master' do
        test_db_check({'ismaster' => false, 'ok' => 1}, false)
      it "reports failure when mongo is not master" do
        test_db_check({"ismaster" => false, "ok" => 1}, false)
      end

      it 'reports failure when mongo is not OK' do
        test_db_check({'ismaster' => true, 'ok' => 0}, false)
      it "reports failure when mongo is not OK" do
        test_db_check({"ismaster" => true, "ok" => 0}, false)
      end

      it 'reports failure when command response is unexpected' do
        test_db_check({'foo' => 'bar'}, false)
      it "reports failure when command response is unexpected" do
        test_db_check({"foo" => "bar"}, false)
      end

      it 'reports failure when db command raises an error' do
        db = double('db')
        stub_const('Mongoid::Clients', Class.new).stub(:default).and_return(db)
      it "reports failure when db command raises an error" do
        db = double("db")
        stub_const("Mongoid::Clients", Class.new).stub(:default).and_return(db)
        db.should_receive(:command).with({:isMaster => 1}).and_raise(StandardError)

        expect(subject.status).to eq 500
        expect(parse(subject.body)).to eq({'OK' => false, 'check' => 'db'})
        get "/heartbeat"
        last_response.status.should == 500
        parse(last_response.body).should == {"OK" => false, "check" => "db"}
      end
    end

    context 'elasticsearch check' do
      after(:each) { WebMock.reset! }
    context "elasticsearch check" do
      def test_es_check(response, is_success)
        # fake HTTP call
        client = double()
        tire_config = stub_const("Tire::Configuration", Class.new)
        tire_config.stub(:url).and_return("foo")
        tire_config.stub(:client).and_return(client)
        # fake HTTP response based on our response parameter
        es_response = double()
        es_response.stub(:body).and_return(JSON.generate(response))
        client.should_receive(:get).and_return(es_response)

      def test_es_check(service_available, status='green', timed_out=false)
        body = {
          status: status,
          timed_out: timed_out,
        }
        url = "#{CommentService.config[:elasticsearch_server]}/_cluster/health"
        stub = stub_request(:any, url).to_return(body: body.to_json, headers: {'Content-Type' => 'application/json'})

        body = parse(subject.body)
        expect(stub).to have_been_requested

        if service_available
          expect(last_response.status).to eq 200
          expect(body).to eq({'OK' => true})
        get "/heartbeat"
        if is_success
          last_response.status.should == 200
          parse(last_response.body).should == {"OK" => true}
        else
          expect(last_response.status).to eq 500
          expect(body).to eq({'OK' => false, 'check' => 'es'})
          last_response.status.should == 500
          parse(last_response.body).should == {"OK" => false, "check" => "es"}
        end
      end

      it 'reports success if cluster status is green' do
        test_es_check(true, 'green')
      it "reports success when es is ready" do
        test_es_check({"status" => 200}, true)
      end

      it 'reports success if cluster status is yellow' do
        test_es_check(true, 'yellow')
      it "reports failure when es status is unexpected" do
        test_es_check({"status" => 503}, false)
      end

      it 'reports failure if cluster status is red' do
        test_es_check(false, 'red')
      it "reports failure when es status is malformed" do
        test_es_check("", false)
      end

      it 'reports failure if the cluster health check times out' do
        test_es_check(false, 'green', true)
      it "reports failure when the es command raises an error" do
        client = double()
        tire_config = stub_const("Tire::Configuration", Class.new)
        tire_config.stub(:url).and_return("foo")
        tire_config.stub(:client).and_raise(StandardError)
        get "/heartbeat"
        last_response.status.should == 500
        parse(last_response.body).should == {"OK" => false, "check" => "es"}
      end
    end
  end

  describe 'selftest' do
    subject do
      get '/selftest', {}, {'HTTP_X_EDX_API_KEY' => TEST_API_KEY}
      parse(last_response.body)
  describe "selftest" do

    it "returns valid JSON on success" do
      get "/selftest", {}, {"HTTP_X_EDX_API_KEY" => TEST_API_KEY}
      res = parse(last_response.body)
      %w(db es total_posts total_users last_post_created elapsed_time).each do |k|
        res.should have_key k
      end
    end

    it 'returns valid JSON on success' do
      expect(subject).to include('db', 'es', 'total_posts', 'total_users', 'last_post_created', 'elapsed_time')
    it "handles when the database is empty" do
      get "/selftest", {}, {"HTTP_X_EDX_API_KEY" => TEST_API_KEY}
      res = parse(last_response.body)
      res["total_users"].should == 0
      res["total_posts"].should == 0
      res["last_post_created"].should == nil
    end

    it 'handles when the database is empty' do
      expect(subject).to include('total_users' => 0,
                                 'total_posts' => 0,
                                 'last_post_created' => nil)
    end

    it 'handles when the database is not empty' do
      thread = create(:comment_thread)
      expect(subject).to include(
        'total_users' => 1,
        'total_posts' => 1,
        'last_post_created' => thread.created_at.utc.iso8601)
    it "handles when the database is not empty" do
      user = create_test_user(42)
      thread = make_thread(user, "foo", "abc", "123")
      get "/selftest", {}, {"HTTP_X_EDX_API_KEY" => TEST_API_KEY}
      res = parse(last_response.body)
      res["total_users"].should == 1
      res["total_posts"].should == 1
      Time.parse(res["last_post_created"]).to_i.should == thread.created_at.to_i
    end

    it "displays tracebacks on failure" do
      url = "#{CommentService.config[:elasticsearch_server]}/_cluster/health"
      stub = stub_request(:any, url).to_raise(StandardError)

      get '/selftest', {}, {'HTTP_X_EDX_API_KEY' => TEST_API_KEY}
      expect(stub).to have_been_requested
      WebMock.reset!

      expect(last_response.status).to eq 500
      expect(last_response.headers).to include('Content-Type' => 'text/plain')
      expect(last_response.body).to include 'StandardError'
      expect(last_response.body).to include File.expand_path(__FILE__)
      Tire::Configuration.client.should_receive(:get).and_raise(StandardError)
      get "/selftest", {}, {"HTTP_X_EDX_API_KEY" => TEST_API_KEY}
      last_response.status.should == 500
      # lightweight assertion that we're seeing a traceback
      last_response.headers["Content-Type"].should == 'text/plain'
      last_response.body.should include "StandardError"
      last_response.body.should include File.expand_path(__FILE__)
    end

  end
end
@@ -16,9 +16,6 @@ require 'yajl'
require 'support/database_cleaner'
require 'support/elasticsearch'
require 'support/factory_girl'
require 'webmock/rspec'

WebMock.allow_net_connect!

# setup test environment
set :environment, :test
@@ -1,27 +1,27 @@
require 'task_helpers'

def refresh_es_index
  TaskHelpers::ElasticsearchHelper.refresh_index(Content::ES_INDEX_NAME)
def delete_es_index
  Tire.index Content::ES_INDEX_NAME do
    delete
  end
end


RSpec.shared_context 'search_enabled' do
  before(:all) do
    CommentService.config[:enable_search] = true
def create_es_index
  new_index = Tire.index Content::ES_INDEX_NAME
  new_index.create
  [CommentThread, Comment].each do |klass|
    klass.put_search_index_mapping
  end
end

  before(:each) do
    index = TaskHelpers::ElasticsearchHelper.create_index
    TaskHelpers::ElasticsearchHelper.move_alias(Content::ES_INDEX_NAME, index)
  end

  after(:each) do
    TaskHelpers::ElasticsearchHelper.delete_index(Content::ES_INDEX_NAME)
def refresh_es_index
  es_index_name = Content::ES_INDEX_NAME
  Tire.index es_index_name do
    refresh
  end
end

RSpec.configure do |config|
  config.before(:suite) do
    CommentService.config[:enable_search] = false
  config.before(:each) do
    delete_es_index
    create_es_index
  end
end