зеркало из https://github.com/mozilla/labs-vcap.git
warden: Use tiny C tool to detect OOM
This removes the dependency on sleepy_penguin, along with all calls to #watch and #unbind outside of em-posix-spawn. Fewer #watch and #unbind calls make EventMachine (EM) more stable. Change-Id: I89af9cb3567c8e8c5db50779df5fdd1305c84099
This commit is contained in:
Родитель
d686dcbe68
Коммит
b0206355d8
|
@ -7,10 +7,6 @@ gem "eventmachine", "0.12.11.cloudfoundry.3"
|
|||
gem "yajl-ruby"
|
||||
gem "em-posix-spawn", '> 0.0.1'
|
||||
|
||||
group :linux do
|
||||
gem "sleepy_penguin"
|
||||
end
|
||||
|
||||
group :spec do
|
||||
gem "rspec"
|
||||
gem "warden-client"
|
||||
|
|
|
@ -27,7 +27,6 @@ GEM
|
|||
rspec-expectations (2.8.0)
|
||||
diff-lcs (~> 1.1.2)
|
||||
rspec-mocks (2.8.0)
|
||||
sleepy_penguin (3.0.1)
|
||||
thin (1.3.1)
|
||||
daemons (>= 1.0.9)
|
||||
eventmachine (>= 0.12.6)
|
||||
|
@ -53,7 +52,6 @@ DEPENDENCIES
|
|||
eventmachine (= 0.12.11.cloudfoundry.3)
|
||||
rake
|
||||
rspec
|
||||
sleepy_penguin
|
||||
vcap_common
|
||||
vcap_logging
|
||||
warden-client
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
require "warden/errors"
|
||||
require "warden/logger"
|
||||
require "sleepy_penguin"
|
||||
require "warden/container/spawn"
|
||||
|
||||
module Warden
|
||||
|
||||
|
@ -10,52 +10,42 @@ module Warden
|
|||
|
||||
module MemLimit
|
||||
|
||||
class OomNotifier < EM::Connection
|
||||
class << self
|
||||
def for_container(container)
|
||||
cgroup_root = container.cgroup_root_path
|
||||
cio = File.open(File.join(cgroup_root, 'memory.oom_control'), File::RDONLY)
|
||||
eio = SleepyPenguin::EventFD.new(0, :NONBLOCK)
|
||||
class OomNotifier
|
||||
|
||||
ctrl_file = File.open(File.join(cgroup_root, 'cgroup.event_control'), File::WRONLY)
|
||||
ctrl_file.syswrite(["#{eio.fileno} #{cio.fileno} 1"].pack("Z*"))
|
||||
ctrl_file.close
|
||||
cio.close
|
||||
include Spawn
|
||||
include Logger
|
||||
|
||||
notifier = EM.attach(eio, OomNotifier)
|
||||
notifier.container = container
|
||||
notifier
|
||||
end
|
||||
end
|
||||
attr_reader :container
|
||||
|
||||
def container=(container)
|
||||
def initialize(container)
|
||||
@container = container
|
||||
end
|
||||
|
||||
def container
|
||||
@container
|
||||
end
|
||||
oom_notifier_path = File.expand_path("../../../../../src/oom", __FILE__)
|
||||
@child = DeferredChild.new(oom_notifier_path, container.cgroup_root_path)
|
||||
|
||||
# We don't care about the data written to us. Its only purpose is to
|
||||
# notify us that a process inside the container OOMed
|
||||
def receive_data(_)
|
||||
# We rely on container destruction to unregister ourselves from
|
||||
# the event loop and close our event fd (by calling #unregister).
|
||||
#
|
||||
# NB: This is executed on the next tick of the reactor to avoid
|
||||
# doing a detach inside the read callback.
|
||||
EM.next_tick do
|
||||
Fiber.new do
|
||||
self.container.oomed
|
||||
end.resume
|
||||
# Zero exit status means a process OOMed, non-zero means an error occurred
|
||||
@child.callback do
|
||||
if @child.success?
|
||||
Fiber.new do
|
||||
container.oomed
|
||||
end.resume
|
||||
else
|
||||
debug "stderr: #{@child.err}"
|
||||
end
|
||||
end
|
||||
|
||||
# Don't care about errback, nothing we can do
|
||||
end
|
||||
|
||||
def unregister
|
||||
detach
|
||||
@io.close rescue nil
|
||||
# Overwrite callback
|
||||
@child.callback do
|
||||
# Nothing
|
||||
end
|
||||
|
||||
# TODO: kill child
|
||||
end
|
||||
end # OomNotifier
|
||||
end
|
||||
|
||||
def oomed
|
||||
self.warn "OOM condition occurred inside container #{self.handle}"
|
||||
|
@ -88,7 +78,7 @@ module Warden
|
|||
# avoid a race between when the limit is set and when the oom
|
||||
# notifier is registered.
|
||||
unless @oom_notifier
|
||||
@oom_notifier = OomNotifier.for_container(self)
|
||||
@oom_notifier = OomNotifier.new(self)
|
||||
on(:after_stop) do
|
||||
if @oom_notifier
|
||||
self.debug "Unregistering OOM Notifier for container '#{self.handle}'"
|
||||
|
|
|
@ -52,25 +52,43 @@ module Warden
|
|||
attr_reader :argv
|
||||
attr_reader :options
|
||||
|
||||
# Delegates to the wrapped child: returns whatever @child reports from #out.
def out
  @child.out
end
|
||||
|
||||
# Delegates to the wrapped child: returns whatever @child reports from #err.
def err
  @child.err
end
|
||||
|
||||
# Delegates to the wrapped child: returns the object @child reports from #status.
def status
  @child.status
end
|
||||
|
||||
# Delegates to the wrapped child: returns whatever @child reports from #runtime.
def runtime
  @child.runtime
end
|
||||
|
||||
# Delegates to the wrapped child: truthy when @child reports success.
def success?
  @child.success?
end
|
||||
|
||||
# Exit status of the wrapped child, read from its status object
# (@child.status.exitstatus).
def exit_status
  @child.status.exitstatus
end
|
||||
|
||||
def initialize(*args)
|
||||
@env, @argv, @options = extract_process_spawn_arguments(*args)
|
||||
|
||||
p = Child.new(env, *(argv + [options]))
|
||||
@child = Child.new(env, *(argv + [options]))
|
||||
|
||||
p.callback {
|
||||
unless p.success?
|
||||
# Log stderr. Don't use this as message for the raised error to
|
||||
# prevent internal information from leaking to clients.
|
||||
error "stderr: #{p.err.inspect}"
|
||||
@child.callback do
|
||||
# Log stderr when command didn't exit successfully
|
||||
error "stderr: #{err.inspect}" unless success?
|
||||
|
||||
err = WardenError.new("command exited with failure")
|
||||
set_deferred_failure(err)
|
||||
else
|
||||
set_deferred_success(p.out)
|
||||
end
|
||||
}
|
||||
set_deferred_success
|
||||
end
|
||||
|
||||
p.errback { |err|
|
||||
@child.errback do |err|
|
||||
if err == MaximumOutputExceeded
|
||||
err = WardenError.new("command exceeded maximum output")
|
||||
elsif err == TimeoutExceeded
|
||||
|
@ -80,11 +98,11 @@ module Warden
|
|||
end
|
||||
|
||||
set_deferred_failure(err)
|
||||
}
|
||||
end
|
||||
end
|
||||
|
||||
# Helper to inject log message
|
||||
def set_deferred_success(result)
|
||||
def set_deferred_success
|
||||
debug "successfully ran #{argv.inspect}"
|
||||
super
|
||||
end
|
||||
|
@ -97,10 +115,23 @@ module Warden
|
|||
|
||||
def yield
|
||||
f = Fiber.current
|
||||
callback { |result| f.resume(:ok, result) }
|
||||
errback { |err| f.resume(:err, err) }
|
||||
|
||||
callback do
|
||||
if success?
|
||||
f.resume(:ok, out)
|
||||
else
|
||||
f.resume(:err, WardenError.new("command exited with failure"))
|
||||
end
|
||||
end
|
||||
|
||||
errback do |err|
|
||||
f.resume(:err, err)
|
||||
end
|
||||
|
||||
status, result = Fiber.yield
|
||||
|
||||
raise result if status == :err
|
||||
|
||||
result
|
||||
end
|
||||
end
|
||||
|
|
|
@ -18,5 +18,8 @@ runner: runner.o
|
|||
clone: clone.o
|
||||
$(CC) -o $@ -lutil $^
|
||||
|
||||
oom: oom.o
|
||||
$(CC) -o $@ $^
|
||||
|
||||
%.o: %.c
|
||||
$(CC) -c -Wall -D_GNU_SOURCE $(OPTIMIZATION) $(DEBUG) $(CFLAGS) $<
|
||||
|
|
|
@ -0,0 +1,115 @@
|
|||
#include <sys/param.h>
|
||||
#include <sys/eventfd.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include <errno.h>
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
int event_fd = -1;
|
||||
char oom_control_path[PATH_MAX];
|
||||
size_t oom_control_path_len;
|
||||
int oom_control_fd = -1;
|
||||
char event_control_path[PATH_MAX];
|
||||
size_t event_control_path_len;
|
||||
int event_control_fd = -1;
|
||||
char line[LINE_MAX];
|
||||
size_t line_len;
|
||||
int rv;
|
||||
uint64_t result;
|
||||
|
||||
if (argc != 2) {
|
||||
fprintf(stderr, "Usage: %s <path to cgroup>\n", argv[0]);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* Open event fd */
|
||||
event_fd = eventfd(0, 0);
|
||||
if (event_fd == -1) {
|
||||
perror("eventfd");
|
||||
goto err;
|
||||
}
|
||||
|
||||
/* Open oom control file */
|
||||
oom_control_path_len = snprintf(oom_control_path, sizeof(oom_control_path), "%s/memory.oom_control", argv[1]);
|
||||
assert(oom_control_path_len < sizeof(oom_control_path));
|
||||
|
||||
oom_control_fd = open(oom_control_path, O_RDONLY);
|
||||
if (oom_control_fd == -1) {
|
||||
perror("open");
|
||||
goto err;
|
||||
}
|
||||
|
||||
/* Open event control file */
|
||||
event_control_path_len = snprintf(event_control_path, sizeof(event_control_path), "%s/cgroup.event_control", argv[1]);
|
||||
assert(event_control_path_len < sizeof(event_control_path));
|
||||
|
||||
event_control_fd = open(event_control_path, O_WRONLY);
|
||||
if (event_control_fd == -1) {
|
||||
perror("open");
|
||||
goto err;
|
||||
}
|
||||
|
||||
/* Write event fd and oom control fd to event control fd */
|
||||
line_len = snprintf(line, sizeof(line), "%d %d\n", event_fd, oom_control_fd);
|
||||
assert(line_len < sizeof(line));
|
||||
|
||||
rv = write(event_control_fd, line, line_len);
|
||||
if (rv == -1) {
|
||||
perror("write");
|
||||
goto err;
|
||||
}
|
||||
|
||||
/* Read oom */
|
||||
do {
|
||||
rv = read(event_fd, &result, sizeof(result));
|
||||
} while (rv == -1 && errno == EINTR);
|
||||
|
||||
if (rv == -1) {
|
||||
perror("read");
|
||||
goto err;
|
||||
}
|
||||
|
||||
assert(rv == sizeof(result));
|
||||
|
||||
rv = access(event_control_path, W_OK);
|
||||
if (rv == -1 && errno == ENOENT) {
|
||||
/* The cgroup appears to be removed */
|
||||
perror("access");
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (rv == -1) {
|
||||
perror("access");
|
||||
goto err;
|
||||
}
|
||||
|
||||
fprintf(stdout, "oom");
|
||||
|
||||
rv = 0;
|
||||
goto out;
|
||||
|
||||
err:
|
||||
rv = 1;
|
||||
goto out;
|
||||
|
||||
out:
|
||||
if (event_fd >= 0) {
|
||||
close(event_fd);
|
||||
}
|
||||
|
||||
if (oom_control_fd >= 0) {
|
||||
close(oom_control_fd);
|
||||
}
|
||||
|
||||
if (event_control_fd >= 0) {
|
||||
close(event_control_fd);
|
||||
}
|
||||
|
||||
return rv;
|
||||
}
|
Загрузка…
Ссылка в новой задаче