Skip to content

Commit

Permalink
Unified the make_call behavior across clients.
Browse files Browse the repository at this point in the history
  • Loading branch information
obino committed Aug 11, 2015
1 parent bf6a04e commit e4e6b8a
Show file tree
Hide file tree
Showing 7 changed files with 274 additions and 223 deletions.
48 changes: 25 additions & 23 deletions AppController/lib/app_controller_client.rb
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,16 @@ def initialize(ip, secret)
@conn.add_method("add_appserver_process", "app_id", "secret")
@conn.add_method("remove_appserver_process", "app_id", "port", "secret")
end


# Provides automatic retry logic for transient SOAP errors.

# Provides automatic retry logic for transient SOAP errors. This code is
# duplicated in several other clients (it should be moved into a library):
# lib/infrastructure_manager_client.rb
# lib/user_app_client.rb
# lib/taskqueue_client.rb
# lib/app_manager_client.rb
# lib/app_controller_client.rb
# Any modification to this function should be reflected in the others too.
#
# Args:
# time: A Fixnum that indicates how long the timeout should be set to when
Expand All @@ -108,39 +115,34 @@ def make_call(time, retry_on_except, callr)
refused_count = 0
max = 5

# Do we need to retry at all?
if not retry_on_except
refused_count = max + 1
end

begin
Timeout::timeout(time) {
yield if block_given?
}
rescue Errno::ECONNREFUSED, Errno::EHOSTUNREACH
rescue Timeout::Error
Djinn.log_warn("[#{callr}] SOAP call to #{@ip} timed out")
raise FailedNodeException.new("Time out: is the AppController running?")
rescue Errno::ECONNREFUSED, Errno::EHOSTUNREACH,
OpenSSL::SSL::SSLError, NotImplementedError, Errno::EPIPE,
Errno::ECONNRESET, SOAP::EmptyResponseError, Exception => e
trace = e.backtrace.join("\n")
Djinn.log_warn("[#{callr}] exception in make_call to #{@ip}: #{e.class}\n#{trace}")
if refused_count > max
raise FailedNodeException.new("Connection was refused. Is the " +
"AppController running?")
raise FailedNodeException.new("[#{callr}] failed to interact with #{@ip}.")
else
refused_count += 1
Kernel.sleep(1)
Kernel.sleep(3)
retry
end
rescue Timeout::Error
Djinn.log_warn("[#{callr}] SOAP call to #{@ip} timed out")
return
rescue OpenSSL::SSL::SSLError, NotImplementedError, Errno::EPIPE,
Errno::ECONNRESET, SOAP::EmptyResponseError => e
backtrace = e.backtrace.join("\n")
Djinn.log_warn("Error in make_call: #{e.class}\n#{backtrace}")
retry
rescue Exception => except
if retry_on_except
retry
else
trace = except.backtrace.join("\n")
HelperFunctions.log_and_crash("[#{callr}] We saw an unexpected error" +
" of the type #{except.class} with the following message:\n" +
"#{except}, with trace: #{trace}")
end
end
end


def set_parameters(locations, options, apps_to_start)
result = ""
make_call(10, ABORT_ON_FAIL, "set_parameters") {
Expand Down
154 changes: 80 additions & 74 deletions AppController/lib/app_manager_client.rb
Original file line number Diff line number Diff line change
Expand Up @@ -34,87 +34,93 @@ def initialize(ip)
@conn.add_method("restart_app_instances_for_app", "app_name", "language")
end

# Make a SOAP call out to the AppManager.
#
# Args:
# timeout: The maximum time to wait on a remote call
# retry_on_except: Boolean indicating whether we should keep retrying
# the call
# Returns:
# The result of the remote call.
# TODO:
# This code was copy/pasted from app_controller_client
# and can be factored out to a library. Note this for
# the transition to the python port.
# Provides automatic retry logic for transient SOAP errors. This code is
# duplicated in several other clients (it should be moved into a library):
# lib/infrastructure_manager_client.rb
# lib/user_app_client.rb
# lib/taskqueue_client.rb
# lib/app_manager_client.rb
# lib/app_controller_client.rb
# Any modification to this function should be reflected in the others too.
#
def make_call(timeout, retry_on_except, callr)
result = ""
Djinn.log_debug("Calling the AppManager - #{callr}")
# Args:
# time: A Fixnum that indicates how long the timeout should be set to when
# executing the caller's block.
# retry_on_except: A boolean that indicates if non-transient Exceptions
# should result in the caller's block being retried or not.
# callr: A String that names the caller's method, used for debugging
# purposes.
#
# Raises:
# FailedNodeException: if the given block contacted a machine that
# is either not running or is rejecting connections.
# SystemExit: If a non-transient Exception was thrown when executing the
# given block.
# Returns:
# The result of the block that was executed. A timeout does not return
# nil: it is logged and re-raised as a FailedNodeException.
def make_call(time, retry_on_except, callr)
refused_count = 0
max = 5

# Do we need to retry at all?
if not retry_on_except
refused_count = max + 1
end

begin
Timeout::timeout(timeout) do
begin
yield if block_given?
end
end
rescue OpenSSL::SSL::SSLError => e
Djinn.log_warn("Saw a SSLError when calling #{callr}" +
" - trying again momentarily.")
backtrace = e.backtrace.join("\n")
Djinn.log_warn("Exception: #{e.class}\n#{backtrace}")
retry
rescue Errno::ECONNREFUSED => except
if retry_on_except
Djinn.log_warn("Saw a connection refused when calling #{callr}" +
" - trying again momentarily.")
sleep(1)
retry
Timeout::timeout(time) {
yield if block_given?
}
rescue Timeout::Error
Djinn.log_warn("[#{callr}] SOAP call to #{@ip} timed out")
raise FailedNodeException.new("Time out: is the AppController running?")
rescue Errno::ECONNREFUSED, Errno::EHOSTUNREACH,
OpenSSL::SSL::SSLError, NotImplementedError, Errno::EPIPE,
Errno::ECONNRESET, SOAP::EmptyResponseError, Exception => e
trace = e.backtrace.join("\n")
Djinn.log_warn("[#{callr}] exception in make_call to #{@ip}: #{e.class}\n#{trace}")
if refused_count > max
raise FailedNodeException.new("[#{callr}] failed to interact with #{@ip}.")
else
trace = except.backtrace.join("\n")
HelperFunctions.log_and_crash("We saw an unexpected error of the " +
"type #{except.class} with the following message:\n#{except}, with" +
" trace: #{trace}")
end
rescue Exception => except
if except.class == Interrupt
Djinn.log_fatal("Saw an Interrupt exception")
HelperFunctions.log_and_crash("Saw an Interrupt Exception")
refused_count += 1
Kernel.sleep(3)
retry
end

Djinn.log_error("An exception of type #{except.class} was thrown: #{except}.")
retry if retry_on_except
end
end


# Wrapper for SOAP call to the AppManager to start a process instance of
# an application server.
#
# Args:
# app_name: Name of the application
# app_port: The port to run the application server
# load_balancer_ip: The public IP of the load balancer
# load_balancer_port: The port of the load balancer
# language: The language the application is written in
# xmpp_ip: The IP for XMPP
# db_locations: An Array of datastore server IPs
# env_vars: A Hash of environment variables that should be passed to the
# application to start.
# max_memory: An Integer that names the maximum amount of memory (in
# megabytes) that should be used for this App Engine app.
# Returns:
# The PID of the process started
# Note:
# We currently send hashes over in SOAP using json because
# of incompatibilities between SOAP mappings from ruby to python.
# As we convert over to python we should use native dictionaries.
#
def start_app(app_name,
app_port,
load_balancer_ip,
language,
xmpp_ip,
db_locations,
env_vars,
max_memory=500)
# Wrapper for SOAP call to the AppManager to start a process instance of
# an application server.
#
# Args:
# app_name: Name of the application
# app_port: The port to run the application server
# load_balancer_ip: The public IP of the load balancer
# load_balancer_port: The port of the load balancer
# language: The language the application is written in
# xmpp_ip: The IP for XMPP
# db_locations: An Array of datastore server IPs
# env_vars: A Hash of environment variables that should be passed to the
# application to start.
# max_memory: An Integer that names the maximum amount of memory (in
# megabytes) that should be used for this App Engine app.
# Returns:
# The PID of the process started
# Note:
# We currently send hashes over in SOAP using json because
# of incompatibilities between SOAP mappings from ruby to python.
# As we convert over to python we should use native dictionaries.
#
def start_app(app_name,
app_port,
load_balancer_ip,
language,
xmpp_ip,
db_locations,
env_vars,
max_memory=500)
config = {'app_name' => app_name,
'app_port' => app_port,
'load_balancer_ip' => load_balancer_ip,
Expand Down
39 changes: 23 additions & 16 deletions AppController/lib/helperfunctions.rb
Original file line number Diff line number Diff line change
Expand Up @@ -253,32 +253,39 @@ def self.sleep_until_port_is_closed(ip, port, use_ssl=DONT_USE_SSL)


def self.is_port_open?(ip, port, use_ssl=DONT_USE_SSL)
max = 2
refused_count = 0

begin
Timeout::timeout(1) do
begin
sock = TCPSocket.new(ip, port)
if use_ssl
ssl_context = OpenSSL::SSL::SSLContext.new()
unless ssl_context.verify_mode
ssl_context.verify_mode = OpenSSL::SSL::VERIFY_NONE
end
sslsocket = OpenSSL::SSL::SSLSocket.new(sock, ssl_context)
sslsocket.sync_close = true
sslsocket.connect
sock = TCPSocket.new(ip, port)
if use_ssl
ssl_context = OpenSSL::SSL::SSLContext.new()
unless ssl_context.verify_mode
ssl_context.verify_mode = OpenSSL::SSL::VERIFY_NONE
end
sock.close
return true
rescue Errno::ECONNREFUSED, Errno::EHOSTUNREACH, Errno::ECONNRESET
return false
sslsocket = OpenSSL::SSL::SSLSocket.new(sock, ssl_context)
sslsocket.sync_close = true
sslsocket.connect
end
sock.close
return true
end
rescue Timeout::Error
rescue OpenSSL::SSL::SSLError
Djinn.log_debug("Retry after SSL error talking to #{ip}:#{port}")
refused_count += 1
if refused_count > max
Djinn.log_warn("[is_port_open]: saw SSL error talking to #{ip}:#{port}")
else
Kernel.sleep(1)
retry
end
rescue
end

return false
end


def self.run_remote_command(ip, command, public_key_loc, want_output)
Djinn.log_debug("ip is [#{ip}], command is [#{command}], public key is [#{public_key_loc}], want output? [#{want_output}]")
public_key_loc = File.expand_path(public_key_loc)
Expand Down
Loading

0 comments on commit e4e6b8a

Please sign in to comment.