Browse Source

Connections to dnodes slow down

See #1435.
Prior to this commit, if connections were immediately closed,
no backoff or jitter was applied to delay the next connection attempt,
resulting in too many connection attempts to directory nodes that the
client could not reach, and spam in the logs.
After this commit, we enforce that we always shut down the ClientService
on any disconnection trigger or connection failure. For directory nodes,
we only attempt to reconnect after a manual backoff delay (starting at
4 seconds and increasing by 50% per attempt, for up to 20 attempts, plus
a few seconds of jitter). For non-directory nodes, we always give up on
any failure. The maximum delay is a bit above 3 hours.
master
Adam Gibson 3 years ago
parent
commit
9a412d8be0
No known key found for this signature in database
GPG Key ID: 141001A1AF77F20B
  1. 68
      jmdaemon/jmdaemon/onionmc.py

68
jmdaemon/jmdaemon/onionmc.py

@ -231,7 +231,7 @@ class OnionLineProtocolFactory(protocol.ServerFactory):
proto.message(message) proto.message(message)
return True return True
class OnionClientFactory(protocol.ReconnectingClientFactory): class OnionClientFactory(protocol.ClientFactory):
""" We define a distinct protocol factory for outbound connections. """ We define a distinct protocol factory for outbound connections.
Notably, this factory supports only *one* protocol instance at a time. Notably, this factory supports only *one* protocol instance at a time.
""" """
@ -267,8 +267,8 @@ class OnionClientFactory(protocol.ReconnectingClientFactory):
if self.directory and not self.mc.give_up: if self.directory and not self.mc.give_up:
if reactor.running: if reactor.running:
log.info('Attempting to reconnect...') log.info('Attempting to reconnect...')
protocol.ReconnectingClientFactory.clientConnectionLost(self, protocol.ClientFactory.clientConnectionLost(self,
connector, reason) connector, reason)
def clientConnectionFailed(self, connector, reason): def clientConnectionFailed(self, connector, reason):
log.info('Onion client connection failed: ' + str(reason)) log.info('Onion client connection failed: ' + str(reason))
@ -276,8 +276,8 @@ class OnionClientFactory(protocol.ReconnectingClientFactory):
if self.directory and not self.mc.give_up: if self.directory and not self.mc.give_up:
if reactor.running: if reactor.running:
log.info('Attempting to reconnect...') log.info('Attempting to reconnect...')
protocol.ReconnectingClientFactory.clientConnectionFailed(self, protocol.ClientFactory.clientConnectionFailed(self,
connector, reason) connector, reason)
def register_connection(self, p: OnionLineProtocol) -> None: def register_connection(self, p: OnionLineProtocol) -> None:
self.proto_client = p self.proto_client = p
self.connection_callback() self.connection_callback()
@ -302,6 +302,8 @@ class OnionClientFactory(protocol.ReconnectingClientFactory):
self.message_receive_callback(message) self.message_receive_callback(message)
class OnionPeer(object): class OnionPeer(object):
""" Class encapsulating a peer we connect to.
"""
def __init__(self, messagechannel: 'OnionMessageChannel', def __init__(self, messagechannel: 'OnionMessageChannel',
socks5_host: str, socks5_port: int, socks5_host: str, socks5_port: int,
@ -513,33 +515,31 @@ class OnionPeer(object):
self.port) self.port)
self.reconnecting_service = ClientService(onionEndpoint, self.factory) self.reconnecting_service = ClientService(onionEndpoint, self.factory)
# if we want to actually do something about an unreachable host, # if we want to actually do something about an unreachable host,
# we have to force t.a.i.ClientService to give up after the timeout: # we have to force t.a.i.ClientService to give up after the timeout
d = self.reconnecting_service.whenConnected(failAfterFailures=1) d = self.reconnecting_service.whenConnected(failAfterFailures=1)
d.addErrback(self.respond_to_connection_failure) d.addCallbacks(self.respond_to_connection_success,
self.respond_to_connection_failure)
self.reconnecting_service.startService() self.reconnecting_service.startService()
def respond_to_connection_failure(self, failure): def respond_to_connection_success(self, proto) -> None:
self.connecting = False
def respond_to_connection_failure(self, failure) -> None:
self.connecting = False self.connecting = False
# the error will be one of these if we just fail # the error will be one of these if we just fail
# to connect to the other side. # to connect to the other side.
f = failure.trap(HostUnreachableError, SocksError, GeneralServerFailureError) failure.trap(HostUnreachableError, SocksError, GeneralServerFailureError)
# if this is a non-dir reachable peer, we just record comment = "" if self.directory else "; giving up."
# the failure and explicitly give up: log.info(f"Failed to connect to peer {self.peer_location()}{comment}")
if not self.directory: self.reconnecting_service.stopService()
log.info("We failed to connect to peer {}; giving up".format(
self.peer_location()))
self.reconnecting_service.stopService()
else:
# in this case, the still-running ClientService will
# just keep trying:
log.warn("We failed to connect to directory {}; trying "
"again.".format(self.peer_location()))
def register_connection(self) -> None: def register_connection(self) -> None:
self.messagechannel.register_connection(self.peer_location(), self.messagechannel.register_connection(self.peer_location(),
direction=1) direction=1)
def register_disconnection(self) -> None: def register_disconnection(self) -> None:
# for non-directory peers, just stop
self.reconnecting_service.stopService()
self.messagechannel.register_disconnection(self.peer_location()) self.messagechannel.register_disconnection(self.peer_location())
def try_to_connect(self) -> None: def try_to_connect(self) -> None:
@ -583,19 +583,20 @@ class OnionPeerPassive(OnionPeer):
class OnionDirectoryPeer(OnionPeer): class OnionDirectoryPeer(OnionPeer):
delay = 4.0 delay = 4.0
def try_to_connect(self) -> None: def try_to_connect(self) -> None:
# Delay deliberately expands out to very # Delay deliberately expands out to very
# long times as yg-s tend to be very long # long times as yg-s tend to be very long
# running bots: # running bots:
self.delay *= 1.5 # We will only expand delay 20 times max
if self.delay > 10000: # (4 * 1.5^19 = 8867.3)
log.warn("Cannot connect to directory node peer: {} " if self.delay < 8868:
"after 20 attempts, giving up.".format(self.peer_location())) self.delay *= 1.5
return # randomize by a few seconds to minimize bursty-ness locally
try: jitter = random.randint(-1, 5)
self.connect() log.info(f"Going to reattempt connection to {self.peer_location()} in "
except OnionPeerConnectionError: f"{self.delay + jitter} seconds.")
reactor.callLater(self.delay, self.try_to_connect) reactor.callLater(self.delay + jitter, self.connect)
def register_connection(self) -> None: def register_connection(self) -> None:
self.messagechannel.update_directory_map(self, connected=True) self.messagechannel.update_directory_map(self, connected=True)
@ -604,7 +605,14 @@ class OnionDirectoryPeer(OnionPeer):
def register_disconnection(self) -> None: def register_disconnection(self) -> None:
self.messagechannel.update_directory_map(self, connected=False) self.messagechannel.update_directory_map(self, connected=False)
super().register_disconnection() super().register_disconnection()
# for directory peers, we persist in trying to establish
# a connection, but with backoff:
self.try_to_connect()
def respond_to_connection_failure(self, failure) -> None:
super().respond_to_connection_failure(failure)
# same logic as for register_disconnection
self.try_to_connect()
class OnionMessageChannel(MessageChannel): class OnionMessageChannel(MessageChannel):
""" Sends messages to other nodes of the same type over Tor """ Sends messages to other nodes of the same type over Tor

Loading…
Cancel
Save