node.py: add BASEDIR/keepalive_timeout and BASEDIR/disconnect_timeout, to set/enable the foolscap timers, for #521

This commit is contained in:
Brian Warner 2008-09-24 10:51:12 -07:00
parent 69bc4624c3
commit dd9171eb72
3 changed files with 48 additions and 8 deletions

View File

@ -57,6 +57,25 @@ specification, like:
Lines that do not provide a port number will use the same client.port as the
automatically-discovered addresses.
keepalive_timeout (optional): If present, this is treated as an integral
number of seconds, and sets the Foolscap "keepalive timer" to that value. For
each connection to another node, if nothing has been heard for a while, we
will attempt to provoke the other end into saying something. The duration of
silence that passes before sending the PING will be between KT and 2*KT
(where KT is the keepalive_timeout value). This is mainly intended to keep
NAT boxes from expiring idle TCP sessions, but also gives TCP's
long-duration keepalive/disconnect timers some traffic to work with. The
default value is 240 (i.e. 4 minutes).
disconnect_timeout (optional): If present, this is treated as an integral
number of seconds, and sets the Foolscap "disconnect timer" to that value.
For each connection to another node, if nothing has been heard for a while,
we will drop the connection. The duration of silence that passes before
dropping the connection will be between DT-2*KT and 2*DT+2*KT, where DT is
this disconnect_timeout value and KT is the keepalive_timeout value (please
see ticket #521 for more details). If we are sending a large amount of data
to the other end (which takes more than DT-2*KT to deliver), we might
incorrectly drop the connection. The default behavior (when this file does
not exist) is to disable the disconnect timer.
authorized_keys.SSHPORT (optional): This enables an SSH-based interactive
Python shell, which can be used to inspect the internal state of the node,
for debugging. To cause the node to accept SSH connections on port 8022,

View File

@ -56,6 +56,22 @@ class Node(service.MultiService):
self.tub = Tub(certFile=certfile)
self.tub.setOption("logLocalFailures", True)
self.tub.setOption("logRemoteFailures", True)
# see #521 for a discussion of how to pick these timeout values. Using
# 30 minutes means we'll disconnect after 22 to 68 minutes of
# inactivity. Receiving data will reset this timeout; however, if we
# have more than 22min of data in the outbound queue (such as 800kB
# in two pipelined segments of 10 shares each) and the far end has no
# need to contact us, our ping might be delayed, so we may disconnect
# them by accident.
keepalive_timeout_s = self.get_config("keepalive_timeout")
if keepalive_timeout_s:
self.tub.setOption("keepaliveTimeout", int(keepalive_timeout_s))
disconnect_timeout_s = self.get_config("disconnect_timeout")
if disconnect_timeout_s:
# N.B.: this is in seconds, so use "1800" to get 30min
self.tub.setOption("disconnectTimeout", int(disconnect_timeout_s))
self.nodeid = b32decode(self.tub.tubID.upper()) # binary format
self.write_config("my_nodeid", b32encode(self.nodeid).lower() + "\n")
self.short_nodeid = b32encode(self.nodeid).lower()[:8] # ready for printing

View File

@ -347,20 +347,25 @@ class SystemTestMixin(testutil.PollMixin, testutil.StallMixin):
f.write(SYSTEM_TEST_CERTS[i+1])
f.close()
def write(name, value):
    # Write `value` followed by a single newline to the file `name` inside
    # `basedir` (taken from the enclosing scope), replacing any previous
    # contents. A newline is always appended, so a `value` that already
    # ends in "\n" produces a file ending in two newlines.
    f = open(os.path.join(basedir, name), "w")
    try:
        f.write(value+"\n")
    finally:
        # close explicitly so the data is flushed to disk before the node
        # reads it, instead of relying on garbage collection of the handle
        f.close()
if i == 0:
# client[0] runs a webserver and a helper, no key_generator
open(os.path.join(basedir, "webport"), "w").write("tcp:0:interface=127.0.0.1")
open(os.path.join(basedir, "run_helper"), "w").write("yes\n")
open(os.path.join(basedir, "sizelimit"), "w").write("10GB\n")
write("webport", "tcp:0:interface=127.0.0.1")
write("run_helper", "yes")
write("sizelimit", "10GB")
write("keepalive_timeout", "600")
if i == 3:
# client[3] runs a webserver and uses a helper, uses key_generator
open(os.path.join(basedir, "webport"), "w").write("tcp:0:interface=127.0.0.1")
# client[3] runs a webserver and uses a helper, uses
# key_generator
write("webport", "tcp:0:interface=127.0.0.1")
write("disconnect_timeout", "1800")
if self.key_generator_furl:
kgf = "%s\n" % (self.key_generator_furl,)
open(os.path.join(basedir, "key_generator.furl"), "w").write(kgf)
open(os.path.join(basedir, "introducer.furl"), "w").write(self.introducer_furl)
write("key_generator.furl", kgf)
write("introducer.furl", self.introducer_furl)
if self.stats_gatherer_furl:
open(os.path.join(basedir, "stats_gatherer.furl"), "w").write(self.stats_gatherer_furl)
write("stats_gatherer.furl", self.stats_gatherer_furl)
# start client[0], wait for its tub to be ready (at which point it
# will have registered the helper furl).