Start on benchmarking infrastructure: a framework for starting nodes.
This commit is contained in:
parent
fbd383105c
commit
9ee10af884
|
@ -0,0 +1,8 @@
|
||||||
|
"""pytest-based end-to-end benchmarks of Tahoe-LAFS.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
|
||||||
|
$ pytest benchmark --number-of-nodes=3
|
||||||
|
|
||||||
|
It's possible to pass --number-of-nodes multiple times.
|
||||||
|
"""
|
|
@ -0,0 +1,107 @@
|
||||||
|
"""
|
||||||
|
pytest infrastructure for benchmarks.
|
||||||
|
|
||||||
|
The number of nodes is parameterized via a --number-of-nodes CLI option added
|
||||||
|
to pytest.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from os.path import abspath
|
||||||
|
from shutil import which, rmtree
|
||||||
|
from tempfile import mkdtemp
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
import pytest_twisted
|
||||||
|
|
||||||
|
from twisted.internet import reactor
|
||||||
|
from twisted.internet.defer import DeferredList, succeed
|
||||||
|
|
||||||
|
from allmydata.util.iputil import allocate_tcp_port
|
||||||
|
|
||||||
|
from integration.grid import Client, create_grid, create_flog_gatherer
|
||||||
|
|
||||||
|
|
||||||
|
def pytest_addoption(parser):
    """
    Register the command-line options understood by the benchmark suite.

    :param parser: the pytest option parser; only its ``addoption`` method
        is used.
    """
    # ``append`` lets the flag be given several times; every value is
    # converted to an int and collected into one list (empty by default).
    parser.addoption(
        "--number-of-nodes",
        action="append",
        default=[],
        type=int,
        help="list of number_of_nodes to benchmark against",
    )

    # Required to be compatible with integration.util code that we indirectly
    # depend on, but also might be useful.
    foolscap_help = (
        "If set, force Foolscap only for the storage protocol. "
        "Otherwise HTTP will be used."
    )
    parser.addoption(
        "--force-foolscap",
        action="store_true",
        default=False,
        dest="force_foolscap",
        help=foolscap_help,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def pytest_generate_tests(metafunc):
    """
    Expose ``number_of_nodes`` as a session-scoped parameterized fixture.

    Anything that requests ``number_of_nodes`` is parameterized once per
    value collected from the ``--number-of-nodes`` command-line option.
    Test functions that do not request it are left untouched.
    """
    if "number_of_nodes" not in metafunc.fixturenames:
        return

    node_counts = metafunc.config.getoption("number_of_nodes")
    metafunc.parametrize("number_of_nodes", node_counts, scope="session")
|
||||||
|
|
||||||
|
|
||||||
|
def port_allocator():
    """
    Allocate a TCP port and hand it back wrapped in a Deferred.

    :return: the result of ``succeed()`` applied to the port returned by
        ``allocate_tcp_port()``, i.e. a Deferred that has already fired.
    """
    return succeed(allocate_tcp_port())
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
def grid(request):
    """
    Provides a new Grid with a single Introducer and flog-gathering process.

    Notably does _not_ provide storage servers; use the storage_nodes
    fixture if your tests need a Grid that can be used for puts / gets.

    Everything is created under a fresh temporary directory, which is
    removed by a finalizer registered on ``request``.
    """
    base_dir = mkdtemp(prefix="tahoe-benchmark")

    def _remove_base_dir():
        rmtree(base_dir)

    # Registered before anything else is created in the directory.
    request.addfinalizer(_remove_base_dir)

    flogtool_path = which("flogtool")
    gatherer = pytest_twisted.blockon(
        create_flog_gatherer(reactor, request, base_dir, flogtool_path)
    )
    return pytest_twisted.blockon(
        create_grid(reactor, request, base_dir, gatherer, port_allocator)
    )
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
def storage_nodes(grid, number_of_nodes):
    """
    Add ``number_of_nodes`` storage nodes to ``grid`` and wait for them.

    All nodes are started concurrently; the fixture blocks until every
    start-up Deferred has fired and fails if any of them failed.

    :return: ``grid.storage_servers``.
    """
    # NOTE(review): ``grid`` is session-scoped and not parameterized in this
    # file, so presumably nodes added for an earlier ``number_of_nodes`` value
    # are still present when a later value runs -- confirm.
    pending = [grid.add_storage_node() for _ in range(number_of_nodes)]

    # consumeErrors=True: failures are reported through the (ok, value) pairs
    # below, so they must not also linger on the individual Deferreds and be
    # logged as "Unhandled error in Deferred".
    outcomes = pytest_twisted.blockon(DeferredList(pending, consumeErrors=True))
    for ok, value in outcomes:
        assert ok, "Storage node creation failed: {}".format(value)
    return grid.storage_servers
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
def client_node(request, grid, storage_nodes, number_of_nodes) -> Client:
    """
    Create a grid client node with number of shares matching number of nodes.

    The needed, happy and total share counts are all set to
    ``number_of_nodes``. The pid of the client process is printed.
    """
    # Local name differs from the fixture name to avoid shadowing it.
    adding_client = grid.add_client(
        "client_node",
        needed=number_of_nodes,
        happy=number_of_nodes,
        total=number_of_nodes,
    )
    client = pytest_twisted.blockon(adding_client)
    print(f"Client node pid: {client.process.transport.pid}")
    return client
|
|
@ -0,0 +1,7 @@
|
||||||
|
"""Benchmarks for minimal `tahoe` CLI interactions."""
|
||||||
|
|
||||||
|
def test_cp_one_file(client_node):
|
||||||
|
"""
|
||||||
|
Upload a file with tahoe cp and then download it, measuring the latency of
|
||||||
|
both operations.
|
||||||
|
"""
|
|
@ -1,138 +0,0 @@
|
||||||
"""
|
|
||||||
First attempt at benchmarking uploads and downloads.
|
|
||||||
|
|
||||||
To run:
|
|
||||||
|
|
||||||
$ pytest benchmarks/upload_download.py -s -v -Wignore
|
|
||||||
|
|
||||||
To add a round-trip latency of e.g. 60ms on Linux (30ms in each direction over loopback):
|
|
||||||
|
|
||||||
$ tc qdisc add dev lo root netem delay 30ms
|
|
||||||
|
|
||||||
To reset:
|
|
||||||
|
|
||||||
$ tc qdisc del dev lo root netem
|
|
||||||
|
|
||||||
Frequency scaling can spoil the results.
|
|
||||||
To see the range of frequency scaling on a Linux system:
|
|
||||||
|
|
||||||
$ cat /sys/devices/system/cpu/cpu*/cpufreq/scaling_available_frequencies
|
|
||||||
|
|
||||||
And to pin the CPU frequency to the lower bound found in these files:
|
|
||||||
|
|
||||||
$ sudo cpupower frequency-set -f <lowest available frequency>
|
|
||||||
|
|
||||||
TODO Parameterization (pytest?)
|
|
||||||
|
|
||||||
- Foolscap vs not foolscap
|
|
||||||
|
|
||||||
- Number of nodes
|
|
||||||
|
|
||||||
- Data size
|
|
||||||
|
|
||||||
- Number of needed/happy/total shares.
|
|
||||||
|
|
||||||
CAVEATS: The goal here isn't a realistic benchmark, or a benchmark that will be
|
|
||||||
measured over time, or is expected to be maintainable over time. This is just
|
|
||||||
a quick and easy way to measure the speed of certain operations, compare HTTP
|
|
||||||
and Foolscap, and see the short-term impact of changes.
|
|
||||||
|
|
||||||
Eventually this will be replaced by a real benchmark suite that can be run over
|
|
||||||
time to measure something more meaningful.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from time import time, process_time
|
|
||||||
from contextlib import contextmanager
|
|
||||||
from tempfile import mkdtemp
|
|
||||||
import os
|
|
||||||
|
|
||||||
from twisted.trial.unittest import TestCase
|
|
||||||
from twisted.internet.defer import gatherResults
|
|
||||||
|
|
||||||
from allmydata.util.deferredutil import async_to_deferred
|
|
||||||
from allmydata.util.consumer import MemoryConsumer
|
|
||||||
from allmydata.test.common_system import SystemTestMixin
|
|
||||||
from allmydata.immutable.upload import Data as UData
|
|
||||||
from allmydata.mutable.publish import MutableData
|
|
||||||
|
|
||||||
|
|
||||||
@contextmanager
def timeit(name):
    """
    Context manager that prints how long the enclosed block took.

    On exit (including when the block raises) one line is printed of the
    form ``"<name>: <elapsed> elapsed, <cpu> CPU"``, both values in seconds
    with three decimal places.

    :param name: label printed in front of the measurements.
    """
    # Imported locally because the module-level import only brings in
    # ``time`` and ``process_time``.
    from time import perf_counter

    # perf_counter() never goes backwards, unlike wall-clock time(), which
    # can jump when the system clock is adjusted and skew the measurement.
    start = perf_counter()
    start_cpu = process_time()
    try:
        yield
    finally:
        elapsed = perf_counter() - start
        cpu = process_time() - start_cpu
        print(f"{name}: {elapsed:.3f} elapsed, {cpu:.3f} CPU")
|
|
||||||
|
|
||||||
|
|
||||||
class ImmutableBenchmarks(SystemTestMixin, TestCase):
    """Benchmarks for immutables."""

    # To use Foolscap, change to True:
    FORCE_FOOLSCAP_FOR_STORAGE = False

    # Don't reduce HTTP connection timeouts, that messes up the more aggressive
    # benchmarks:
    REDUCE_HTTP_CLIENT_TIMEOUT = False

    @async_to_deferred
    async def setUp(self):
        """Start a two-node grid whose clients all use 1-of-1 encoding."""
        SystemTestMixin.setUp(self)
        self.basedir = os.path.join(mkdtemp(), "nodes")

        # 2 nodes
        await self.set_up_nodes(2)

        # 1 share
        for client in self.clients:
            for param in ("k", "happy", "n"):
                client.encoding_params[param] = 1

        # Blank line printed before the timing output of the test itself.
        print()

    @async_to_deferred
    async def test_upload_and_download_immutable(self):
        """Five times: upload via client 0, download via client 1, timing each."""
        # To test larger files, change this:
        DATA = b"Some data to upload\n" * 10

        for _ in range(5):
            # 1. Upload (the uploader lookup is part of the timed region):
            with timeit(" upload"):
                uploader = self.clients[0].getServiceNamed("uploader")
                uploadable = UData(DATA, convergence=None)
                results = await uploader.upload(uploadable)

            # 2. Download (building the node from the URI is timed too):
            with timeit("download"):
                uri = results.get_uri()
                filenode = self.clients[1].create_node_from_uri(uri)
                consumer = await filenode.read(MemoryConsumer(), 0, None)
                downloaded = b"".join(consumer.chunks)
                self.assertEqual(downloaded, DATA)

    @async_to_deferred
    async def test_upload_and_download_mutable(self):
        """Five times: create a mutable file and read it back, timing each."""
        # To test larger files, change this:
        DATA = b"Some data to upload\n" * 10

        for _ in range(5):
            # 1. Upload:
            with timeit(" upload"):
                mutable_node = await self.clients[0].create_mutable_file(
                    MutableData(DATA)
                )

            # 2. Download:
            with timeit("download"):
                contents = await mutable_node.download_best_version()
                self.assertEqual(contents, DATA)

    @async_to_deferred
    async def test_upload_mutable_in_parallel(self):
        """Time the creation of 20 mutable files started together."""
        # To test larger files, change this:
        DATA = b"Some data to upload\n" * 1_000_000
        uploader = self.clients[0]
        with timeit(" upload"):
            await gatherResults(
                [uploader.create_mutable_file(MutableData(DATA)) for _ in range(20)]
            )
|
|
|
@ -240,7 +240,7 @@ def _tahoe_runner_optional_coverage(proto, reactor, request, other_args):
|
||||||
allmydata.scripts.runner` and `other_args`, optionally inserting a
|
allmydata.scripts.runner` and `other_args`, optionally inserting a
|
||||||
`--coverage` option if the `request` indicates we should.
|
`--coverage` option if the `request` indicates we should.
|
||||||
"""
|
"""
|
||||||
if request.config.getoption('coverage'):
|
if request.config.getoption('coverage', False):
|
||||||
args = [sys.executable, '-b', '-m', 'coverage', 'run', '-m', 'allmydata.scripts.runner', '--coverage']
|
args = [sys.executable, '-b', '-m', 'coverage', 'run', '-m', 'allmydata.scripts.runner', '--coverage']
|
||||||
else:
|
else:
|
||||||
args = [sys.executable, '-b', '-m', 'allmydata.scripts.runner']
|
args = [sys.executable, '-b', '-m', 'allmydata.scripts.runner']
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
Started work on a new end-to-end benchmarking framework.
|
Loading…
Reference in New Issue