Sun Apr 25 13:11:21 PDT 2010  kevan@isnotajoke.com
  * Add tests for 'tahoe censor'

Sun Apr 25 13:11:50 PDT 2010  kevan@isnotajoke.com
  * Make it so that CLI tests work without actually making a node directory
  
  It is not necessary to have a node directory for 'tahoe censor', because
  its operations are all local -- it processes a log file. So I made its
  CensorOptions class subclass something other than VDriveOptions. The
  result of that is that CensorOptions doesn't know how to process a
  node-directory parameter, which this code would send. So, now it looks
  for a 'no_node' kwarg; if this is present and set to True, the
  node-directory option is not sent, and everything works.

New patches:

[Add tests for 'tahoe censor'
kevan@isnotajoke.com**20100425201121
 Ignore-this: 9fe8849052cc261e5d4c40051686bd9b
] {
hunk ./src/allmydata/test/test_cli.py 9
 import urllib
 import re
 import simplejson
+import pickle
+import platform
+import stat
+import bz2
 
 from allmydata.util import fileutil, hashutil, base32
 from allmydata import uri
hunk ./src/allmydata/test/test_cli.py 453
         help = str(cli.AddAliasOptions())
         self.failUnless("add-alias ALIAS DIRCAP" in help, help)
 
+    def test_censor(self):
+        # The help text for 'tahoe censor' should show the command name
+        # followed by its SOURCE-LOG and DEST-LOG arguments.
+        help = str(cli.CensorOptions())
+        self.failUnless("censor SOURCE-LOG DEST-LOG" in help, help)
+
+
 class CLITestMixin:
     def do_cli(self, verb, *args, **kwargs):
         nodeargs = [
hunk ./src/allmydata/test/test_cli.py 2228
             self.failUnlessIn("error:", err)
         d.addCallback(_check)
         return d
+
+
+class Censor(GridTestMixin, CLITestMixin, unittest.TestCase):
+    def write_test_log_to_file(self, f):
+        # Pickle five sample log events, one after another, to the open
+        # file-like object f. Each event is a dict whose 'd' entry holds a
+        # dict with a single "message" string. Between them the messages
+        # contain three IP addresses, one furl, and two storage indices
+        # (one wrapped in <si> tags, one in <SI> tags).
+        #
+        # The caller is responsible for opening and closing f. The return
+        # value lists the sensitive strings that were written, plus the
+        # number of events, so tests know what to look for afterwards.
+        e1 = {}
+        e1['d'] = {"message": "Tub location set to 62.220.0.0:49628,127.0.0.1:49628"}
+        ips_to_look_for = ["62.220.0.0", "127.0.0.1"]
+        pickle.dump(e1, f)
+        e2 = {}
+        e2['d'] = {"message": "connectTCP to ('134.71.255.255', 44785)"}
+        ips_to_look_for.append("134.71.255.255")
+        pickle.dump(e2, f)
+        e3 = {}
+        e3['d'] = {"message": "pb://todjw7qkb4dgq4fkeo7cqydcu5vneioh@tahoecs2.allmydata.com:52106/introducer"}
+        furls_to_look_for = ["pb://todjw7qkb4dgq4fkeo7cqydcu5vneioh@tahoecs2.allmydata.com:52106/introducer"]
+        pickle.dump(e3, f)
+        e4 = {}
+        e4['d'] = {"message": "<si>dkalsdkjaslkjd</si>"}
+        sis_to_look_for = ["dkalsdkjaslkjd"]
+        pickle.dump(e4, f)
+        e5 = {}
+        e5['d'] = {"message": "<SI>dasdasdsadsads</SI>"}
+        sis_to_look_for.append("dasdasdsadsads")
+        pickle.dump(e5, f)
+        # ([ips], [furls], [sis], # of log messages)
+        return (ips_to_look_for, furls_to_look_for, sis_to_look_for, 5)
+
+
+    def test_censor_with_nonexistent_source(self):
+        # When asked to censor a file that doesn't exist, 'tahoe censor'
+        # should print something useful as an error message.
+        self.basedir = os.path.join("cli",
+                                    "Censor",
+                                    "test_censor_with_nonexistent_source")
+        self.set_up_grid()
+        # "does_not_exist" is a relative path that this test never creates.
+        # no_node=True keeps do_cli from adding --node-directory to argv.
+        d = self.do_cli("censor", "does_not_exist", no_node=True)
+        def _check((rc, out, err)):
+            # Expect a failing exit code and an explanation in err.
+            self.failUnlessEqual(rc, 1)
+            self.failUnlessIn("Error", err)
+            self.failUnlessIn("doesn't exist", err)
+        d.addCallback(_check)
+        return d
+
+
+    def test_censor_with_nonsensical_source(self):
+        # 'tahoe censor' works on logs that are actually pickled
+        # dictionaries, as output by foolscap. If asked to censor
+        # something else, it should print something useful as an error
+        # message.
+        self.basedir = os.path.join("cli",
+                                    "Censor",
+                                    "test_censor_with_nonsensical_source")
+        self.set_up_grid()
+        # First case: a short string that is not a pickle.
+        test_file = os.path.join(self.basedir, "input")
+        f = open(test_file, "wb")
+        f.write("blahblahblah")
+        f.close()
+        d = self.do_cli("censor", test_file, no_node=True)
+        def _check((rc, out, err)):
+            # Shared by both cases: the command must fail with exit code 1
+            # and report an invalid format.
+            self.failUnlessEqual(rc, 1)
+            self.failUnlessIn("Error", err)
+            self.failUnlessIn("invalid format", err)
+        d.addCallback(_check)
+        def _then(ign):
+            # Second case: a longer piece of non-pickle text. The path is
+            # stored on self so the lambda below can read it when it runs.
+            self.test_file = os.path.join(self.basedir, "input2")
+            f = open(self.test_file, "wb")
+            f.write("For some reason, the file contents above result in "
+                    "an IndexError, while these result in an EOFError. In "
+                    "either case, the program should output something useful")
+            f.close()
+        d.addCallback(_then)
+        d.addCallback(lambda ign: self.do_cli("censor", self.test_file,
+                                              no_node=True))
+        d.addCallback(_check)
+        return d
+
+
+    def test_censor_with_empty_source(self):
+        # 'tahoe censor' should complain when presented with an empty
+        # log file to censor.
+        self.basedir = os.path.join("cli",
+                                    "Censor",
+                                    "test_censor_with_empty_source")
+        self.set_up_grid()
+        test_file = os.path.join(self.basedir, "input")
+        # Create a zero-length file. close() returns None, so there is
+        # nothing useful to bind the result to.
+        open(test_file, "wb").close()
+        d = self.do_cli("censor", test_file, no_node=True)
+        def _check((rc, out, err)):
+            # Expect a failing exit code and a message mentioning "empty".
+            self.failUnlessEqual(rc, 1)
+            self.failUnlessIn("Error", err)
+            self.failUnlessIn("empty", err)
+        d.addCallback(_check)
+        return d
+
+
+    def test_censor_with_unreadable_source(self):
+        # 'tahoe censor' should complain when presented with a file that
+        # OS-level access controls prevent it from reading.
+        if platform.system() == "Windows":
+            raise unittest.SkipTest("os.chmod() can't make a file that this "
+                                    "test can't read on Windows.")
+        self.basedir = os.path.join("cli",
+                                    "Censor",
+                                    "test_censor_with_unreadable_source")
+        self.set_up_grid()
+        test_file = os.path.join(self.basedir, "input")
+        f = open(test_file, "wb")
+        self.write_test_log_to_file(f)
+        f.close()
+        # Replace the file's mode with the owner-write bit alone, which
+        # removes every read permission.
+        # NOTE(review): permission bits do not stop a superuser from
+        # reading the file, so this test presumably fails when the suite
+        # is run as root -- confirm, and consider skipping in that case.
+        os.chmod(test_file, stat.S_IWRITE)
+        d = self.do_cli("censor", test_file, no_node=True)
+        def _check((rc, out, err)):
+            self.failUnlessEqual(rc, 1)
+            self.failUnlessIn("Error", err)
+            self.failUnlessIn("read", err)
+        d.addCallback(_check)
+        return d
+
+
+    def test_censor_with_unwritable_destination(self):
+        # 'tahoe censor' should complain when presented with a
+        # destination file that it can't write to.
+        # NOTE(review): permission bits do not stop a superuser from
+        # writing, so both halves of this test presumably fail when the
+        # suite is run as root -- confirm.
+        self.basedir = os.path.join("cli",
+                                    "Censor",
+                                    "test_censor_with_unwritable_destination")
+        self.set_up_grid()
+        test_file = os.path.join(self.basedir, "input")
+        f = open(test_file, "wb")
+        self.write_test_log_to_file(f)
+        f.close()
+        # Pre-create the destination so that it can be made read-only.
+        test_out = os.path.join(self.basedir, "output")
+        open(test_out, "wb").close()
+        # should make test_out readonly on both Windows and *nixes.
+        os.chmod(test_out, stat.S_IREAD)
+        d = self.do_cli("censor", test_file, test_out, no_node=True)
+        def _check((rc, out, err)):
+            # NOTE(review): this expects "writable" in the message, while
+            # test_censor_quiet_mode expects "write" for the same
+            # read-only-source scenario. "writable" does not contain
+            # "write", so a single message must include both words to
+            # satisfy both tests -- confirm against the implementation.
+            self.failUnlessEqual(rc, 1)
+            self.failUnlessIn("Error:", err)
+            self.failUnlessIn("writable", err)
+        d.addCallback(_check)
+        # In the case where the source file is also the destination file
+        # (i.e.: we're censoring in-place), 'tahoe censor' should also
+        # complain if it can't write to the source file.
+        def _then(ign):
+            # The path is stored on self so the lambda below can read it
+            # when it runs.
+            self.second_test_file = os.path.join(self.basedir, "input2")
+            f = open(self.second_test_file, "wb")
+            self.write_test_log_to_file(f)
+            f.close()
+            os.chmod(self.second_test_file, stat.S_IREAD)
+        d.addCallback(_then)
+        d.addCallback(lambda ign: self.do_cli("censor", self.second_test_file,
+                                              no_node=True))
+        d.addCallback(_check)
+        return d
+
+
+    def test_censor_should_censor_IP_addresses(self):
+        # 'tahoe censor' should successfully remove IP addresses from
+        # valid log files.
+        self.basedir = os.path.join("cli",
+                                    "Censor",
+                                    "test_censor_should_censor_IP_addresses")
+        self.set_up_grid()
+        test_file = os.path.join(self.basedir, "input")
+        f = open(test_file, "wb")
+        ips, furls, sis, total = self.write_test_log_to_file(f)
+        f.close()
+        # Only a source is given, so the censored log is read back from
+        # test_file itself below.
+        d = self.do_cli("censor", test_file, no_node=True)
+        def _check((rc, out, err)):
+            self.failUnlessEqual(rc, 0)
+            f = open(test_file, "rb")
+            # Unpickle events one at a time; pickle.load raises EOFError
+            # once the file is exhausted, which ends the loop.
+            # NOTE(review): nothing asserts that any events were read, so
+            # an empty output file would pass. Consider counting events
+            # and comparing against 'total', which is otherwise unused.
+            while True:
+                try:
+                    e = pickle.load(f)
+                    for ip in ips:
+                        self.failIfIn(ip, e['d']["message"])
+                except EOFError:
+                    break
+            f.close()
+        d.addCallback(_check)
+        return d
+
+
+    def test_censor_should_censor_storage_indices(self):
+        # 'tahoe censor' should successfully remove storage indices
+        # from valid log files if they are of the form <si>SI</si> or
+        # <SI>SI</SI>
+        self.basedir = os.path.join("cli",
+                                    "Censor",
+                                    "test_censor_should_censor_storage_indices")
+        self.set_up_grid()
+        test_file = os.path.join(self.basedir, "input")
+        f = open(test_file, "wb")
+        ips, furls, sis, total = self.write_test_log_to_file(f)
+        f.close()
+        # Only a source is given, so the censored log is read back from
+        # test_file itself below.
+        d = self.do_cli("censor", test_file, no_node=True)
+        def _check((rc, out, err)):
+            self.failUnlessEqual(rc, 0)
+            f = open(test_file, "rb")
+            # Unpickle events one at a time; pickle.load raises EOFError
+            # once the file is exhausted, which ends the loop.
+            # NOTE(review): nothing asserts that any events were read, so
+            # an empty output file would pass. Consider counting events
+            # and comparing against 'total', which is otherwise unused.
+            while True:
+                try:
+                    e = pickle.load(f)
+                    for si in sis:
+                        self.failIfIn(si, e['d']["message"])
+                except EOFError:
+                    break
+            f.close()
+        d.addCallback(_check)
+        return d
+
+
+    def test_censor_should_censor_furls(self):
+        # 'tahoe censor' should successfully remove furls from valid log
+        # files.
+        self.basedir = os.path.join("cli",
+                                    "Censor",
+                                    "test_censor_should_censor_furls")
+        self.set_up_grid()
+        test_file = os.path.join(self.basedir, "input")
+        f = open(test_file, "wb")
+        ips, furls, sis, total = self.write_test_log_to_file(f)
+        f.close()
+        # Only a source is given, so the censored log is read back from
+        # test_file itself below.
+        d = self.do_cli("censor", test_file, no_node=True)
+        def _check((rc, out, err)):
+            self.failUnlessEqual(rc, 0)
+            f = open(test_file, "rb")
+            # Unpickle events one at a time; pickle.load raises EOFError
+            # once the file is exhausted, which ends the loop.
+            # NOTE(review): nothing asserts that any events were read, so
+            # an empty output file would pass. Consider counting events
+            # and comparing against 'total', which is otherwise unused.
+            while True:
+                try:
+                    e = pickle.load(f)
+                    for furl in furls:
+                        self.failIfIn(furl, e['d']["message"])
+                except EOFError:
+                    break
+            f.close()
+        d.addCallback(_check)
+        return d
+
+
+    def test_censor_verbose_mode(self):
+        # When run in verbose mode (with the -v or --verbose flags),
+        # 'tahoe censor' should output messages telling the user what it
+        # is doing.
+        self.basedir = os.path.join("cli",
+                                    "Censor",
+                                    "test_censor_verbose_mode")
+        self.set_up_grid()
+        test_file = os.path.join(self.basedir, "input")
+        f = open(test_file, "wb")
+        ips, furls, sis, total = self.write_test_log_to_file(f)
+        f.close()
+        # Only the short -v spelling is exercised here.
+        d = self.do_cli("censor", "-v", test_file, no_node=True)
+        def _check((rc, out, err)):
+            self.failUnlessEqual(rc, 0)
+            # Every sensitive string written to the log is expected to be
+            # named in the command's standard output.
+            for item in ips + furls + sis:
+                self.failUnlessIn(item, out)
+        d.addCallback(_check)
+        return d
+
+
+    def test_censor_quiet_mode(self):
+        # When run in quiet mode (with the -q or --quiet flags), 'tahoe
+        # censor' should not output anything other than error messages.
+        self.basedir = os.path.join("cli",
+                                    "Censor",
+                                    "test_censor_quiet_mode")
+        self.set_up_grid()
+        test_file = os.path.join(self.basedir, "input")
+        f = open(test_file, "wb")
+        self.write_test_log_to_file(f)
+        f.close()
+        # First case: a successful run must leave 'out' empty.
+        d = self.do_cli("censor", "-q", test_file, no_node=True)
+        def _check((rc, out, err)):
+            self.failUnlessEqual(rc, 0)
+            self.failUnlessEqual(out, "")
+        d.addCallback(_check)
+        def _then(ign):
+            # Second case: a read-only source with no destination given,
+            # which is expected to fail. The path is stored on self so the
+            # lambda below can read it when it runs.
+            # NOTE(review): permission bits do not stop a superuser from
+            # writing, so this half presumably fails when run as root.
+            self.second_test_file = os.path.join(self.basedir, "input2")
+            f = open(self.second_test_file, "wb")
+            self.write_test_log_to_file(f)
+            f.close()
+            os.chmod(self.second_test_file, stat.S_IREAD)
+        d.addCallback(_then)
+        d.addCallback(lambda ign: self.do_cli("censor",
+                                              "-q",
+                                              self.second_test_file,
+                                              no_node=True))
+        def _check2((rc, out, err)):
+            # Errors must still be reported in 'err' while 'out' stays
+            # empty.
+            # NOTE(review): this expects "write" in the message, while
+            # test_censor_with_unwritable_destination expects "writable"
+            # for the same scenario. "writable" does not contain "write",
+            # so a single message must include both words to satisfy both
+            # tests -- confirm against the implementation.
+            self.failUnlessEqual(rc, 1)
+            self.failUnlessEqual(out, "")
+            self.failUnlessIn("Error:", err)
+            self.failUnlessIn("write", err)
+        d.addCallback(_check2)
+        return d
+
+
+    def test_censor_bz2(self):
+        # 'tahoe censor' should be capable of censoring both
+        # uncompressed log files and bzipped log files.
+        self.basedir = os.path.join("cli",
+                                    "Censor",
+                                    "test_censor_bz2")
+        self.set_up_grid()
+        test_file = os.path.join(self.basedir, "input.bz2")
+        f = bz2.BZ2File(test_file, "wb")
+        ips, furls, sis, total = self.write_test_log_to_file(f)
+        # All three checks below look for the same sensitive strings, so
+        # they are combined once and kept on self.
+        self.items = ips + furls + sis
+        f.close()
+        # First, check to see that we can read from a bz2 file and
+        # write to a bz2 file.
+        d = self.do_cli("censor", test_file, no_node=True)
+        def _check((rc, out, err)):
+            self.failUnlessEqual(rc, 0)
+            # Reading back through BZ2File checks that the result is
+            # still bzip2-compressed.
+            f = bz2.BZ2File(test_file, "rb")
+            # Unpickle events until pickle.load raises EOFError.
+            while True:
+                try:
+                    e = pickle.load(f)
+                    for item in self.items:
+                        self.failIfIn(item, e['d']["message"])
+                except EOFError:
+                    break
+            f.close()
+        d.addCallback(_check)
+        # Now, check to see that we can write to a bz2 logfile
+        # from a plain logfile
+        def _then(ign):
+            # Paths are stored on self so the lambda and _check2 below can
+            # read them when they run.
+            self.second_source = os.path.join(self.basedir, "input2")
+            self.second_dest = os.path.join(self.basedir, "output2.bz2")
+            f = open(self.second_source, "wb")
+            self.write_test_log_to_file(f)
+            f.close()
+        d.addCallback(_then)
+        d.addCallback(lambda ign: self.do_cli("censor",
+                                              self.second_source,
+                                              self.second_dest,
+                                              no_node=True))
+        def _check2((rc, out, err)):
+            self.failUnlessEqual(rc, 0)
+            f = bz2.BZ2File(self.second_dest, "rb")
+            while True:
+                try:
+                    e = pickle.load(f)
+                    for item in self.items:
+                        self.failIfIn(item, e['d']["message"])
+                except EOFError:
+                    break
+            f.close()
+        d.addCallback(_check2)
+        # Finally, check to see that we can write from a bz2 logfile
+        # to a plain logfile.
+        def _later(ign):
+            self.third_source = os.path.join(self.basedir, "input3.bz2")
+            self.third_dest = os.path.join(self.basedir, "output3")
+            f = bz2.BZ2File(self.third_source, "wb")
+            self.write_test_log_to_file(f)
+            f.close()
+        d.addCallback(_later)
+        d.addCallback(lambda ign: self.do_cli("censor",
+                                              self.third_source,
+                                              self.third_dest,
+                                              no_node=True))
+        def _check3((rc, out, err)):
+            self.failUnlessEqual(rc, 0)
+            # The destination has no .bz2 suffix, so it is read back with
+            # plain open().
+            f = open(self.third_dest, "rb")
+            while True:
+                try:
+                    e = pickle.load(f)
+                    for item in self.items:
+                        self.failIfIn(item, e['d']["message"])
+                except EOFError:
+                    break
+            f.close()
+        d.addCallback(_check3)
+        return d
+
+
+    def test_censor_log_counting(self):
+        # When not run in quiet mode, 'tahoe censor' should output a
+        # useful concluding message, including:
+        #  - The total number of logs processed
+        #  - The total number of SIs censored
+        #  - The total number of IP addresses censored.
+        #  - The total number of furls censored.
+        self.basedir = os.path.join("cli",
+                                    "Censor",
+                                    "test_censor_log_counting")
+        self.set_up_grid()
+        test_file = os.path.join(self.basedir, "input")
+        f = open(test_file, "wb")
+        ips, furls, sis, total = self.write_test_log_to_file(f)
+        f.close()
+        d = self.do_cli("censor", test_file, no_node=True)
+        def _check((rc, out, err)):
+            self.failUnlessEqual(rc, 0)
+            # The expected counts come from the fixture's return value, so
+            # these assertions pin the exact summary wording as well as
+            # the numbers.
+            self.failUnlessIn("total of %d" % total, out)
+            self.failUnlessIn("Storage Indices: %d" % len(sis), out)
+            self.failUnlessIn("IP Addresses: %d" % len(ips), out)
+            self.failUnlessIn("Node URLs: %d" % len(furls), out)
+        d.addCallback(_check)
+        return d
}
[Make it so that CLI tests work without actually making a node directory
kevan@isnotajoke.com**20100425201150
 Ignore-this: ca5467176c9ce15b17dfa27cf6b2885d
 
 It is not necessary to have a node directory for 'tahoe censor', because
 its operations are all local -- it processes a log file. So I made its
 CensorOptions class subclass something other than VDriveOptions. The
 result of that is that CensorOptions doesn't know how to process a
 node-directory parameter, which this code would send. So, now it looks
 for a 'no_node' kwarg; if this is present and set to True, the
 node-directory option is not sent, and everything works.
] hunk ./src/allmydata/test/test_cli.py 460
 
 class CLITestMixin:
     def do_cli(self, verb, *args, **kwargs):
-        nodeargs = [
-            "--node-directory", self.get_clientdir(),
-            ]
+        # Commands whose options class does not accept --node-directory
+        # (e.g. 'tahoe censor') are run with no_node=True so that the
+        # option is left out of argv. pop() removes the key from kwargs,
+        # as the del() it replaces did, and also honours the value:
+        # no_node=False now behaves the same as omitting the kwarg.
+        if kwargs.pop("no_node", False):
+            nodeargs = []
+        else:
+            nodeargs = [
+                "--node-directory", self.get_clientdir(),
+                ]
         argv = [verb] + nodeargs + list(args)
         stdin = kwargs.get("stdin", "")
         stdout, stderr = StringIO(), StringIO()

Context:

[setup: add licensing declaration for setuptools (noticed by the FSF compliance folks)
zooko@zooko.com**20100309184415
 Ignore-this: 2dfa7d812d65fec7c72ddbf0de609ccb
] 
[setup: fix error in licensing declaration from Shawn Willden, as noted by the FSF compliance division
zooko@zooko.com**20100309163736
 Ignore-this: c0623d27e469799d86cabf67921a13f8
] 
[CREDITS to Jacob Appelbaum
zooko@zooko.com**20100304015616
 Ignore-this: 70db493abbc23968fcc8db93f386ea54
] 
[desert-island-build-with-proper-versions
jacob@appelbaum.net**20100304013858] 
[docs: a few small edits to try to guide newcomers through the docs
zooko@zooko.com**20100303231902
 Ignore-this: a6aab44f5bf5ad97ea73e6976bc4042d
 These edits were suggested by my watching over Jake Appelbaum's shoulder as he completely ignored/skipped/missed install.html and also as he decided that debian.txt wouldn't help him with basic installation. Then I threw in a few docs edits that have been sitting around in my sandbox asking to be committed for months.
] 
[TAG allmydata-tahoe-1.6.1
david-sarah@jacaranda.org**20100228062314
 Ignore-this: eb5f03ada8ea953ee7780e7fe068539
] 
Patch bundle hash:
7e2f8933bb23af7dc504dab74325f01cfbbea2ec