soup not re

This commit is contained in:
meejah 2019-12-27 20:56:22 -07:00
parent b71d4993c6
commit af35483fbf
1 changed file with 21 additions and 11 deletions

View File

@@ -3,6 +3,9 @@ from __future__ import print_function
 import os.path, re, urllib
 import json
 from six.moves import StringIO
+from bs4 import BeautifulSoup
 from nevow import rend
 from twisted.trial import unittest
 from allmydata import uri, dirnode
@@ -534,19 +537,26 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMixin):
         # Make sure the lonely child can be listed in HTML...
         d.addCallback(lambda ign: self.GET(self.rooturl))
         def _check_html(res):
+            soup = BeautifulSoup(res, 'html5lib')
             self.failIfIn("URI:SSK", res)
-            get_lonely = "".join([r'<td>FILE</td>',
-                                  r'\s+<td>',
-                                  r'<a href="[^"]+%s[^"]+" rel="noreferrer">lonely</a>' % (urllib.quote(lonely_uri),),
-                                  r'</td>',
-                                  r'\s+<td align="right">%d</td>' % len("one"),
-                                  ])
-            self.failUnless(re.search(get_lonely, res), res)
+            found = False
+            for td in soup.find_all(u"td"):
+                if td.text != u"FILE":
+                    continue
+                a = td.findNextSibling()(u"a")[0]
+                self.assertIn(urllib.quote(lonely_uri), a[u"href"])
+                self.assertEqual(u"lonely", a.text)
+                self.assertEqual(u"{}".format(len("one")), td.findNextSibling().findNextSibling().text)
+                found = True
+            self.assertTrue(found)
-            # find the More Info link for name, should be relative
-            mo = re.search(r'<a href="([^"]+)">More Info</a>', res)
-            info_url = mo.group(1)
-            self.failUnless(info_url.endswith(urllib.quote(lonely_uri) + "?t=info"), info_url)
+            infos = list(
+                a[u"href"]
+                for a in soup.find_all(u"a")
+                if a.text == u"More Info"
+            )
+            self.assertEqual(1, len(infos))
+            self.assertTrue(infos[0].endswith(urllib.quote(lonely_uri) + "?t=info"))
         d.addCallback(_check_html)
         # ... and in JSON.