from lxml.html.diff import htmldiff
from lxml.html import parse, tostring, open_in_browser, fromstring
def get_page(url):
    """Fetch and serialize an HTML page with its links made absolute.

    Args:
        url: Anything ``lxml.html.parse`` accepts as a source (the value is
            passed through unchanged).

    Returns:
        The serialized document as produced by ``lxml.html.tostring`` with
        its default arguments.
    """
    tree = parse(url)
    root = tree.getroot()
    # Rewrite links in place so the markup no longer depends on where the
    # page was loaded from.
    root.make_links_absolute()
    serialized = tostring(root)
    return serialized
def _load_absolute_root(url):
    """Parse *url* and return its root element with links made absolute."""
    root = parse(url).getroot()
    root.make_links_absolute()
    return root


def _first_match(root, selector, url):
    """Return the first element under *root* matching the CSS *selector*.

    Raises:
        IndexError: If nothing matches. This is the same exception type a
            bare ``[0]`` lookup on the empty result would raise, but with a
            message naming the selector and the document.
    """
    matches = root.cssselect(selector)
    if not matches:
        raise IndexError(
            'selector %r matched no element in %s' % (selector, url)
        )
    return matches[0]


def compare_pages(url1, url2, selector='body div'):
    """Diff one region of two HTML pages and embed the result in the first.

    The first element matching *selector* is taken from each page, the two
    are compared with ``htmldiff``, and the matched element in the first
    page is replaced by the parsed diff markup.

    Args:
        url1: Source of the basis page (passed to ``lxml.html.parse``).
        url2: Source of the page to compare against the basis.
        selector: CSS selector; only the first match in each page is used.

    Returns:
        The root element of the first page, modified in place so that the
        selected element is replaced by the diff.

    Raises:
        IndexError: If *selector* matches nothing in either page.
        ValueError: If *selector* matches the root element of the first
            page, which has no parent in which to place the diff.
    """
    basis = _load_absolute_root(url1)
    other = _load_absolute_root(url2)
    el1 = _first_match(basis, selector, url1)
    el2 = _first_match(other, selector, url2)

    parent = el1.getparent()
    if parent is None:
        raise ValueError(
            'selector %r matched the root element of %s; it cannot be '
            'replaced by a diff' % (selector, url1)
        )

    # htmldiff operates on text, while tostring() returns bytes by default
    # on Python 3, so ask for a text serialization explicitly.
    diff_content = htmldiff(
        tostring(el1, encoding='unicode'),
        tostring(el2, encoding='unicode'),
    )
    diff_el = fromstring(diff_content)

    # Swap the original element for the diff at the same position.
    parent.replace(el1, diff_el)
    return basis
if __name__ == '__main__':
    # Command-line entry point: diff two pages and show the result in a
    # browser.  Usage: script URL1 URL2 [CSS_SELECTOR]
    import sys

    if len(sys.argv) < 3:
        sys.exit('usage: %s URL1 URL2 [CSS_SELECTOR]' % sys.argv[0])

    # Slice rather than index so the selector stays optional and
    # compare_pages() falls back to its own default when it is omitted.
    doc = compare_pages(*sys.argv[1:4])
    open_in_browser(doc)