# Build a table mapping entity names to the id of their parent <character>
# element, read from 'unicode.xml', restricted to the entity sets listed under
# the 'html5' and 'mathml' groups, and print a line for each newly recorded
# entity name.
#
# NOTE(review): this file arrived with all line breaks and indentation
# stripped; the nesting below is reconstructed from the logic of the
# statements -- confirm against the upstream copy.
import xml.dom.minidom

document = xml.dom.minidom.parse('unicode.xml')
sets = []  # 'name' attributes of the <set> elements that are in scope
entities = {}  # entity name -> 'id' attribute of the parent <character>

# Collect the set names declared inside the 'html5' and 'mathml' groups.
for group in document.getElementsByTagName('group'):
    if (group.getAttribute('name') == 'html5' or group.getAttribute('name') == 'mathml'):
        # NOTE: the loop variable `set` shadows the builtin of the same name.
        for set in group.getElementsByTagName('set'):
            sets.append(set.getAttribute('name'))

# Visit every <entity> element; the asserts document the structure the script
# expects and abort the run if the input deviates from it.
for entity in document.getElementsByTagName('entity'):
    assert entity.parentNode.tagName == 'character'
    assert entity.hasAttribute('set')
    set = entity.getAttribute('set')
    # Only entities belonging to one of the collected sets are considered.
    if (set in sets):
        assert entity.hasAttribute('id')
        name = entity.getAttribute('id')
        assert len(name) > 0
        assert entity.parentNode.hasAttribute('id')
        value = entity.parentNode.getAttribute('id')
        # Character ids containing '-' are skipped.
        # NOTE(review): presumably those ids denote multi-character
        # sequences -- verify against the unicode.xml schema.
        if ('-' not in value):
            # A name may occur more than once, but only with the same value;
            # the message is evaluated only when the assertion fails, at which
            # point `name` is guaranteed to be present in `entities`.
            assert name not in entities or entities[name] == value, '(name: ' + name + ' old value: ' + entities[name] + ' new value: ' + value + ')'
            # Record (and report) each name only the first time it is seen.
            if (name not in entities):
                entities[name] = value
                # NOTE(review): the Python 2 print statement below is cut off
                # in this view -- its string literal is split across a line
                # break and the remainder of the expression is not visible.
                # It is reproduced verbatim; restore it from the upstream copy.
                print '
' + name + ';