|
import sys
import re
def glue_xml_lines(xml):
    """Concatenate an iterable of byte lines, marking wrapped text lines.

    Each line is appended to the output unchanged when any of these hold:

    * the line is blank (only spaces/tabs, optionally followed by a newline);
    * the previous line was blank, or this is the first line;
    * the line starts with ``<``;
    * the output accumulated so far ends with ``>`` followed by a newline.

    Otherwise the last byte of the accumulated output is dropped and
    replaced by a space plus newline before the line is appended.

    Args:
        xml: iterable of ``bytes`` lines, e.g. as produced by iterating a
            file opened in binary mode.

    Returns:
        The combined content as ``bytes`` (``b""`` for empty input).
    """
    # Compiled once here instead of being looked up on every iteration.
    blank_line = re.compile(b'^[ \t]*$')
    # A bytearray grows in place; re-joining the full output for every
    # line would copy everything accumulated so far each time.
    glued = bytearray()
    previous_line_empty = True
    for line in xml:
        if blank_line.match(line):
            glued += line
            previous_line_empty = True
        elif previous_line_empty:
            glued += line
            previous_line_empty = False
        elif line.startswith(b'<') or glued.endswith(b'>\n'):
            glued += line
        else:
            # Drop the final byte of the previous line (presumably its
            # newline -- true for lines read from a file) and put back
            # a space followed by a newline.
            del glued[-1:]
            glued += b' \n'
            glued += line
    return bytes(glued)
def read_xml_file(filename):
    """Read *filename* as bytes and return its lines glued together.

    Lines containing ``<atom:link rel=`` are discarded; the remaining
    lines are handed to ``glue_xml_lines`` and its result is returned.
    """
    with open(filename, 'rb') as handle:
        # atom:link elements explode etree and are useless, so skip them.
        kept_lines = [
            raw for raw in handle
            if b'<atom:link rel=' not in raw
        ]
    return glue_xml_lines(kept_lines)
# Process every file named on the command line, writing the glued result
# next to the input as "<filename>.new".
for filename in sys.argv[1:]:
    result = read_xml_file(filename)
    # The context manager closes the output file even if the write raises.
    with open(filename + ".new", 'wb') as output:
        output.write(result)
|