1
0
Fork 0
forked from External/ergo

anope2json, atheme2json: handle non-UTF8 data

Also ignore an unrecognized field type in anope
This commit is contained in:
Shivaram Lingamneni 2021-12-09 22:11:24 -05:00
parent 3e32e3f19e
commit 4f7356f19a
2 changed files with 26 additions and 6 deletions

View file

@ -31,8 +31,16 @@ def convert(infile):
channel_to_founder = defaultdict(lambda: (None, None))
for line in infile:
line = line.rstrip('\r\n')
while True:
line = infile.readline()
if not line:
break
line = line.rstrip(b'\r\n')
try:
line = line.decode('utf-8')
except UnicodeDecodeError:
line = line.decode('utf-8', 'replace')
logging.warning("line contained invalid utf8 data " + line)
parts = line.split(' ')
category = parts[0]
@ -177,7 +185,7 @@ def convert(infile):
def main():
if len(sys.argv) != 3:
raise Exception("Usage: atheme2json.py atheme_db output.json")
with open(sys.argv[1]) as infile:
with open(sys.argv[1], 'rb') as infile:
output = convert(infile)
with open(sys.argv[2], 'w') as outfile:
json.dump(output, outfile)