"""toUnicode.py: re-encode every ``.txt`` file under a directory tree as UTF-8.

Run as a script, it walks ``rootdir`` recursively and rewrites each file whose
name ends in ``.txt`` in place, decoding it with ``source_encoding`` and
encoding it with ``target_encoding``.

WARNING: the conversion is not idempotent.  Running it a second time on the
same tree decodes the already-converted UTF-8 bytes as ``source_encoding``
and corrupts every non-ASCII character.
"""
import glob
import os
import sys
import codecs

rootdir = "./"
# "iso-8859-1" (Latin-1) names a concrete codec; the family name "ISO-8859"
# alone does not say which part of the standard is meant.
source_encoding = "iso-8859-1"
target_encoding = "utf-8"


def convert_file(path, from_encoding=source_encoding, to_encoding=target_encoding):
    """Rewrite the file at *path* in place, re-encoded as *to_encoding*.

    The file is handled as bytes so that only the character encoding changes:
    line endings are preserved and no trailing newline is appended.

    Raises:
        UnicodeDecodeError: if the file is not valid *from_encoding*.
        UnicodeEncodeError: if a character cannot be stored in *to_encoding*.
        OSError: if the file cannot be read or written.
    """
    with open(path, "rb") as src:
        raw = src.read()
    # Decode and encode fully before reopening the file for writing, so a
    # conversion error leaves the original file untouched.
    encoded = raw.decode(from_encoding).encode(to_encoding)
    with open(path, "wb") as dst:
        dst.write(encoded)


def convert_tree(top=rootdir, from_encoding=source_encoding, to_encoding=target_encoding):
    """Convert every ``.txt`` file below *top* and return the converted paths.

    Each path is printed before its file is converted.

    Raises:
        LookupError: if either encoding name is unknown to Python.
    """
    # Fail on a misspelled codec name before any file has been modified.
    codecs.lookup(from_encoding)
    codecs.lookup(to_encoding)

    converted = []
    for root, _subdirs, files in os.walk(top):
        for filename in files:
            if not filename.endswith(".txt"):
                continue
            path = os.path.join(root, filename)
            print(path)
            convert_file(path, from_encoding, to_encoding)
            converted.append(path)
    return converted


if __name__ == "__main__":
    convert_tree()