toUnicode.py 481 B

1234567891011121314151617181920
  1. import glob
  2. import os
  3. import sys
  4. import codecs
  5. rootdir = "./units"
  6. source_encoding = "ANSI"
  7. target_encoding = "utf-8"
  8. for root, subdirs, files in os.walk(rootdir):
  9. for filename in files:
  10. if filename[-4:] == ".txt":
  11. print(os.path.join(root, filename))
  12. source = codecs.open(os.path.join(root, filename), 'r', source_encoding).read()
  13. print(u''.join(source))
  14. final = open(os.path.join(root, filename), 'w')
  15. print(u''.join(source), file = final)
  16. final.close()