# toUnicode.py (364 B)

  1. import glob
  2. import os
  3. import sys
  4. import codecs
  5. rootdir = "./"
  6. source_encoding = "us-ascii"
  7. target_encoding = "utf-8"
  8. for root, subdirs, files in os.walk(rootdir):
  9. for filename in files:
  10. if filename[-4:] == ".txt":
  11. print(os.path.join(root, filename))
  12. source = codecs.open(os.path.join(root, filename), 'r', source_encoding).read()
  13. print(source)