TextSubfile.cpp 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478
  1. //
  2. // Created by Иван_Архипов on 24.11.2017.
  3. //
  4. #include "TextSubfile.h"
  5. #include "../BinaryData.h"
  6. #include "../DatFile.h"
  7. #include "../Common/DatException.h"
  8. #include "../SubfileData.h"
  9. #include <algorithm>
  10. #include <codecvt>
  11. #include <locale>
  // Renders a non-negative integer as a decimal UTF-16 string.
  //
  // Zero is rendered as u"0". Previously it produced an empty string, which made a
  // lone zero argument reference indistinguishable from "no arguments" in the
  // exported text.
  //
  // Negative values are not supported and still yield an empty string: the patch
  // text format built in this file uses '-' as the separator between argument
  // references, so a sign could not be represented unambiguously.
  std::u16string to_utf16(long long x) {
      if (x == 0)
          return u"0";

      std::u16string res;
      // Digits are produced least-significant first and reversed afterwards.
      while (x > 0) {
          res += static_cast<char16_t>(u'0' + (x % 10));
          x /= 10;
      }
      std::reverse(res.begin(), res.end());
      return res;
  }
  // Parses a string of decimal digits (UTF-16) into an integer.
  //
  // An empty string yields 0. Characters are not validated: each one is treated as
  // a digit relative to u'0', exactly as before.
  //
  // The former "fragment_id: ..." trace on stdout was removed. It was emitted (and
  // flushed) for every number parsed, including argument references that are not
  // fragment ids at all.
  long long from_utf16(const std::u16string &num) {
      long long res = 0;
      for (const char16_t c : num) {
          res = res * 10ll + (c - u'0');
      }
      return res;
  }
  29. std::string argumentsFromUtf16(const std::u16string &args) {
  30. std::string res;
  31. size_t pointer = 0;
  32. while (pointer < args.length()) {
  33. size_t pointer1 = args.find(u'-', pointer);
  34. if (pointer1 == std::u16string::npos)
  35. pointer1 = args.length();
  36. if (!res.empty())
  37. res += "-";
  38. res += std::to_string(from_utf16(args.substr(pointer, pointer1 - pointer)));
  39. pointer = pointer1 + 1;
  40. }
  41. std::wstring_convert<std::codecvt_utf8_utf16<char16_t>,char16_t> codecvt;
  42. std::cout << "Arguments old: "<< codecvt.to_bytes(args) << '\n' ;
  43. std::cout << "Arguments new: "<< res << '\n' ;
  44. return res;
  45. }
  46. namespace LOTRO_DAT {
  47. TextSubfile::TextSubfile() = default;
  48. TextSubfile::TextSubfile(DatFile *dat, long long dictionary_offset, long long fragments_count, long long unknown1,
  49. long long file_id, long long file_offset, long long file_size,
  50. long long timestamp,
  51. long long version, long long block_size)
  52. : Subfile(dat, dictionary_offset, fragments_count, unknown1, file_id, file_offset, file_size, timestamp, version, block_size) {
  53. }
  54. FILE_TYPE TextSubfile::FileType() const {
  55. return TEXT;
  56. }
  57. std::string TextSubfile::Extension() const {
  58. return std::string(".txt");
  59. }
  60. SubfileData TextSubfile::PrepareForExport(const BinaryData &file_data) {
  61. SubfileData result;
  62. if (file_size() <= 10) // File is empty, nothing to do;
  63. return result;
  64. long long offset = 9; // first 4 bytes - file_id, then 4 bytes - unknown, then 1 byte - unknown
  65. long long text_fragment_num = file_data.ToNumber<1>(offset);
  66. if ((text_fragment_num & 0x80) != 0) {
  67. text_fragment_num = (((text_fragment_num ^ 0x80) << 8) | file_data.ToNumber<1>(offset + 1));
  68. offset += 1;
  69. }
  70. offset += 1;
  71. for (long long i = 0; i < text_fragment_num; i++) {
  72. long long fragment_id = file_data.ToNumber<8>(offset);
  73. offset += 8;
  74. std::vector<std::u16string> text_pieces = MakePieces(file_data, offset);
  75. std::vector<long long> arg_references = MakeArgumentReferences(file_data, offset);
  76. std::vector<std::vector<BinaryData>> arg_strings = MakeArgumentStrings(file_data, offset);
  77. std::u16string text = u"[";
  78. for (size_t j = 0; j + 1 < text_pieces.size(); j++)
  79. text += text_pieces[j] + u"<--DO_NOT_TOUCH!-->";
  80. text += text_pieces[text_pieces.size() - 1] + u"]";
  81. std::u16string arguments;
  82. for (size_t j = 0; j + 1 < arg_references.size(); j++)
  83. arguments += to_utf16(arg_references[j]) + u"-";
  84. if (!arg_references.empty())
  85. arguments += to_utf16(arg_references[arg_references.size() - 1]);
  86. if (result.text_data.length() > 0)
  87. result.text_data += u"|||";
  88. result.text_data += to_utf16(fragment_id) + u":::";
  89. result.text_data += arguments + u":::";
  90. result.text_data += text;
  91. }
  92. result.options["fid"] = file_id();
  93. result.options["ext"] = Extension();
  94. return result;
  95. }
  96. BinaryData TextSubfile::MakeForImport(const BinaryData &old_data, const SubfileData &data) {
  97. std::unordered_map<long long, SubfileData> patch_fragments = ParsePatchFragments(data);
  98. std::cout << "Made fragments. Fragments are: " << std::flush;
  99. for (auto &i : patch_fragments)
  100. std::cout << i.first << " " << std::flush;
  101. std::cout << std::endl;
  102. BinaryData new_data;
  103. if (file_size() <= 10 + 8) // File is empty, nothing to do;
  104. return old_data;
  105. long long offset = 9 + 8; // first 8 bytes - file_info. After them:
  106. // first 4 bytes - file_id, then 4 bytes - unknown, then 1 byte - unknown
  107. long long text_fragment_num = old_data.ToNumber<1>(offset);
  108. if ((text_fragment_num & 0x80) != 0) {
  109. text_fragment_num = (((text_fragment_num ^ 0x80) << 8) | old_data.ToNumber<1>(offset + 1));
  110. offset += 1;
  111. }
  112. offset += 1;
  113. new_data = new_data + old_data.CutData(0, offset); // Adding file info
  114. for (long long i = 0; i < text_fragment_num; i++) {
  115. long long fragment_id = old_data.ToNumber<8>(offset);
  116. offset += 8;
  117. new_data = new_data + old_data.CutData(offset - 8, offset);
  118. if (patch_fragments.count(fragment_id) == 0) {
  119. std::cerr << "Omg, what..? " << file_id() << " " << fragment_id << " is not in patch? " << std::endl;
  120. try {
  121. // Retrieving old pieces
  122. new_data = new_data + GetPieceData(old_data, offset);
  123. } catch (std::exception &e) {
  124. fprintf(stderr, "Caught %s exception.\n", e.what());
  125. fprintf(stderr, "ERROR TextSubfile::MakeForImport() - unable to get piece data for file_id %lld and fragment_id %lld", file_id(), fragment_id);
  126. throw DatException("Bad TextSubfile::MakeForImport()", IMPORT_EXCEPTION);
  127. }
  128. try {
  129. // Retrieving old references
  130. new_data = new_data + GetArgumentReferenceData(old_data, offset);
  131. } catch (std::exception &e) {
  132. fprintf(stderr, "Caught %s exception.\n", e.what());
  133. fprintf(stderr, "ERROR TextSubfile::MakeForImport() - unable to get argument reference data for file_id %lld and fragment_id %lld", file_id(), fragment_id);
  134. throw DatException("Bad TextSubfile::MakeForImport()", IMPORT_EXCEPTION);
  135. }
  136. try {
  137. // Retrieving old ref_strings
  138. new_data = new_data + GetArgumentStringsData(old_data, offset);
  139. } catch (std::exception &e) {
  140. fprintf(stderr, "Caught %s exception.\n", e.what());
  141. fprintf(stderr, "ERROR TextSubfile::MakeForImport() - unable to get argument string for file_id %lld and fragment_id %lld", file_id(), fragment_id);
  142. throw DatException("Bad TextSubfile::MakeForImport()", IMPORT_EXCEPTION);
  143. }
  144. } else {
  145. try {
  146. // Making and adding new pieces
  147. new_data = new_data + BuildPieces(old_data, patch_fragments[fragment_id], offset);
  148. } catch (std::exception &e) {
  149. fprintf(stderr, "Caught %s exception.\n", e.what());
  150. fprintf(stderr, "ERROR TextSubfile::MakeForImport() - unable to build piece data for file_id %lld and fragment_id %lld", file_id(), fragment_id);
  151. throw DatException("Bad TextSubfile::MakeForImport()", IMPORT_EXCEPTION);
  152. }
  153. try {
  154. // Making and adding new references
  155. new_data = new_data + BuildArgumentReferences(old_data, patch_fragments[fragment_id], offset);
  156. } catch (std::exception &e) {
  157. fprintf(stderr, "Caught %s exception.\n", e.what());
  158. fprintf(stderr, "ERROR TextSubfile::MakeForImport() - unable to build argument references data for file_id %lld and fragment_id %lld", file_id(), fragment_id);
  159. throw DatException("Bad TextSubfile::MakeForImport()", IMPORT_EXCEPTION);
  160. }
  161. try {
  162. // Making and adding new strings
  163. new_data = new_data + BuildArgumentStrings(old_data, patch_fragments[fragment_id], offset);
  164. } catch (std::exception &e) {
  165. fprintf(stderr, "Caught %s exception.\n", e.what());
  166. fprintf(stderr, "ERROR TextSubfile::MakeForImport() - unable to build argument strings data for file_id %lld and fragment_id %lld", file_id(), fragment_id);
  167. throw DatException("Bad TextSubfile::MakeForImport()", IMPORT_EXCEPTION);
  168. }
  169. }
  170. }
  171. new_data = new_data + old_data.CutData(offset); // Adding elapsed file data
  172. return new_data;
  173. }
  174. std::unordered_map<long long, SubfileData> TextSubfile::ParsePatchFragments(const SubfileData &data) {
  175. std::unordered_map<long long, SubfileData> res;
  176. std::u16string text = data.text_data;
  177. size_t pointer = 0;
  178. while (pointer < text.length()) {
  179. // Parsing fragment_id
  180. size_t pointer1 = text.find(u":::", pointer);
  181. if (pointer1 == std::u16string::npos)
  182. throw DatException("Bad TextSubfile::ParsePatchFragments() - Unable to parse fragment id! Cannot find '...' divider");
  183. long long fragment_id = from_utf16(text.substr(pointer, pointer1 - pointer));
  184. pointer = pointer1 + 3;
  185. res[fragment_id] = SubfileData();
  186. res[fragment_id].options["gid"] = fragment_id;
  187. // Parsing arguments
  188. pointer1 = text.find(u":::", pointer);
  189. if (pointer1 == std::u16string::npos)
  190. throw DatException("Bad TextSubfile::ParsePatchFragments() - Unable to parse arguments! Cannot find '...' divider");
  191. std::u16string arguments = text.substr(pointer, pointer1 - pointer);
  192. pointer = pointer1 + 3;
  193. if (arguments.length() > 0) {
  194. res[fragment_id].options["args"] = argumentsFromUtf16(arguments);
  195. }
  196. // Parsing text
  197. pointer1 = text.find(u"|||", pointer);
  198. if (pointer1 == std::u16string::npos)
  199. pointer1 = text.length();
  200. std::u16string text_data = text.substr(pointer, pointer1 - pointer);
  201. pointer = pointer1 + 3;
  202. res[fragment_id].text_data = text_data;
  203. std::wstring_convert<std::codecvt_utf8_utf16<char16_t>,char16_t> codecvt;
  204. std::cout << "Fragment id: " << res[fragment_id].options["gid"].as<long long>() << "\n";
  205. if (res[fragment_id].options["args"])
  206. std::cout << "Args: " << res[fragment_id].options["args"].as<std::string>() << "\n";
  207. else
  208. std::cout << "Args: none \n";
  209. std::cout << "Text: " << codecvt.to_bytes(res[fragment_id].text_data) << '\n' ;
  210. }
  211. return res;
  212. }
  213. // Make pieces/arguments/argument strings functions
  214. std::vector<std::u16string> TextSubfile::MakePieces(const BinaryData &data, long long &offset) {
  215. long long num_pieces = data.ToNumber<4>(offset);
  216. offset += 4;
  217. std::vector<std::u16string> text_pieces;
  218. for (long long j = 0; j < num_pieces; j++) {
  219. long long piece_size = data.ToNumber<1>(offset);
  220. if ((piece_size & 128) != 0) {
  221. piece_size = (((piece_size ^ 128) << 8) | data.ToNumber<1>(offset + 1));
  222. offset += 1;
  223. }
  224. offset += 1;
  225. BinaryData piece_data = data.CutData(offset, offset + piece_size * 2);
  226. std::u16string piece;
  227. for (long long k = 0; k < piece_size; k++) {
  228. char16_t c = char16_t(
  229. ((short(piece_data[2 * unsigned(k) + 1])) << 8) | (short(piece_data[2 * unsigned(k)])));
  230. piece += c;
  231. }
  232. text_pieces.push_back(piece);
  233. offset += piece_size * 2;
  234. }
  235. return text_pieces;
  236. }
  237. std::vector<long long> TextSubfile::MakeArgumentReferences(const BinaryData &data, long long &offset) {
  238. std::vector<long long> arg_references;
  239. long long num_references = data.ToNumber<4>(offset);
  240. offset += 4;
  241. for (long long j = 0; j < num_references; j++) {
  242. arg_references.emplace_back(data.ToNumber<4>(offset));
  243. offset += 4;
  244. }
  245. return arg_references;
  246. }
  247. std::vector<std::vector<BinaryData>> TextSubfile::MakeArgumentStrings(const BinaryData &data, long long &offset) {
  248. std::vector<std::vector<BinaryData> > arg_strings;
  249. long long num_arg_strings = data.ToNumber<1>(offset);
  250. offset += 1;
  251. for (long long j = 0; j < num_arg_strings; j++) {
  252. long long num_args = data.ToNumber<4>(offset);
  253. offset += 4;
  254. arg_strings.emplace_back();
  255. for (long long k = 0; k < num_args; k++) {
  256. long long string_size = data.ToNumber<1>(offset);
  257. if ((string_size & 0x80) != 0) {
  258. string_size = (((string_size ^ 0x80) << 8) | data.ToNumber<1>(offset + 1));
  259. offset += 1;
  260. }
  261. offset += 1;
  262. arg_strings[unsigned(j)].emplace_back(data.CutData(offset, offset + string_size * 2));
  263. offset += string_size * 2;
  264. }
  265. }
  266. return arg_strings;
  267. }
  268. // Build pieces/arguments/argument strings functions from fragment SubfileData
  269. BinaryData TextSubfile::BuildPieces(const BinaryData &data, const SubfileData &new_data, long long &offset) {
  270. try {
  271. // Moving &offset pointer in &data
  272. GetPieceData(data, offset);
  273. } catch (std::exception &e) {
  274. fprintf(stderr, "Caught %s exception.\n", e.what());
  275. fprintf(stderr, "ERROR TextSubfile::BuildPieces() - unable to get piece data for file_id %lld", file_id());
  276. throw DatException("Bad TextSubfile::BuildPieces()", IMPORT_EXCEPTION);
  277. }
  278. // Deleting '[' and ']' brackets
  279. std::u16string text_data = new_data.text_data.substr(1, new_data.text_data.size() - 2);
  280. std::vector<std::u16string> pieces;
  281. std::wstring_convert<std::codecvt_utf8_utf16<char16_t>,char16_t> codecvt;
  282. std::cout << "AAAAA0: " << codecvt.to_bytes(text_data) << '\n' ;
  283. const std::u16string DNT = u"<--DO_NOT_TOUCH!-->";
  284. size_t prev = 0;
  285. size_t next = text_data.find(DNT, prev);
  286. while (next != std::string::npos) {
  287. std::u16string piece = text_data.substr(prev, next - prev);
  288. pieces.push_back(piece);
  289. prev = next + DNT.length();
  290. next = text_data.find(DNT, prev);
  291. }
  292. std::u16string piece = text_data.substr(prev);
  293. pieces.push_back(piece);
  294. std::cout << "AAAAA: " << codecvt.to_bytes(piece) << '\n' ;
  295. // Building BinaryData from pieces
  296. BinaryData result;
  297. BinaryData temp_data(4);
  298. temp_data.FromNumber<4>(pieces.size());
  299. result = result + temp_data;
  300. for (long long i = 0; i < pieces.size(); i++) {
  301. long long piece_size = pieces[i].length();
  302. if (piece_size < 128) {
  303. temp_data.FromNumber<1>(piece_size);
  304. } else {
  305. temp_data.FromNumberRAW<2>((piece_size | 32768));
  306. }
  307. result = result + temp_data;
  308. for (long long j = 0; j < piece_size; j++) {
  309. temp_data.FromNumber<2>(short(pieces[i][j]));
  310. result = result + temp_data;
  311. }
  312. }
  313. return result;
  314. }
  315. BinaryData TextSubfile::BuildArgumentReferences(const BinaryData &data, const SubfileData &new_data,
  316. long long &offset) {
  317. // Moving &offset pointer in &data
  318. GetArgumentReferenceData(data, offset);
  319. // If there are no args - making 4 null-bytes and return;
  320. if (!new_data.options["args"]) {
  321. BinaryData result;
  322. result.FromNumber<4>(0);
  323. return result;
  324. }
  325. // Parsing arguments from list in options["args"]
  326. std::string args_list = new_data.options["args"].as<std::string>();
  327. std::vector<long long> arguments;
  328. size_t prev = 0;
  329. size_t next = args_list.find('-', prev);
  330. while (next != std::string::npos) {
  331. std::string argument = args_list.substr(prev, next - prev);
  332. arguments.push_back(std::stoll(argument));
  333. prev = next + 1;
  334. next = args_list.find('-', prev);
  335. }
  336. std::string argument = args_list.substr(prev);
  337. std::cout << "DDD: Argument is " << argument << std::endl;
  338. std::cout << "DDD: Arguments are" << new_data.options["args"].as<std::string>() << std::endl;
  339. arguments.push_back(std::stoll(argument));
  340. BinaryData result;
  341. BinaryData temp_data(4);
  342. temp_data.FromNumber<4>(arguments.size());
  343. result = result + temp_data;
  344. for (auto arg_reference : arguments) {
  345. temp_data.FromNumber<4>(arg_reference);
  346. result = result + temp_data;
  347. }
  348. return result;
  349. }
  350. BinaryData TextSubfile::BuildArgumentStrings(const BinaryData &data, const SubfileData &new_data,
  351. long long &offset) {
  352. return GetArgumentStringsData(data, offset);
  353. }
  354. // Get BinaryData contents of pieces/arguments/argument strings
  355. BinaryData TextSubfile::GetPieceData(const BinaryData &data, long long &offset) const {
  356. long long old_offset = offset;
  357. long long num_pieces = data.ToNumber<4>(offset);
  358. offset += 4;
  359. for (long long j = 0; j < num_pieces; j++) {
  360. long long piece_size = data.ToNumber<1>(offset);
  361. if ((piece_size & 128) != 0) {
  362. piece_size = (((piece_size ^ 128) << 8) | data.ToNumber<1>(offset + 1));
  363. offset += 1;
  364. }
  365. offset += 1;
  366. offset += piece_size * 2;
  367. }
  368. std::cout << "DDDDDDDDDDDD: " << old_offset << " " << offset << std::endl;
  369. return data.CutData(old_offset, offset);
  370. }
  371. BinaryData TextSubfile::GetArgumentReferenceData(const BinaryData &data, long long &offset) const {
  372. long long old_offset = offset;
  373. long long num_references = data.ToNumber<4>(offset);
  374. offset += 4;
  375. offset += 4 * num_references;
  376. return data.CutData(old_offset, offset);
  377. }
  378. BinaryData TextSubfile::GetArgumentStringsData(const BinaryData &data, long long &offset) const {
  379. long long old_offset = offset;
  380. long long num_arg_strings = data.ToNumber<1>(offset);
  381. offset += 1;
  382. for (long long j = 0; j < num_arg_strings; j++) {
  383. long long num_args = data.ToNumber<4>(offset);
  384. offset += 4;
  385. for (long long k = 0; k < num_args; k++) {
  386. long long string_size = data.ToNumber<1>(offset);
  387. if ((string_size & 0x80) != 0) {
  388. string_size = (((string_size ^ 0x80) << 8) | data.ToNumber<1>(offset + 1));
  389. offset += 1;
  390. }
  391. offset += 1;
  392. offset += string_size * 2;
  393. }
  394. }
  395. return data.CutData(old_offset, offset);
  396. }
  397. };