TextSubfile.cpp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426
  1. //
  2. // Created by Иван_Архипов on 24.11.2017.
  3. //
  4. #include "Subfiles/TextSubfile.h"
  5. #include "BinaryData.h"
  6. #include "DatFile.h"
  7. #include "SubfileData.h"
  8. #include "EasyLogging++/easylogging++.h"
  9. #include <codecvt>
  10. #include <locale>
  // Converts an integer to its decimal representation as a UTF-16 string.
  //
  // Zero is rendered as u"0" (a digit-extraction loop that only runs while the
  // value is positive would produce an empty string for it), and negative
  // values are rendered with a leading u'-'.
  //
  // x      - the value to convert.
  // return - the decimal digits of x, most significant digit first.
  std::u16string to_utf16(long long x) {
      // Work on the unsigned magnitude so that the most negative value can be
      // negated without signed overflow.
      const bool negative = x < 0;
      unsigned long long magnitude = static_cast<unsigned long long>(x);
      if (negative)
          magnitude = 0ull - magnitude;

      // A 64-bit magnitude has at most 20 decimal digits; one extra slot holds the sign.
      const size_t capacity = 21;
      char16_t buffer[capacity];
      size_t first = capacity;

      // do/while guarantees that at least one digit is written, even for zero.
      do {
          buffer[--first] = char16_t(u'0' + magnitude % 10);
          magnitude /= 10;
      } while (magnitude != 0);

      if (negative)
          buffer[--first] = u'-';

      return std::u16string(buffer + first, capacity - first);
  }
  // Parses a decimal integer from a UTF-16 string.
  //
  // Leading blanks (space, tab, CR, LF) are skipped, an optional '+' or '-'
  // sign is honoured, and parsing stops at the first character that is not a
  // decimal digit, so stray characters can no longer corrupt the value.
  //
  // num    - text holding the number.
  // return - the parsed value; 0 when no digits are present.
  long long from_utf16(const std::u16string &num) {
      const size_t length = num.size();
      size_t pos = 0;

      while (pos < length &&
             (num[pos] == u' ' || num[pos] == u'\t' || num[pos] == u'\r' || num[pos] == u'\n'))
          ++pos;

      bool negative = false;
      if (pos < length && (num[pos] == u'-' || num[pos] == u'+')) {
          negative = (num[pos] == u'-');
          ++pos;
      }

      // Accumulate in an unsigned type: wrap-around on over-long input is well
      // defined there, whereas signed overflow would be undefined behaviour.
      unsigned long long magnitude = 0;
      for (; pos < length && num[pos] >= u'0' && num[pos] <= u'9'; ++pos)
          magnitude = magnitude * 10ull + static_cast<unsigned long long>(num[pos] - u'0');

      if (negative)
          magnitude = 0ull - magnitude;
      return static_cast<long long>(magnitude);
  }
  27. std::string argumentsFromUtf16(const std::u16string &args) {
  28. std::string res;
  29. size_t pointer = 0;
  30. while (pointer < args.length()) {
  31. size_t pointer1 = args.find(u'-', pointer);
  32. if (pointer1 == std::u16string::npos)
  33. pointer1 = args.length();
  34. if (!res.empty())
  35. res += "-";
  36. res += std::to_string(from_utf16(args.substr(pointer, pointer1 - pointer)));
  37. pointer = pointer1 + 1;
  38. }
  39. return res;
  40. }
  41. namespace LOTRO_DAT {
  42. TextSubfile::TextSubfile() = default;
  43. TextSubfile::TextSubfile(DatFile *dat, long long dictionary_offset, long long unknown1,
  44. long long file_id, long long file_offset, long long file_size,
  45. long long timestamp, long long version, long long block_size, long long unknown2)
  46. : Subfile(dat, dictionary_offset, unknown1, file_id, file_offset, file_size,
  47. timestamp, version, block_size, unknown2) {
  48. }
  49. FILE_TYPE TextSubfile::FileType() const {
  50. return TEXT;
  51. }
  52. std::string TextSubfile::Extension() const {
  53. return std::string(".txt");
  54. }
  55. SubfileData TextSubfile::PrepareForExport(const BinaryData &file_data) {
  56. if (file_data.Empty()) {
  57. return SubfileData();
  58. }
  59. SubfileData result;
  60. long long offset = 9; // first 4 bytes - file_id, then 4 bytes - unknown, then 1 byte - unknown
  61. long long text_fragment_num = file_data.ToNumber<1>(offset);
  62. if ((text_fragment_num & 0x80) != 0) {
  63. text_fragment_num = (((text_fragment_num ^ 0x80) << 8) | file_data.ToNumber<1>(offset + 1));
  64. offset += 1;
  65. }
  66. offset += 1;
  67. for (long long i = 0; i < text_fragment_num; i++) {
  68. long long fragment_id = file_data.ToNumber<8>(offset);
  69. offset += 8;
  70. std::vector<std::u16string> text_pieces = MakePieces(file_data, offset);
  71. std::vector<long long> arg_references = MakeArgumentReferences(file_data, offset);
  72. std::vector<std::vector<BinaryData>> arg_strings = MakeArgumentStrings(file_data, offset);
  73. std::u16string text = u"[";
  74. for (size_t j = 0; j + 1 < text_pieces.size(); j++)
  75. text += text_pieces[j] + u"<--DO_NOT_TOUCH!-->";
  76. text += text_pieces[text_pieces.size() - 1] + u"]";
  77. std::u16string arguments;
  78. for (size_t j = 0; j + 1 < arg_references.size(); j++)
  79. arguments += to_utf16(arg_references[j]) + u"-";
  80. if (!arg_references.empty())
  81. arguments += to_utf16(arg_references[arg_references.size() - 1]);
  82. if (result.text_data.length() > 0)
  83. result.text_data += u"|||";
  84. result.text_data += to_utf16(fragment_id) + u":::";
  85. result.text_data += arguments + u":::";
  86. result.text_data += text;
  87. }
  88. result.options["fid"] = file_id();
  89. result.options["ext"] = Extension();
  90. return result;
  91. }
  92. BinaryData TextSubfile::MakeForImport(const BinaryData &old_data, const SubfileData &data) {
  93. LOG(DEBUG) << "Preparing text file " << file_id() << " for import.";
  94. std::unordered_map<long long, SubfileData> patch_fragments = ParsePatchFragments(data);
  95. BinaryData new_data;
  96. if (file_size() <= 10 + 8) // File is empty, nothing to do;
  97. return old_data;
  98. long long offset = 9 + 8; // first 8 bytes - file_info. After them:
  99. // first 4 bytes - file_id, then 4 bytes - unknown, then 1 byte - unknown
  100. long long text_fragment_num = old_data.ToNumber<1>(offset);
  101. if ((text_fragment_num & 0x80) != 0) {
  102. text_fragment_num = (((text_fragment_num ^ 0x80) << 8) | old_data.ToNumber<1>(offset + 1));
  103. offset += 1;
  104. }
  105. offset += 1;
  106. new_data = new_data + old_data.CutData(0, offset); // Adding file info
  107. for (long long i = 0; i < text_fragment_num; i++) {
  108. long long fragment_id = old_data.ToNumber<8>(offset);
  109. offset += 8;
  110. new_data = new_data + old_data.CutData(offset - 8, offset);
  111. if (patch_fragments.count(fragment_id) == 0) {
  112. // Retrieving old pieces
  113. new_data = new_data + GetPieceData(old_data, offset);
  114. // Retrieving old references
  115. new_data = new_data + GetArgumentReferenceData(old_data, offset);
  116. // Retrieving old ref_strings
  117. new_data = new_data + GetArgumentStringsData(old_data, offset);
  118. } else {
  119. // Making and adding new pieces
  120. new_data = new_data + BuildPieces(old_data, patch_fragments[fragment_id], offset);
  121. // Making and adding new references
  122. new_data = new_data + BuildArgumentReferences(old_data, patch_fragments[fragment_id], offset);
  123. // Making and adding new strings
  124. new_data = new_data + BuildArgumentStrings(old_data, patch_fragments[fragment_id], offset);
  125. }
  126. }
  127. new_data = new_data + old_data.CutData(offset); // Adding elapsed file data
  128. return new_data;
  129. }
  130. std::unordered_map<long long, SubfileData> TextSubfile::ParsePatchFragments(const SubfileData &data) {
  131. LOG(DEBUG) << "Started parsing patch fragments";
  132. std::unordered_map<long long, SubfileData> res;
  133. std::u16string text = data.text_data;
  134. size_t pointer = 0;
  135. while (pointer < text.length()) {
  136. // Parsing fragment_id
  137. size_t pointer1 = text.find(u":::", pointer);
  138. if (pointer1 == std::u16string::npos) {
  139. LOG(ERROR) << "Unable to parse fragment id! Cannot find '...' divider. File_id = " << file_id_;
  140. return res;
  141. }
  142. long long fragment_id = from_utf16(text.substr(pointer, pointer1 - pointer));
  143. pointer = pointer1 + 3;
  144. res[fragment_id] = SubfileData();
  145. res[fragment_id].options["gid"] = fragment_id;
  146. // Parsing arguments
  147. pointer1 = text.find(u":::", pointer);
  148. if (pointer1 == std::u16string::npos) {
  149. LOG(ERROR) << "Unable to parse arguments! Cannot find '...' divider. File_id = " << file_id_;
  150. return res;
  151. }
  152. std::u16string arguments = text.substr(pointer, pointer1 - pointer);
  153. pointer = pointer1 + 3;
  154. if (arguments.length() > 0) {
  155. res[fragment_id].options["args"] = argumentsFromUtf16(arguments);
  156. }
  157. // Parsing text
  158. pointer1 = text.find(u"|||", pointer);
  159. if (pointer1 == std::u16string::npos)
  160. pointer1 = text.length();
  161. std::u16string text_data = text.substr(pointer, pointer1 - pointer);
  162. pointer = pointer1 + 3;
  163. res[fragment_id].text_data = text_data;
  164. }
  165. LOG(DEBUG) << "Finished parsing text patch fragments";
  166. return res;
  167. }
  168. // Make pieces/arguments/argument strings functions
  169. std::vector<std::u16string> TextSubfile::MakePieces(const BinaryData &data, long long &offset) {
  170. LOG(DEBUG) << "Started making pieces";
  171. long long num_pieces = data.ToNumber<4>(offset);
  172. offset += 4;
  173. std::vector<std::u16string> text_pieces;
  174. for (long long j = 0; j < num_pieces; j++) {
  175. long long piece_size = data.ToNumber<1>(offset);
  176. if ((piece_size & 128) != 0) {
  177. piece_size = (((piece_size ^ 128) << 8) | data.ToNumber<1>(offset + 1));
  178. offset += 1;
  179. }
  180. offset += 1;
  181. BinaryData piece_data = data.CutData(offset, offset + piece_size * 2);
  182. std::u16string piece;
  183. for (long long k = 0; k < piece_size; k++) {
  184. char16_t c = char16_t(
  185. ((short(piece_data[2 * unsigned(k) + 1])) << 8) | (short(piece_data[2 * unsigned(k)])));
  186. piece += c;
  187. }
  188. text_pieces.push_back(piece);
  189. offset += piece_size * 2;
  190. }
  191. LOG(DEBUG) << "Finished making pieces";
  192. return text_pieces;
  193. }
  194. std::vector<long long> TextSubfile::MakeArgumentReferences(const BinaryData &data, long long &offset) {
  195. LOG(DEBUG) << "Started making argument references";
  196. std::vector<long long> arg_references;
  197. long long num_references = data.ToNumber<4>(offset);
  198. offset += 4;
  199. for (long long j = 0; j < num_references; j++) {
  200. arg_references.emplace_back(data.ToNumber<4>(offset));
  201. offset += 4;
  202. }
  203. LOG(DEBUG) << "Finished making argument references";
  204. return arg_references;
  205. }
  206. std::vector<std::vector<BinaryData>> TextSubfile::MakeArgumentStrings(const BinaryData &data, long long &offset) {
  207. LOG(DEBUG) << "Started making argument strings";
  208. std::vector<std::vector<BinaryData> > arg_strings;
  209. long long num_arg_strings = data.ToNumber<1>(offset);
  210. offset += 1;
  211. for (long long j = 0; j < num_arg_strings; j++) {
  212. long long num_args = data.ToNumber<4>(offset);
  213. offset += 4;
  214. arg_strings.emplace_back();
  215. for (long long k = 0; k < num_args; k++) {
  216. long long string_size = data.ToNumber<1>(offset);
  217. if ((string_size & 0x80) != 0) {
  218. string_size = (((string_size ^ 0x80) << 8) | data.ToNumber<1>(offset + 1));
  219. offset += 1;
  220. }
  221. offset += 1;
  222. arg_strings[unsigned(j)].emplace_back(data.CutData(offset, offset + string_size * 2));
  223. offset += string_size * 2;
  224. }
  225. }
  226. LOG(DEBUG) << "Finished making argument strings";
  227. return arg_strings;
  228. }
  229. // Build pieces/arguments/argument strings functions from fragment SubfileData
  230. BinaryData TextSubfile::BuildPieces(const BinaryData &data, const SubfileData &new_data, long long &offset) {
  231. LOG(DEBUG) << "Started building pieces";
  232. // Moving &offset pointer in &data
  233. GetPieceData(data, offset);
  234. // Deleting '[' and ']' brackets
  235. std::u16string text_data = new_data.text_data.substr(1, new_data.text_data.size() - 2);
  236. std::vector<std::u16string> pieces;
  237. const std::u16string DNT = u"<--DO_NOT_TOUCH!-->";
  238. size_t prev = 0;
  239. size_t next = text_data.find(DNT, prev);
  240. while (next != std::string::npos) {
  241. std::u16string piece = text_data.substr(prev, next - prev);
  242. pieces.emplace_back(piece);
  243. prev = next + DNT.length();
  244. next = text_data.find(DNT, prev);
  245. }
  246. pieces.emplace_back(text_data.substr(prev));
  247. // Building BinaryData from pieces
  248. BinaryData result;
  249. BinaryData temp_data = BinaryData::FromNumber<4>(pieces.size());
  250. result = result + temp_data;
  251. for (auto piece : pieces) {
  252. long long piece_size = piece.length();
  253. if (piece_size < 128) {
  254. temp_data = BinaryData::FromNumber<1>(piece_size);
  255. } else {
  256. temp_data = BinaryData::FromNumberRAW<2>((piece_size | 32768));
  257. }
  258. result = result + temp_data;
  259. for (long long j = 0; j < piece_size; j++) {
  260. temp_data = BinaryData::FromNumber<2>(short(piece[j]));
  261. result = result + temp_data;
  262. }
  263. }
  264. LOG(DEBUG) << "Pieces built successfully";
  265. return result;
  266. }
  267. BinaryData TextSubfile::BuildArgumentReferences(const BinaryData &data, const SubfileData &new_data,
  268. long long &offset) {
  269. LOG(DEBUG) << "Started building argument refs";
  270. // Moving &offset pointer in &data
  271. GetArgumentReferenceData(data, offset);
  272. // If there are no args - making 4 null-bytes and return;
  273. if (!new_data.options["args"]) {
  274. BinaryData result = BinaryData::FromNumber<4>(0);
  275. return result;
  276. }
  277. // Parsing arguments from list in options["args"]
  278. std::string args_list = new_data.options["args"].as<std::string>();
  279. std::vector<long long> arguments;
  280. size_t prev = 0;
  281. size_t next = args_list.find('-', prev);
  282. while (next != std::string::npos) {
  283. std::string argument = args_list.substr(prev, next - prev);
  284. arguments.push_back(std::stoll(argument));
  285. prev = next + 1;
  286. next = args_list.find('-', prev);
  287. }
  288. std::string argument = args_list.substr(prev);
  289. arguments.push_back(std::stoll(argument));
  290. BinaryData result;
  291. BinaryData temp_data = BinaryData::FromNumber<4>(arguments.size());
  292. result = result + temp_data;
  293. for (auto arg_reference : arguments) {
  294. temp_data = BinaryData::FromNumber<4>(arg_reference);
  295. result = result + temp_data;
  296. }
  297. LOG(DEBUG) << "Argument refs built successfully";
  298. return result;
  299. }
  300. BinaryData TextSubfile::BuildArgumentStrings(const BinaryData &data, const SubfileData &, long long &offset) {
  301. LOG(DEBUG) << "Started building argument strings";
  302. LOG(DEBUG) << "Built arg strings successfully";
  303. return GetArgumentStringsData(data, offset);
  304. }
  305. // Get BinaryData contents of pieces/arguments/argument strings
  306. BinaryData TextSubfile::GetPieceData(const BinaryData &data, long long &offset) const {
  307. LOG(DEBUG) << "Started getting piece data";
  308. long long old_offset = offset;
  309. long long num_pieces = data.ToNumber<4>(offset);
  310. offset += 4;
  311. for (long long j = 0; j < num_pieces; j++) {
  312. long long piece_size = data.ToNumber<1>(offset);
  313. if ((piece_size & 128) != 0) {
  314. piece_size = (((piece_size ^ 128) << 8) | data.ToNumber<1>(offset + 1));
  315. offset += 1;
  316. }
  317. offset += 1;
  318. offset += piece_size * 2;
  319. }
  320. LOG(DEBUG) << "Got piece data";
  321. return data.CutData(old_offset, offset);
  322. }
  323. BinaryData TextSubfile::GetArgumentReferenceData(const BinaryData &data, long long &offset) const {
  324. LOG(DEBUG) << "Started getting arg refs data";
  325. long long old_offset = offset;
  326. long long num_references = data.ToNumber<4>(offset);
  327. offset += 4;
  328. offset += 4 * num_references;
  329. LOG(DEBUG) << "Finished getting arg refs data";
  330. return data.CutData(old_offset, offset);
  331. }
  332. BinaryData TextSubfile::GetArgumentStringsData(const BinaryData &data, long long &offset) const {
  333. LOG(DEBUG) << "Started getting arg strings data";
  334. long long old_offset = offset;
  335. long long num_arg_strings = data.ToNumber<1>(offset);
  336. offset += 1;
  337. for (long long j = 0; j < num_arg_strings; j++) {
  338. long long num_args = data.ToNumber<4>(offset);
  339. offset += 4;
  340. for (long long k = 0; k < num_args; k++) {
  341. long long string_size = data.ToNumber<1>(offset);
  342. if ((string_size & 0x80) != 0) {
  343. string_size = (((string_size ^ 0x80) << 8) | data.ToNumber<1>(offset + 1));
  344. offset += 1;
  345. }
  346. offset += 1;
  347. offset += string_size * 2;
  348. }
  349. }
  350. LOG(DEBUG) << "Finished getting arg strings data";
  351. return data.CutData(old_offset, offset);
  352. }
  353. };