v50 Steam/Premium information for editors
  • v50 information can now be added to pages in the main namespace. v0.47 information can still be found in the DF2014 namespace. See here for more details on the new versioning policy.
  • Use this page to report any issues related to the migration.
This notice may be cached—the current version can be found here.

User:Quietust/rawextract.pl

From Dwarf Fortress Wiki
< User:Quietust
Revision as of 22:51, 29 November 2023 by Quietust (talk | contribs)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search
#!/usr/bin/env perl

use strict;

use File::Temp qw/tempfile/;
use Compress::Zlib;

# Lookup table: save version number (the first 32-bit value in world.dat /
# world.sav) => name of the Dwarf Fortress release that wrote it.
# Only used to print a friendly version string; unknown numbers are reported
# as unrecognized by the caller.
my %versions = (
	# 0.31.x
	1287 => '0.31.01',
	1288 => '0.31.02',
	1289 => '0.31.03',
	1292 => '0.31.04',
	1295 => '0.31.05',
	1297 => '0.31.06',
	1300 => '0.31.08',
	1304 => '0.31.09',
	1305 => '0.31.10',
	1310 => '0.31.11',
	1311 => '0.31.12',
	1323 => '0.31.13',
	1325 => '0.31.14',
	1326 => '0.31.15',
	1327 => '0.31.16',
	1340 => '0.31.17',
	1341 => '0.31.18',
	1351 => '0.31.19',
	1353 => '0.31.20',
	1354 => '0.31.21',
	1359 => '0.31.22',
	1360 => '0.31.23',
	1361 => '0.31.24',
	1362 => '0.31.25',

	# 0.34.x
	1372 => '0.34.01',
	1374 => '0.34.02',
	1376 => '0.34.03',
	1377 => '0.34.04',
	1378 => '0.34.05',
	1382 => '0.34.06',
	1383 => '0.34.07',
	1400 => '0.34.08',
	1402 => '0.34.09',
	1403 => '0.34.10',
	1404 => '0.34.11',

	# 0.40.x
	1441 => '0.40.01',
	1442 => '0.40.02',
	1443 => '0.40.03',
	1444 => '0.40.04',
	1445 => '0.40.05',
	1446 => '0.40.06',
	1448 => '0.40.07',
	1449 => '0.40.08',
	1451 => '0.40.09',
	1452 => '0.40.10',
	1456 => '0.40.11',
	1459 => '0.40.12',
	1462 => '0.40.13',
	1469 => '0.40.14',
	1470 => '0.40.15',
	1471 => '0.40.16',
	1472 => '0.40.17',
	1473 => '0.40.18',
	1474 => '0.40.19',
	1477 => '0.40.20',
	1478 => '0.40.21',
	1479 => '0.40.22',
	1480 => '0.40.23',
	1481 => '0.40.24',

	# 0.42.x
	1531 => '0.42.01',
	1532 => '0.42.02',
	1533 => '0.42.03',
	1534 => '0.42.04',
	1537 => '0.42.05',
	1542 => '0.42.06',

	# 0.43.x
	1551 => '0.43.01',
	1552 => '0.43.02',
	1553 => '0.43.03',
	1555 => '0.43.04',
	1556 => '0.43.05',

	# 0.44.x
	1596 => '0.44.01',
	1597 => '0.44.02',
	1600 => '0.44.03',
	1603 => '0.44.04',
	1604 => '0.44.05',
	1611 => '0.44.06',
	1612 => '0.44.07',
	1613 => '0.44.08',
	1614 => '0.44.09',
	1620 => '0.44.10',
	1623 => '0.44.11',
	1625 => '0.44.12',

	# 0.47.x
	1710 => '0.47.01',
	1711 => '0.47.02',
	1712 => '0.47.03',
	1715 => '0.47.04',
	1716 => '0.47.05',

	# 0.50.x
	2078 => '0.50.01',
	2079 => '0.50.02',
	# 0.50.03 apparently shares save version 2080 with 0.50.04; a hash key can
	# carry only one label, so the older one is kept here as a comment.
#	2080 => '0.50.03',
	2080 => '0.50.04',
	2081 => '0.50.11',
);

# Expands a compressed world file into an anonymous temporary file.
#
#   $version - save version number; written into the regenerated header
#   $file    - open binary handle on the compressed file, positioned just
#              past its 8-byte header; it is closed before returning
#
# The remaining input is consumed as a sequence of blocks, each a 32-bit
# little-endian byte count followed by that many bytes of zlib data.
#
# Returns a handle on the temporary file, positioned at offset 8 (just past
# the rewritten header) so the caller can carry on exactly as it would with
# an uncompressed file.
# Dies if a block is cut short or cannot be inflated, rather than silently
# producing an incomplete temporary file.
sub decompress {
	my ($version, $file) = @_;
	my $out = tempfile();
	binmode($out);

	# write 'uncompressed' header
	print($out pack('VV', $version, 0));

	while (1) {
		my $len = readLong($file);
		if (eof($file)) { last; }

		# An empty block carries nothing to inflate; skip it.
		if ($len == 0) { next; }

		# A short read means the file ends in the middle of a block.
		my $got = read($file, my $block, $len);
		if (!defined($got) || $got != $len) {
			die("Compressed data is truncated!");
		}

		# uncompress() returns undef when the block is not valid zlib data.
		my $data = uncompress($block);
		if (!defined($data)) { die("Compressed data is corrupted!"); }
		print($out $data);
	}
	seek($out, 8, 0);
	close($file);
	return $out;
}

# Reads the next four bytes from the given handle and decodes them as an
# unsigned 32-bit little-endian integer.
# In scalar context the result is undef when fewer than four bytes were left.
sub readLong {
	my ($fh) = @_;
	my $bytes;
	read($fh, $bytes, 4);
	return unpack('V', $bytes);
}

# Reads the next two bytes from the given handle and decodes them as an
# unsigned 16-bit little-endian integer.
# In scalar context the result is undef when fewer than two bytes were left.
sub readShort {
	my ($fh) = @_;
	my $bytes;
	read($fh, $bytes, 2);
	return unpack('v', $bytes);
}

# Reads a single byte from the given handle and returns it as an unsigned
# integer (0-255).
# In scalar context the result is undef when the handle is already at EOF.
sub readByte {
	my ($fh) = @_;
	my $bytes;
	read($fh, $bytes, 1);
	return unpack('C', $bytes);
}

# Optional first argument: directory containing the save files.
# With no argument the files are looked up relative to the current directory.
my $dir = '';
if (@ARGV > 0) { $dir = $ARGV[0] .'/'; }
my $version = -1;
my $raw;

# Reads the 8-byte file header (save version, then compression flag), prints
# which release the save belongs to, and swaps in a decompressed copy of the
# file when the compression flag is 1.
#
#   $file - freshly opened handle on the world file
#   $name - file name used in the "corrupted" error message
#
# Returns (version, handle); the handle is positioned just past the header.
# Dies if the compression flag is anything other than 0 or 1.
sub readFileHeader {
	my ($file, $name) = @_;
	binmode($file);

	my $ver = readLong($file);
	if (exists($versions{$ver})) { print "World is from version $versions{$ver}\n"; }
	else { print "World is from an unrecognized version '$ver'!\n"; }

	my $comp = readLong($file);
	if ($comp == 1) { $file = decompress($ver, $file); }
	elsif ($comp > 0) { die("$name file is corrupted!"); }

	return ($ver, $file);
}

# Skips the extra bytes that precede the world name in newer saves.
# Each pair is (minimum save version, bytes to skip); every pair whose
# minimum is met contributes its own relative seek, in ascending order.
# NOTE(review): presumably these are fields added to the format in those
# versions - the contents are never inspected here.
sub skipVersionedFields {
	my ($file, $ver) = @_;
	my @extra = (
		[1372, 0x14],
		[1384, 0x4],
		[1441, 0x14],
		[1484, 0xC],
		[1485, 0x8],
		[1493, 0x4],
		[1571, 0x4],
		[1662, 0x8],
	);
	foreach my $pair (@extra) {
		my ($since, $bytes) = @$pair;
		if ($ver >= $since) { seek($file, $bytes, 1); }
	}
}

# Skips the block that follows the world name in saves of version 2072 and
# later: fixed-size gaps interleaved with three length-prefixed fields whose
# 16-bit lengths are read and then skipped over.
sub skipPost2072Fields {
	my ($file) = @_;
	seek($file, 0x8, 1);
	seek($file, readShort($file), 1);
	seek($file, readShort($file), 1);
	seek($file, 0x10, 1);
	seek($file, readShort($file), 1);
	seek($file, 0x4, 1);
}

# world.dat is tried first; world.sav is only consulted when world.dat cannot
# be opened.  Either way $raw ends up positioned at the first raw list.
if (open($raw, '<', $dir.'world.dat')) {
	($version, $raw) = readFileHeader($raw, 'world.dat');

	seek($raw, 0x46, 1);
	skipVersionedFields($raw, $version);

	# Optional section: present only when the flag byte is non-zero.
	if (readByte($raw)) {
		seek($raw, readShort($raw), 1);
		seek($raw, readShort($raw), 1);
		seek($raw, 0x30, 1);
	}
	seek($raw, 0xF, 1);

	# Length-prefixed world name.
	my $len = readShort($raw);
	read($raw, my $world, $len);

	if ($version >= 2072) { skipPost2072Fields($raw); }

	print "Extracting randomly generated raws of $world:\n\n";
}
elsif (open($raw, '<', $dir.'world.sav')) {
	($version, $raw) = readFileHeader($raw, 'world.sav');

	seek($raw, 0x56, 1);
	skipVersionedFields($raw, $version);

	# First length-prefixed string is read only to move past it.
	my $len = readShort($raw);
	read($raw, my $name, $len);

	# Length-prefixed world name, followed by the current year.
	$len = readShort($raw);
	read($raw, my $world, $len);
	my $year = readLong($raw);

	if ($version >= 2072) { skipPost2072Fields($raw); }

	print "Extracting randomly generated raws of $world year $year:\n\n";
}
else { die("Could not open world.dat or world.sav!"); }

# Reads one list of raw definitions from the world file and writes every
# definition to its own "<name>.txt" file.
#
#   $raw   - open binary handle positioned at the list's 32-bit entry count
#   $type  - label used in the progress message and in fallback file names
#   $token - regex fragment matching the tag whose first argument names the
#            definition (callers pass e.g. 'CREATURE' or 'ITEM_.*?')
#
# Layout consumed: entry count; per entry a 32-bit line count; per line a
# 16-bit length followed by that many bytes of text.
# A definition with no matching tag is saved as "<type>_<index>.txt"; when
# several lines match, the last one decides the file name.
# If an output file cannot be opened a warning is printed and extraction
# continues with the next definition (its data has already been consumed
# from $raw, so the stream position stays correct).
sub extractList {
	my ($raw, $type, $token) = @_;

	# Compiled once per list; $token is deliberately a pattern, not a literal.
	my $nameTag = qr/\[$token:(.*?)\]/;

	my $count = readLong($raw);
	print "Extracting $count $type definitions...\n";

	for (my $i = 0; $i < $count; $i++) {
		my $data = '';
		my $file = $type.'_'.$i;
		my $numLines = readLong($raw);
		for (my $j = 0; $j < $numLines; $j++) {
			my $len = readShort($raw);
			read($raw, my $str, $len);
			$data .= "$str\n";
			if ($str =~ $nameTag) { $file = $1; }
		}
		print "$file...\n";

		# Lexical handle instead of a global bareword; report failures
		# instead of silently dropping the definition.
		my $out;
		if (!open($out, '>', $file.'.txt')) {
			warn "Could not write $file.txt: $!\n";
			next;
		}
		print($out $data);
		close($out) or warn "Could not finish writing $file.txt: $!\n";
	}
}

# The raw lists are read from the world file in exactly this order.
# Each row is (minimum save version, type label, name tag pattern); a list is
# only read when the save is at least that version, and undef means the list
# is read unconditionally.
my @rawLists = (
	[1372,  'material',    'INORGANIC'],
	[1441,  'plant',       'PLANT'],
	[1390,  'item',        'ITEM_.*?'],
	[undef, 'creature',    'CREATURE'],
	[1441,  'entity',      'ENTITY'],
	[1484,  'reaction',    'REACTION'],
	[1372,  'interaction', 'INTERACTION'],
	[1423,  'language',    'TRANSLATION'],
);

foreach my $entry (@rawLists) {
	my ($minVersion, $type, $token) = @$entry;
	if (defined($minVersion) && $version < $minVersion) { next; }
	extractList($raw, $type, $token);
}

print "done!\n";