v50 Steam/Premium information for editors
  • v50 information can now be added to pages in the main namespace. v0.47 information can still be found in the DF2014 namespace. See here for more details on the new versioning policy.
  • Use this page to report any issues related to the migration.
This notice may be cached—the current version can be found here.

Difference between revisions of "User:Quietust/rawextract.php"

From Dwarf Fortress Wiki
Jump to navigation Jump to search
(and improved even further - now handles worlds with active saved games!)
m
 
(40 intermediate revisions by 2 users not shown)
Line 1: Line 1:
  <nowiki><?
+
  <nowiki><?php
function decompress ($file)
+
$versions = array(
 +
1287 => '0.31.01',
 +
1288 => '0.31.02',
 +
1289 => '0.31.03',
 +
1292 => '0.31.04',
 +
1295 => '0.31.05',
 +
1297 => '0.31.06',
 +
1300 => '0.31.08',
 +
1304 => '0.31.09',
 +
1305 => '0.31.10',
 +
1310 => '0.31.11',
 +
1311 => '0.31.12',
 +
1323 => '0.31.13',
 +
1325 => '0.31.14',
 +
1326 => '0.31.15',
 +
1327 => '0.31.16',
 +
1340 => '0.31.17',
 +
1341 => '0.31.18',
 +
1351 => '0.31.19',
 +
1353 => '0.31.20',
 +
1354 => '0.31.21',
 +
1359 => '0.31.22',
 +
1360 => '0.31.23',
 +
1361 => '0.31.24',
 +
1362 => '0.31.25',
 +
 
 +
1372 => '0.34.01',
 +
1374 => '0.34.02',
 +
1376 => '0.34.03',
 +
1377 => '0.34.04',
 +
1378 => '0.34.05',
 +
1382 => '0.34.06',
 +
1383 => '0.34.07',
 +
1400 => '0.34.08',
 +
1402 => '0.34.09',
 +
1403 => '0.34.10',
 +
1404 => '0.34.11',
 +
 
 +
1441 => '0.40.01',
 +
1442 => '0.40.02',
 +
1443 => '0.40.03',
 +
1444 => '0.40.04',
 +
1445 => '0.40.05',
 +
1446 => '0.40.06',
 +
1448 => '0.40.07',
 +
1449 => '0.40.08',
 +
1451 => '0.40.09',
 +
1452 => '0.40.10',
 +
1456 => '0.40.11',
 +
1459 => '0.40.12',
 +
1462 => '0.40.13',
 +
1469 => '0.40.14',
 +
1470 => '0.40.15',
 +
1471 => '0.40.16',
 +
1472 => '0.40.17',
 +
1473 => '0.40.18',
 +
1474 => '0.40.19',
 +
1477 => '0.40.20',
 +
1478 => '0.40.21',
 +
1479 => '0.40.22',
 +
1480 => '0.40.23',
 +
1481 => '0.40.24',
 +
 
 +
1531 => '0.42.01',
 +
1532 => '0.42.02',
 +
1533 => '0.42.03',
 +
1534 => '0.42.04',
 +
1537 => '0.42.05',
 +
1542 => '0.42.06',
 +
 
 +
1551 => '0.43.01',
 +
1552 => '0.43.02',
 +
1553 => '0.43.03',
 +
1555 => '0.43.04',
 +
1556 => '0.43.05',
 +
 
 +
1596 => '0.44.01',
 +
1597 => '0.44.02',
 +
1600 => '0.44.03',
 +
1603 => '0.44.04',
 +
1604 => '0.44.05',
 +
1611 => '0.44.06',
 +
1612 => '0.44.07',
 +
1613 => '0.44.08',
 +
1614 => '0.44.09',
 +
1620 => '0.44.10',
 +
1623 => '0.44.11',
 +
1625 => '0.44.12',
 +
 
 +
1710 => '0.47.01',
 +
1711 => '0.47.02',
 +
1712 => '0.47.03',
 +
1715 => '0.47.04',
 +
1716 => '0.47.05',
 +
 
 +
2078 => '0.50.01',
 +
2079 => '0.50.02',
 +
// 2080 => '0.50.03',
 +
2080 => '0.50.04',
 +
2081 => '0.50.11',
 +
);
 +
 
 +
function decompress ($version, $file)
 
{
 
{
$out = fopen('temp', 'w+'); // tmpfile();
+
$out = tmpfile();
 
// write 'uncompressed' header
 
// write 'uncompressed' header
fwrite($out, pack('VV', 0x0509, 0));
+
fwrite($out, pack('VV', $version, 0));
 
while (1)
 
while (1)
 
{
 
{
$len = readLong($file);
+
$len = @readLong($file);
 
if (feof($file))
 
if (feof($file))
 
break;
 
break;
// not sure what this is, but it isn't part of the compressed data
+
$block = fread($file, $len);
$unk = readShort($file);
+
fwrite($out, gzuncompress($block));
$block = fread($file, $len - 2);
 
fwrite($out, gzinflate($block));
 
 
}
 
}
 
rewind($out);
 
rewind($out);
 +
// seek past the headers
 +
fseek($out, 8, SEEK_CUR);
 +
fclose($file);
 
return $out;
 
return $out;
 
}
 
}
Line 22: Line 125:
 
{
 
{
 
$x = fread($file, 4);
 
$x = fread($file, 4);
$pck = unpack('Vdata', $x);
+
$pck = @unpack('Vdata', $x);
 
return $pck['data'];
 
return $pck['data'];
 
}
 
}
Line 28: Line 131:
 
{
 
{
 
$x = fread($file, 2);
 
$x = fread($file, 2);
$pck = unpack('vdata', $x);
+
$pck = @unpack('vdata', $x);
 +
return $pck['data'];
 +
}
 +
function readByte ($file)
 +
{
 +
$x = fread($file, 1);
 +
$pck = @unpack('Cdata', $x);
 
return $pck['data'];
 
return $pck['data'];
 
}
 
}
  
$raw = @fopen('world.dat', 'rb');
+
$dir = '';
 +
if ($argc > 1)
 +
$dir = $argv[1] .'\\';
 +
 
 +
$raw = @fopen($dir.'world.dat', 'rb');
 
if ($raw)
 
if ($raw)
 
{
 
{
$sig = readLong($raw);
+
$version = readLong($raw);
if ($sig != 0x0509)
+
if (isset($versions[$version]))
die("world.dat file header not found!");
+
echo "World is from version $versions[$version]\n";
 
+
else echo "World is from an unrecognized version '$version'!\n";
 
$comp = readLong($raw);
 
$comp = readLong($raw);
 
if ($comp == 1)
 
if ($comp == 1)
$raw = decompress($raw);
+
$raw = decompress($version, $raw);
 
elseif ($comp > 0)
 
elseif ($comp > 0)
 
die("world.dat file is corrupted!");
 
die("world.dat file is corrupted!");
  
fseek($raw, 0x92, SEEK_SET);
+
fseek($raw, 0x46, SEEK_CUR);
 +
if ($version >= 1372)
 +
fseek($raw, 0x14, SEEK_CUR);
 +
if ($version >= 1384)
 +
fseek($raw, 0x4, SEEK_CUR);
 +
if ($version >= 1441)
 +
fseek($raw, 0x14, SEEK_CUR);
 +
if ($version >= 1484)
 +
fseek($raw, 0xC, SEEK_CUR);
 +
if ($version >= 1485)
 +
fseek($raw, 0x8, SEEK_CUR);
 +
if ($version >= 1493)
 +
fseek($raw, 0x4, SEEK_CUR);
 +
if ($version >= 1571)
 +
fseek($raw, 0x4, SEEK_CUR);
 +
if ($version >= 1662)
 +
fseek($raw, 0x8, SEEK_CUR);
 +
 
 +
if (readByte($raw))
 +
{
 +
fseek($raw, readShort($raw), SEEK_CUR);
 +
fseek($raw, readShort($raw), SEEK_CUR);
 +
fseek($raw, 0x30, SEEK_CUR);
 +
}
 +
fseek($raw, 0xF, SEEK_CUR);
  
 
$len = readShort($raw);
 
$len = readShort($raw);
 
$world = fread($raw, $len);
 
$world = fread($raw, $len);
echo "Listing raws for randomly generated creatures of $world:\n\n";
+
 
 +
if ($version >= 2072)
 +
{
 +
fseek($raw, 0x8, SEEK_CUR);
 +
fseek($raw, readShort($raw), SEEK_CUR);
 +
fseek($raw, readShort($raw), SEEK_CUR);
 +
fseek($raw, 0x10, SEEK_CUR);
 +
fseek($raw, readShort($raw), SEEK_CUR);
 +
fseek($raw, 0x4, SEEK_CUR);
 +
}
 +
 
 +
echo "Extracting randomly generated raws of $world:\n\n";
 
}
 
}
 
else
 
else
 
{
 
{
$raw = @fopen('world.sav', 'rb');
+
$raw = @fopen($dir.'world.sav', 'rb');
 
if (!$raw)
 
if (!$raw)
 
die("Could not open world.dat or world.sav!");
 
die("Could not open world.dat or world.sav!");
  
$sig = readLong($raw);
+
$version = readLong($raw);
if ($sig != 0x0509)
+
if (isset($versions[$version]))
die("world.sav file header not found!");
+
echo "World is from version $versions[$version]\n";
 
+
else echo "World is from an unrecognized version '$version'!\n";
 
$comp = readLong($raw);
 
$comp = readLong($raw);
 
if ($comp == 1)
 
if ($comp == 1)
$raw = decompress($raw);
+
$raw = decompress($version, $raw);
 
elseif ($comp > 0)
 
elseif ($comp > 0)
 
die("world.sav file is corrupted!");
 
die("world.sav file is corrupted!");
  
fseek($raw, 0x5E, SEEK_SET);
+
fseek($raw, 0x56, SEEK_CUR);
 +
if ($version >= 1372)
 +
fseek($raw, 0x14, SEEK_CUR);
 +
if ($version >= 1384)
 +
fseek($raw, 0x4, SEEK_CUR);
 +
if ($version >= 1441)
 +
fseek($raw, 0x14, SEEK_CUR);
 +
if ($version >= 1484)
 +
fseek($raw, 0xC, SEEK_CUR);
 +
if ($version >= 1485)
 +
fseek($raw, 0x8, SEEK_CUR);
 +
if ($version >= 1493)
 +
fseek($raw, 0x4, SEEK_CUR);
 +
if ($version >= 1571)
 +
fseek($raw, 0x4, SEEK_CUR);
 +
if ($version >= 1662)
 +
fseek($raw, 0x8, SEEK_CUR);
  
 
$len = readShort($raw);
 
$len = readShort($raw);
Line 74: Line 238:
 
$len = readShort($raw);
 
$len = readShort($raw);
 
$world = fread($raw, $len);
 
$world = fread($raw, $len);
echo "Listing raws for randomly generated creatures of $world:\n\n";
+
$year = readLong($raw);
$skip = readLong($raw);
+
 
 +
if ($version >= 2072)
 +
{
 +
fseek($raw, 0x8, SEEK_CUR);
 +
fseek($raw, readShort($raw), SEEK_CUR);
 +
fseek($raw, readShort($raw), SEEK_CUR);
 +
fseek($raw, 0x10, SEEK_CUR);
 +
fseek($raw, readShort($raw), SEEK_CUR);
 +
fseek($raw, 0x4, SEEK_CUR);
 +
}
 +
 
 +
echo "Extracting randomly generated raws of $world year $year:\n\n";
 
}
 
}
  
$numCreatures = readLong($raw);
+
function extractList ($raw, $type, $tokens)
 +
{
 +
$count = readLong($raw);
 +
echo "Extracting $count $type definitions...\n";
  
for ($i = 0; $i < $numCreatures; $i++)
+
for ($i = 0; $i < $count; $i++)
{
 
$numLines = readLong($raw);
 
for ($j = 0; $j < $numLines; $j++)
 
 
{
 
{
$len = readShort($raw);
+
$data = '';
$str = fread($raw, $len);
+
$file = $type.'_'.$i;
echo "$str\n";
+
$numLines = readLong($raw);
 +
for ($j = 0; $j < $numLines; $j++)
 +
{
 +
$len = readShort($raw);
 +
$str = fread($raw, $len);
 +
$data .= "$str\r\n";
 +
foreach ($tokens as $token)
 +
if (preg_match('/\['.$token.':(.*?)\]/', $str, $x))
 +
$file = $x[1];
 +
}
 +
echo "$file...\n";
 +
file_put_contents($file .'.txt', $data);
 
}
 
}
echo "----------\n";
 
 
}
 
}
 +
 +
if ($version >= 1372)
 +
extractList($raw, 'material', array('INORGANIC'));
 +
 +
if ($version >= 1441)
 +
extractList($raw, 'plant', array('PLANT'));
 +
 +
if ($version >= 1390)
 +
extractList($raw, 'item', array('ITEM_.*?'));
 +
 +
extractList($raw, 'creature', array('CREATURE'));
 +
 +
if ($version >= 1441)
 +
extractList($raw, 'entity', array('ENTITY'));
 +
 +
if ($version >= 1484)
 +
extractList($raw, 'reaction', array('REACTION'));
 +
 +
if ($version >= 1372)
 +
extractList($raw, 'interaction', array('INTERACTION'));
 +
 +
if ($version >= 1423)
 +
extractList($raw, 'language', array('TRANSLATION'));
 +
 +
echo "done!\n";
 
?></nowiki>
 
?></nowiki>

Latest revision as of 22:47, 29 November 2023

<?php
// Lookup table: save-format version number (the first 32-bit value read from
// world.dat / world.sav) => the release string shown to the user.
$versions = [
	// 0.31.x
	1287 => '0.31.01',
	1288 => '0.31.02',
	1289 => '0.31.03',
	1292 => '0.31.04',
	1295 => '0.31.05',
	1297 => '0.31.06',
	1300 => '0.31.08',
	1304 => '0.31.09',
	1305 => '0.31.10',
	1310 => '0.31.11',
	1311 => '0.31.12',
	1323 => '0.31.13',
	1325 => '0.31.14',
	1326 => '0.31.15',
	1327 => '0.31.16',
	1340 => '0.31.17',
	1341 => '0.31.18',
	1351 => '0.31.19',
	1353 => '0.31.20',
	1354 => '0.31.21',
	1359 => '0.31.22',
	1360 => '0.31.23',
	1361 => '0.31.24',
	1362 => '0.31.25',

	// 0.34.x
	1372 => '0.34.01',
	1374 => '0.34.02',
	1376 => '0.34.03',
	1377 => '0.34.04',
	1378 => '0.34.05',
	1382 => '0.34.06',
	1383 => '0.34.07',
	1400 => '0.34.08',
	1402 => '0.34.09',
	1403 => '0.34.10',
	1404 => '0.34.11',

	// 0.40.x
	1441 => '0.40.01',
	1442 => '0.40.02',
	1443 => '0.40.03',
	1444 => '0.40.04',
	1445 => '0.40.05',
	1446 => '0.40.06',
	1448 => '0.40.07',
	1449 => '0.40.08',
	1451 => '0.40.09',
	1452 => '0.40.10',
	1456 => '0.40.11',
	1459 => '0.40.12',
	1462 => '0.40.13',
	1469 => '0.40.14',
	1470 => '0.40.15',
	1471 => '0.40.16',
	1472 => '0.40.17',
	1473 => '0.40.18',
	1474 => '0.40.19',
	1477 => '0.40.20',
	1478 => '0.40.21',
	1479 => '0.40.22',
	1480 => '0.40.23',
	1481 => '0.40.24',

	// 0.42.x
	1531 => '0.42.01',
	1532 => '0.42.02',
	1533 => '0.42.03',
	1534 => '0.42.04',
	1537 => '0.42.05',
	1542 => '0.42.06',

	// 0.43.x
	1551 => '0.43.01',
	1552 => '0.43.02',
	1553 => '0.43.03',
	1555 => '0.43.04',
	1556 => '0.43.05',

	// 0.44.x
	1596 => '0.44.01',
	1597 => '0.44.02',
	1600 => '0.44.03',
	1603 => '0.44.04',
	1604 => '0.44.05',
	1611 => '0.44.06',
	1612 => '0.44.07',
	1613 => '0.44.08',
	1614 => '0.44.09',
	1620 => '0.44.10',
	1623 => '0.44.11',
	1625 => '0.44.12',

	// 0.47.x
	1710 => '0.47.01',
	1711 => '0.47.02',
	1712 => '0.47.03',
	1715 => '0.47.04',
	1716 => '0.47.05',

	// 0.50.x
	// Key 2080 was previously listed as '0.50.03'; an array holds one value
	// per key, so only the '0.50.04' label is kept active.
	2078 => '0.50.01',
	2079 => '0.50.02',
	2080 => '0.50.04',
	2081 => '0.50.11',
];

/**
 * Expand a block-compressed save into a temporary file that is laid out like
 * an uncompressed one.
 *
 * The input must be positioned just past its 8-byte header (version and
 * compression flag). From there it is read as a sequence of blocks, each a
 * 32-bit little-endian length followed by that many bytes of zlib data. The
 * output starts with a rewritten header (the given version and a compression
 * flag of 0) followed by the inflated blocks.
 *
 * @param int      $version  version number to store in the rewritten header
 * @param resource $file     compressed input stream; it is closed on success
 * @return resource  temporary file positioned just past its 8-byte header
 * @throws RuntimeException  when the temporary file cannot be created, or a
 *                           block is empty, truncated, or fails to inflate
 */
function decompress ($version, $file)
{
	$out = tmpfile();
	if ($out === false)
		throw new RuntimeException('Could not create a temporary file for decompression!');

	try
	{
		// write 'uncompressed' header
		fwrite($out, pack('VV', $version, 0));
		while (true)
		{
			// Read the length prefix directly: anything shorter than 4 bytes
			// means the end of the block list has been reached.
			$prefix = fread($file, 4);
			if ($prefix === false || strlen($prefix) < 4)
				break;
			$pck = unpack('Vdata', $prefix);
			$len = $pck['data'];

			// fread() refuses a zero length, and an empty block cannot hold
			// a valid zlib stream anyway.
			if ($len < 1)
				throw new RuntimeException('Compressed block has an invalid length!');

			$block = fread($file, $len);
			if ($block === false || strlen($block) !== $len)
				throw new RuntimeException('Compressed block is truncated!');

			// gzuncompress() returns false on bad data; writing that result
			// would silently drop the block and shift every later offset.
			$data = gzuncompress($block);
			if ($data === false)
				throw new RuntimeException('Compressed block could not be inflated!');

			fwrite($out, $data);
		}
	}
	catch (Exception $e)
	{
		// do not leave the temporary file open when bailing out
		fclose($out);
		throw $e;
	}

	// seek past the headers
	fseek($out, 8, SEEK_SET);
	fclose($file);
	return $out;
}

/**
 * Read an unsigned 32-bit little-endian integer from a stream.
 *
 * @param resource $file  open, readable stream
 * @return int|null  the decoded value, or null when fewer than 4 bytes
 *                   could be read (for example at end of file)
 */
function readLong ($file)
{
	$x = fread($file, 4);
	// On a short read unpack() would fail and return false; indexing that
	// result raises a warning, so report "no value" explicitly instead.
	if ($x === false || strlen($x) < 4)
		return null;
	$pck = unpack('Vdata', $x);
	return $pck['data'];
}
/**
 * Read an unsigned 16-bit little-endian integer from a stream.
 *
 * @param resource $file  open, readable stream
 * @return int|null  the decoded value, or null when fewer than 2 bytes
 *                   could be read (for example at end of file)
 */
function readShort ($file)
{
	$x = fread($file, 2);
	// On a short read unpack() would fail and return false; indexing that
	// result raises a warning, so report "no value" explicitly instead.
	if ($x === false || strlen($x) < 2)
		return null;
	$pck = unpack('vdata', $x);
	return $pck['data'];
}
/**
 * Read a single unsigned byte from a stream.
 *
 * @param resource $file  open, readable stream
 * @return int|null  the byte value (0-255), or null when nothing could be
 *                   read (for example at end of file)
 */
function readByte ($file)
{
	$x = fread($file, 1);
	// On an empty read unpack() would fail and return false; indexing that
	// result raises a warning, so report "no value" explicitly instead.
	if ($x === false || strlen($x) < 1)
		return null;
	$pck = unpack('Cdata', $x);
	return $pck['data'];
}

// Optional first command-line argument: the folder that holds world.dat or
// world.sav. Without it the files are looked up in the current directory.
$dir = '';
if ($argc > 1 && $argv[1] !== '')
	// DIRECTORY_SEPARATOR instead of a literal backslash, which is an
	// ordinary filename character outside Windows.
	$dir = $argv[1] . DIRECTORY_SEPARATOR;

// Extra header bytes to skip, keyed by the first save version that has them.
// Every entry whose key is <= the file's version applies, in this order.
$headerGrowth = array(
	1372 => 0x14,
	1384 => 0x4,
	1441 => 0x14,
	1484 => 0xC,
	1485 => 0x8,
	1493 => 0x4,
	1571 => 0x4,
	1662 => 0x8,
);

// Prefer world.dat; fall back to world.sav when it cannot be opened.
$source = 'world.dat';
$raw = @fopen($dir.$source, 'rb');
if (!$raw)
{
	$source = 'world.sav';
	$raw = @fopen($dir.$source, 'rb');
	if (!$raw)
		die("Could not open world.dat or world.sav!");
}
$isSave = ($source === 'world.sav');

// Both files start with a 32-bit version and a 32-bit compression flag.
$version = readLong($raw);
if ($version === null)
	// not even a complete version field: nothing sensible can follow
	die("$source file is corrupted!");
if (isset($versions[$version]))
	echo "World is from version $versions[$version]\n";
else	echo "World is from an unrecognized version '$version'!\n";
$comp = readLong($raw);
if ($comp == 1)
	$raw = decompress($version, $raw);
elseif ($comp > 0)
	die("$source file is corrupted!");

// Skip the fixed part of the header (its size differs between the two
// files), then the fields added by later versions.
fseek($raw, $isSave ? 0x56 : 0x46, SEEK_CUR);
foreach ($headerGrowth as $since => $bytes)
	if ($version >= $since)
		fseek($raw, $bytes, SEEK_CUR);

if ($isSave)
{
	// A length-prefixed string precedes the world name in world.sav; it is
	// read only to advance the stream. fread() rejects a zero length, so an
	// empty string is handled without calling it.
	$len = readShort($raw);
	$name = $len > 0 ? fread($raw, $len) : '';
}
else
{
	// world.dat: a flag byte announces an optional section made of two
	// length-prefixed strings followed by 0x30 bytes.
	if (readByte($raw))
	{
		fseek($raw, readShort($raw), SEEK_CUR);
		fseek($raw, readShort($raw), SEEK_CUR);
		fseek($raw, 0x30, SEEK_CUR);
	}
	fseek($raw, 0xF, SEEK_CUR);
}

// Length-prefixed world name (guarded against a zero length as above).
$len = readShort($raw);
$world = $len > 0 ? fread($raw, $len) : '';

// world.sav additionally stores the current year right after the name.
if ($isSave)
	$year = readLong($raw);

if ($version >= 2072)
{
	// Newer saves insert further fixed-size fields and three
	// length-prefixed strings before the raw lists.
	fseek($raw, 0x8, SEEK_CUR);
	fseek($raw, readShort($raw), SEEK_CUR);
	fseek($raw, readShort($raw), SEEK_CUR);
	fseek($raw, 0x10, SEEK_CUR);
	fseek($raw, readShort($raw), SEEK_CUR);
	fseek($raw, 0x4, SEEK_CUR);
}

if ($isSave)
	echo "Extracting randomly generated raws of $world year $year:\n\n";
else
	echo "Extracting randomly generated raws of $world:\n\n";

/**
 * Read one list of raw definitions from the stream and write each definition
 * to its own text file in the current directory.
 *
 * Layout consumed from $raw: a 32-bit definition count, then per definition a
 * 32-bit line count followed by that many length-prefixed (16-bit) strings.
 * Lines are joined with CRLF. A definition is saved as "<name>.txt", where
 * <name> is the text captured after "[<token>:" on a matching line (when
 * several lines match, the last one wins) or "<type>_<index>" when no line
 * matches.
 *
 * @param resource $raw     stream positioned at the start of the list
 * @param string   $type    label used in progress output and fallback names
 * @param array    $tokens  token names; each is inserted into a regular
 *                          expression unescaped, so an entry may itself be
 *                          a pattern
 */
function extractList ($raw, $type, $tokens)
{
	$count = readLong($raw);
	echo "Extracting $count $type definitions...\n";

	for ($i = 0; $i < $count; $i++)
	{
		$data = '';
		$file = $type.'_'.$i;
		$numLines = readLong($raw);
		if ($numLines === null)
		{
			// Ran out of data: stop instead of writing an empty file for
			// every remaining index.
			echo "Unexpected end of data in $type list!\n";
			return;
		}
		for ($j = 0; $j < $numLines; $j++)
		{
			$len = readShort($raw);
			if ($len === null)
			{
				echo "Unexpected end of data in $type list!\n";
				return;
			}
			// fread() rejects a zero length (ValueError on PHP 8), so an
			// empty line is produced without calling it.
			$str = $len > 0 ? fread($raw, $len) : '';
			if ($str === false)
				$str = '';
			$data .= "$str\r\n";
			foreach ($tokens as $token)
				if (preg_match('/\['.$token.':(.*?)\]/', $str, $x))
					$file = $x[1];
		}
		// The name comes from file contents: neutralise path separators and
		// characters that are not allowed in Windows filenames so the output
		// always lands in the current directory.
		$file = preg_replace('~[\\\\/:*?"<>|\x00-\x1F]~', '_', $file);
		echo "$file...\n";
		if (file_put_contents($file .'.txt', $data) === false)
			echo "Could not write $file.txt!\n";
	}
}

// Raw lists in the order they are stored in the file. Each row is
// (first save version that contains the list, type label, token pattern);
// a null version means the list is always present.
$sections = array(
	array(1372, 'material',    'INORGANIC'),
	array(1441, 'plant',       'PLANT'),
	array(1390, 'item',        'ITEM_.*?'),
	array(null, 'creature',    'CREATURE'),
	array(1441, 'entity',      'ENTITY'),
	array(1484, 'reaction',    'REACTION'),
	array(1372, 'interaction', 'INTERACTION'),
	array(1423, 'language',    'TRANSLATION'),
);

foreach ($sections as $section)
{
	list($since, $type, $token) = $section;
	// skip lists that the file's version does not contain
	if ($since !== null && !($version >= $since))
		continue;
	extractList($raw, $type, array($token));
}

echo "done!\n";
?>