Keep the resource and property names in a compressed binary file
Of the compression options tested, zstd at level 15 had one of the fastest decompression speeds together with the best compression ratio.
henriquegemignani committed Jul 21, 2023
1 parent a8ebbe4 commit d5d6f27
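A rough way to re-check that trade-off locally; this is only a sketch, assuming the `zstd` PyPI package and the repository's bundled dread_resource_names.json, with arbitrarily chosen levels to compare:

```python
# Sketch: compare compressed size and decompression time for a few zstd levels.
import time
from pathlib import Path

import zstd

raw = Path("src/mercury_engine_data_structures/dread_resource_names.json").read_bytes()

for level in (3, 15, 22):
    blob = zstd.compress(raw, level)
    start = time.perf_counter()
    for _ in range(100):
        zstd.decompress(blob)
    per_call_ms = (time.perf_counter() - start) * 1000 / 100
    print(f"level {level:>2}: {len(blob):>8} bytes, {per_call_ms:.2f} ms/decompress")
```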
Showing 8 changed files with 151 additions and 6 deletions.
3 changes: 2 additions & 1 deletion .gitignore
@@ -131,9 +131,10 @@ dmypy.json
# PyCharm
/.idea

# Version
# Generated files
src/mercury_engine_data_structures/version.py
src/mercury_engine_data_structures/formats/dread_types.py
src/mercury_engine_data_structures/*.bin

# Deny certain files at root
/*.txt
1 change: 1 addition & 0 deletions MANIFEST.in
@@ -0,0 +1 @@
include
4 changes: 3 additions & 1 deletion pyproject.toml
@@ -4,6 +4,7 @@ requires = [
"setuptools_scm[toml]>=3.4.3",
"wheel>=0.37.0",
"construct>=2.10.0",
"zstd",
]
build-backend = "setuptools.build_meta"

@@ -23,7 +24,8 @@ requires-python = ">=3.8"
dynamic = ["version"]

dependencies = [
"construct>=2.10.0"
"construct>=2.10.0",
"zstd",
]


9 changes: 9 additions & 0 deletions setup.cfg
@@ -0,0 +1,9 @@
[options.package_data]
mercury_engine_data_structures =
    *.bin

[options.exclude_package_data]
mercury_engine_data_structures =
    dread_property_names.json
    dread_resource_names.json
    sr_resource_names.json
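A quick way to confirm the packaging rules do what they claim; a sketch under the assumption that a wheel has already been built into dist/:

```python
# Sketch: inspect a built wheel to confirm the .bin tables are packaged and the
# excluded .json name tables are not.
import glob
import zipfile

wheel_path = glob.glob("dist/mercury_engine_data_structures-*.whl")[0]
names = zipfile.ZipFile(wheel_path).namelist()

assert any(n.endswith("dread_resource_names.bin") for n in names)
assert not any(n.endswith("dread_resource_names.json") for n in names)
```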
50 changes: 50 additions & 0 deletions src/mercury_engine_data_structures/_dread_data_construct.py
@@ -0,0 +1,50 @@
import struct
import typing

import construct


class CompressedZSTD(construct.Tunnel):
    def __init__(self, subcon, level: int = 3):
        super().__init__(subcon)
        import zstd
        self.lib = zstd
        self.level = level

    def _decode(self, data, context, path):
        return self.lib.decompress(data)

    def _encode(self, data, context, path):
        return self.lib.compress(data, self.level)


class HashesDict(construct.Construct):
    def __init__(self):
        super().__init__()
        self._build_construct = construct.PrefixedArray(
            construct.Int32un,
            construct.Sequence(
                construct.PascalString(construct.Int16un, "ascii"),  # key
                construct.Int64un,  # hash
            )
        )

    def _parse(self, stream, context, path) -> typing.Dict[str, int]:
        key_size_struct = struct.Struct("=H")
        value_size_struct = struct.Struct("=Q")

        count = construct.Int32un._parse(stream, None, "")

        result = {}
        for _ in range(count):
            key = stream.read(key_size_struct.unpack(stream.read(2))[0]).decode()
            value = value_size_struct.unpack(stream.read(8))[0]
            result[key] = value

        return result

    def _build(self, obj: typing.Dict[str, int], stream, context, path):
        return self._build_construct._build(list(obj.items()), stream, context, path)


KnownHashes = CompressedZSTD(HashesDict(), 15)
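A minimal round-trip sketch for the new KnownHashes construct; the resource path and hash below are placeholders, not real game data:

```python
# Sketch: build a compressed name -> hash table from a dict and parse it back.
from mercury_engine_data_structures._dread_data_construct import KnownHashes

names = {"actors/props/example/example.bmsad": 0x1234567890ABCDEF}  # placeholder entry

blob = KnownHashes.build(names)            # zstd-compressed count + (key, hash) pairs
assert dict(KnownHashes.parse(blob)) == names
```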
18 changes: 15 additions & 3 deletions src/mercury_engine_data_structures/dread_data.py
@@ -4,17 +4,25 @@
from pathlib import Path
from typing import Dict, Optional

from mercury_engine_data_structures._dread_data_construct import KnownHashes

_root = Path(__file__).parent


@functools.lru_cache
def get_raw_types() -> Dict[str, typing.Any]:
    path = Path(__file__).parent.joinpath("dread_types.json")
    path = _root.joinpath("dread_types.json")
    with path.open() as f:
        return json.load(f)


@functools.lru_cache
def all_name_to_asset_id() -> Dict[str, int]:
    path = Path(__file__).parent.joinpath("dread_resource_names.json")
    bin_path = _root.joinpath("dread_resource_names.bin")
    if bin_path.exists():
        return dict(KnownHashes.parse_file(bin_path))

    path = _root.joinpath("dread_resource_names.json")
    with path.open() as names_file:
        return json.load(names_file)

@@ -33,7 +41,11 @@ def name_for_asset_id(asset_id: int) -> Optional[str]:

@functools.lru_cache
def all_name_to_property_id() -> Dict[str, int]:
    path = Path(__file__).parent.joinpath("dread_property_names.json")
    bin_path = _root.joinpath("dread_property_names.bin")
    if bin_path.exists():
        return dict(KnownHashes.parse_file(bin_path))

    path = _root.joinpath("dread_property_names.json")
    with path.open() as names_file:
        return json.load(names_file)

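With this change the lookups prefer the generated .bin table and quietly fall back to the JSON file, so calling code is unaffected. A usage sketch (the resource name is hypothetical):

```python
# Sketch: the cached lookups behave the same whether the .bin or .json source is used.
from mercury_engine_data_structures import dread_data

names = dread_data.all_name_to_asset_id()          # loaded once, cached by lru_cache
asset_id = names["system/example/example.bmsad"]   # hypothetical resource name
assert dread_data.name_for_asset_id(asset_id) == "system/example/example.bmsad"
```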
12 changes: 12 additions & 0 deletions src/mercury_engine_data_structures/samus_returns_data.py
@@ -4,6 +4,10 @@
from pathlib import Path
from typing import Dict, Optional

from mercury_engine_data_structures._dread_data_construct import KnownHashes

_root = Path(__file__).parent


@functools.lru_cache
def get_raw_types() -> Dict[str, typing.Any]:
@@ -12,6 +16,10 @@ def get_raw_types() -> Dict[str, typing.Any]:

@functools.lru_cache
def all_name_to_asset_id() -> Dict[str, int]:
    bin_path = _root.joinpath("sr_resource_names.bin")
    if bin_path.exists():
        return dict(KnownHashes.parse_file(bin_path))

    path = Path(__file__).parent.joinpath("sr_resource_names.json")

    with path.open() as names_file:
@@ -32,6 +40,10 @@ def name_for_asset_id(asset_id: int) -> Optional[str]:

@functools.lru_cache
def all_name_to_property_id() -> Dict[str, int]:
    bin_path = _root.joinpath("sr_property_names.bin")
    if bin_path.exists():
        return dict(KnownHashes.parse_file(bin_path))

    path = Path(__file__).parent.joinpath("sr_property_names.json")
    with path.open() as names_file:
        return json.load(names_file)
60 changes: 59 additions & 1 deletion tools/create_class_definitions.py
@@ -1,6 +1,8 @@
import collections
import copy
import io
import json
import struct
from pathlib import Path

import construct
@@ -16,6 +18,11 @@
type_lib = construct.Container(dread_types=dread_types)
exec(compile(type_lib_source, type_lib_path, "exec"), type_lib)

dread_data_construct_path = meds_root.joinpath("_dread_data_construct.py")
dread_data_construct = construct.Container()
exec(compile(dread_data_construct_path.read_text(), dread_data_construct_path, "exec"), dread_data_construct)


primitive_to_construct = {
    type_lib.PrimitiveKind.VECTOR_2: "common_types.CVector2D",
    type_lib.PrimitiveKind.VECTOR_3: "common_types.CVector3D",
@@ -57,7 +64,8 @@ def children_for(self, type_name: str, recursive: bool = True):
                    yield from self.children_for(child)

    def _debug(self, msg: str):
        print(" " * len(self._types_being_exported) + f"* {msg}")
        pass
        # print(" " * len(self._types_being_exported) + f"* {msg}")

    def _export_enum_type(self, type_variable: str, type_name: str):
        data = self.all_types[type_name]
@@ -224,6 +232,47 @@ def export_code(self):
        return code


class CompressedZSTD(construct.Tunnel):
    def __init__(self, subcon, level: int = 3):
        super().__init__(subcon)
        import zstd
        self.lib = zstd
        self.level = level

    def _decode(self, data, context, path):
        return self.lib.decompress(data)

    def _encode(self, data, context, path):
        return self.lib.compress(data, self.level)


def raw_data():
    return construct.PrefixedArray(
        construct.Int32un,
        construct.Sequence(
            construct.PascalString(construct.Int16un, "ascii"),  # key
            construct.Int64un,  # hash
        )
    )


def parse(data: bytes):
    stream = io.BytesIO(data)

    key_size_struct = struct.Struct("=H")
    value_size_struct = struct.Struct("=Q")

    count = construct.Int32un._parse(stream, None, "")

    result = {}
    for _ in range(count):
        key = stream.read(key_size_struct.unpack(stream.read(2))[0]).decode()
        value = value_size_struct.unpack(stream.read(8))[0]
        result[key] = value

    return result


def main():
    output_path = meds_root.joinpath("formats", "dread_types.py")

@@ -235,6 +284,15 @@ def main():

    output_path.write_text(type_exporter.export_code())

    for file_name in ["dread_resource_names", "dread_property_names", "sr_resource_names"]:
        with meds_root.joinpath(f"{file_name}.json").open() as f:
            file_data: dict[str, int] = json.load(f)

        dread_data_construct.KnownHashes.build_file(
            file_data,
            meds_root.joinpath(f"{file_name}.bin")
        )


if __name__ == '__main__':
    main()
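A small follow-up check, not part of this commit, to confirm each generated .bin decodes back to exactly the JSON it was built from; the src/ path assumes the repository layout used above:

```python
# Sketch: verify the generated binary tables round-trip against their JSON sources.
import json
from pathlib import Path

from mercury_engine_data_structures._dread_data_construct import KnownHashes

meds_root = Path("src/mercury_engine_data_structures")  # assumed repository layout

for file_name in ["dread_resource_names", "dread_property_names", "sr_resource_names"]:
    expected = json.loads(meds_root.joinpath(f"{file_name}.json").read_text())
    actual = dict(KnownHashes.parse_file(meds_root.joinpath(f"{file_name}.bin")))
    assert actual == expected, file_name
```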
