From 67b654e56b4a487f56856d5a1688696ba274966d Mon Sep 17 00:00:00 2001
From: Moritz Baron
Date: Mon, 15 Jan 2024 06:31:53 +0100
Subject: [PATCH] refactor

---
 src/archive/archiveinmemory.rs |  58 +++++
 src/archive/archivereadonly.rs | 310 ++++++++++++++++++++++++++
 src/archive/mod.rs             | 390 ++++-----------------------------
 3 files changed, 408 insertions(+), 350 deletions(-)
 create mode 100644 src/archive/archiveinmemory.rs
 create mode 100644 src/archive/archivereadonly.rs

diff --git a/src/archive/archiveinmemory.rs b/src/archive/archiveinmemory.rs
new file mode 100644
index 0000000..36606fc
--- /dev/null
+++ b/src/archive/archiveinmemory.rs
@@ -0,0 +1,58 @@
+/////////////////////////////////////////////////////////////////////////////////////////
+// IN-MEMORY ARCHIVE
+//
+// https://learn.microsoft.com/en-us/dotnet/api/system.io.compression.ziparchivemode?view=net-8.0
+//
+// When you set the mode to Update, the underlying file or stream must support reading, writing, and seeking.
+// The content of the entire archive is held in memory,
+// and no data is written to the underlying file or stream until the archive is disposed.
+//
+// We don't implement a wrapped stream here. Archive needs to be written manually for now.
+//
+/////////////////////////////////////////////////////////////////////////////////////////
+
+use std::{collections::HashMap, io::Result, path::Path};
+
+use super::*;
+
+#[derive(Debug)]
+pub struct ZipArchiveMemory {
+    /// The files inside an archive
+    pub entries: HashMap<u64, ZipEntry>,
+    pub dependencies: Vec<Dependency>,
+}
+
+/////////////////////////////////////////////////////////////////////////////////////////
+// IMPL
+
+impl ZipArchiveMemory {
+    fn write(&mut self) {
+        todo!()
+    }
+
+    /// Compresses and adds a file to the archive.
+    ///
+    /// # Errors
+    ///
+    /// This function will return an error if compression or io fails, or if the mode is Read.
+    pub fn create_entry<P: AsRef<Path>>(
+        &mut self,
+        _file_path: P,
+        _compression_level: CompressionLevel,
+    ) -> Result<ZipEntry> {
+        // can only add entries in update mode
+
+        // write?
+
+        todo!()
+    }
+
+    /// Deletes an entry from the archive
+    pub fn delete_entry(&mut self, hash: &u64) -> Option<ZipEntry> {
+        // can only delete entries in update mode
+
+        // Set dirty
+
+        self.entries.remove(hash)
+    }
+}
diff --git a/src/archive/archivereadonly.rs b/src/archive/archivereadonly.rs
new file mode 100644
index 0000000..fbda714
--- /dev/null
+++ b/src/archive/archivereadonly.rs
@@ -0,0 +1,310 @@
+/////////////////////////////////////////////////////////////////////////////////////////
+// READ ONLY ARCHIVE
+//
+// https://learn.microsoft.com/en-us/dotnet/api/system.io.compression.ziparchivemode?view=net-8.0
+//
+// When you set the mode to Read, the underlying file or stream must support reading, but does not have to support seeking.
+// If the underlying file or stream supports seeking, the files are read from the archive as they are requested.
+// If the underlying file or stream does not support seeking, the entire archive is held in memory.
+//
+// We only implement Read + Seek and never hold anything in memory here.
+//
+/////////////////////////////////////////////////////////////////////////////////////////
+
+use std::{
+    borrow::BorrowMut,
+    collections::HashMap,
+    fs::{create_dir_all, File},
+    io::{self, BufWriter, Read, Result, Seek, SeekFrom, Write},
+    path::Path,
+};
+
+use byteorder::{LittleEndian, ReadBytesExt};
+
+use crate::*;
+use crate::{fnv1a64_hash_string, io::FromReader};
+
+use super::*;
+
+#[derive(Debug)]
+pub struct ZipArchiveReadonly<R>
+where
+    R: Read + Seek,
+{
+    /// wraps a read-only stream
+    stream: R,
+    /// The files inside an archive
+    pub entries: HashMap<u64, ZipEntry>,
+    pub dependencies: Vec<Dependency>,
+}
+
+/////////////////////////////////////////////////////////////////////////////////////////
+// IMPL
+
+impl<R> ZipArchiveReadonly<R>
+where
+    R: Read + Seek,
+{
+    /// Get an entry in the archive by resource path.
+    pub fn get_entry(&self, name: &str) -> Option<&ZipEntry> {
+        self.entries.get(&fnv1a64_hash_string(&name.to_owned()))
+    }
+
+    /// Get an entry in the archive by hash (FNV1a64 of resource path).
+    pub fn get_entry_by_hash(&self, hash: &u64) -> Option<&ZipEntry> {
+        self.entries.get(hash)
+    }
+
+    /// Extracts a single entry to a directory path.
+    ///
+    /// # Errors
+    ///
+    /// This function will return an error if the entry cannot be found or any io fails.
+    pub fn extract_entry<P: AsRef<Path>>(
+        &mut self,
+        entry: ZipEntry,
+        destination_directory_name: &P,
+        overwrite_files: bool,
+        hash_map: &HashMap<u64, String>,
+    ) -> Result<()> {
+        let Some(info) = entry.get_resolved_name(&hash_map) else {
+            return Err(io::Error::new(
+                io::ErrorKind::InvalidData,
+                "Could not get entry info from archive.",
+            ));
+        };
+
+        // name or hash is a relative path
+        let outfile = destination_directory_name.as_ref().join(info);
+        create_dir_all(outfile.parent().expect("Could not create an out_dir"))?;
+
+        // extract to stream
+        let mut fs = if overwrite_files {
+            File::create(outfile)?
+        } else {
+            File::options()
+                .read(true)
+                .write(true)
+                .create_new(true)
+                .open(outfile)?
+        };
+
+        let writer = BufWriter::new(&mut fs);
+        self.extract_segments(&entry, writer)?;
+
+        Ok(())
+    }
+
+    /// Extracts a single entry by hash to a directory path.
+    ///
+    /// # Errors
+    ///
+    /// This function will return an error if the entry cannot be found or any io fails.
+    pub fn extract_entry_by_hash<P: AsRef<Path>>(
+        &mut self,
+        hash: u64,
+        destination_directory_name: &P,
+        overwrite_files: bool,
+        hash_map: &HashMap<u64, String>,
+    ) -> Result<()> {
+        if let Some(entry) = self.get_entry_by_hash(&hash) {
+            self.extract_entry(
+                entry.clone(),
+                destination_directory_name,
+                overwrite_files,
+                hash_map,
+            )
+        } else {
+            return Err(io::Error::new(
+                io::ErrorKind::InvalidData,
+                "Could not find entry.",
+            ));
+        }
+    }
+
+    /// Extracts a single entry by resource path to a directory path.
+    ///
+    /// # Errors
+    ///
+    /// This function will return an error if the entry cannot be found or any io fails.
+    pub fn extract_entry_by_name<P: AsRef<Path>>(
+        &mut self,
+        name: String,
+        destination_directory_name: &P,
+        overwrite_files: bool,
+        hash_map: &HashMap<u64, String>,
+    ) -> Result<()> {
+        if let Some(entry) = self.get_entry(&name) {
+            self.extract_entry(
+                entry.clone(),
+                destination_directory_name,
+                overwrite_files,
+                hash_map,
+            )
+        } else {
+            return Err(io::Error::new(
+                io::ErrorKind::InvalidData,
+                "Could not find entry.",
+            ));
+        }
+    }
+
+    /// Returns an open read stream to an entry of this [`ZipArchive`].
+    pub fn open_entry<W: Write>(&mut self, entry: ZipEntry, writer: W) -> Result<()> {
+        self.extract_segments(&entry, writer)?;
+
+        Ok(())
+    }
+
+    /// Extracts all entries to the given directory.
+    ///
+    /// # Errors
+    ///
+    /// This function will return an error if io fails.
+    pub fn extract_to_directory<P: AsRef<Path>>(
+        &mut self,
+        destination_directory_name: &P,
+        overwrite_files: bool,
+        hash_map: Option<HashMap<u64, String>>,
+    ) -> Result<()> {
+        let hash_map = if let Some(hash_map) = hash_map {
+            hash_map
+        } else {
+            get_red4_hashes()
+        };
+
+        // collect info
+        let mut entries: Vec<ZipEntry> = vec![];
+        for (_hash, entry) in &self.entries {
+            entries.push(entry.clone());
+        }
+
+        for entry in entries {
+            self.extract_entry(
+                entry,
+                destination_directory_name,
+                overwrite_files,
+                &hash_map,
+            )?;
+        }
+
+        Ok(())
+    }
+
+    // getters
+
+    fn reader_mut(&mut self) -> &mut R {
+        self.stream.borrow_mut()
+    }
+
+    // methods
+
+    /// Extracts segments to a writer, expects correct offset info.
+    ///
+    /// # Errors
+    ///
+    /// This function will return an error if io fails
+    fn extract_segments<W: Write>(&mut self, entry: &ZipEntry, mut writer: W) -> Result<()> {
+        let segment = entry.segment;
+        let buffers = entry.buffers.clone();
+
+        if segment.size() == segment.z_size() {
+            // just copy
+            self.reader_mut().seek(SeekFrom::Start(segment.offset()))?;
+            let mut buffer = vec![0; segment.z_size() as usize];
+            self.reader_mut().read_exact(&mut buffer[..])?;
+            writer.write_all(&buffer)?;
+        } else {
+            decompress_segment(self.reader_mut(), &segment, &mut writer)?;
+        }
+        for segment in buffers {
+            self.reader_mut().seek(SeekFrom::Start(segment.offset()))?;
+            let mut buffer = vec![0; segment.z_size() as usize];
+            self.reader_mut().read_exact(&mut buffer[..])?;
+            writer.write_all(&buffer)?;
+        }
+
+        Ok(())
+    }
+
+    /// Opens an archive, needs to be read-only
+    pub(crate) fn from_reader_consume(mut reader: R) -> Result<ZipArchiveReadonly<R>> {
+        // read header
+        let header = Header::from_reader(&mut reader)?;
+
+        // read custom data
+        let mut file_names: HashMap<u64, String> = HashMap::default();
+        if let Ok(custom_data_length) = reader.read_u32::<LittleEndian>() {
+            if custom_data_length > 0 {
+                reader.seek(io::SeekFrom::Start(Header::HEADER_EXTENDED_SIZE))?;
+                if let Ok(footer) = LxrsFooter::from_reader(&mut reader) {
+                    // add files to hashmap
+                    for f in footer.files() {
+                        let hash = fnv1a64_hash_string(f);
+                        file_names.insert(hash, f.to_owned());
+                    }
+                }
+            }
+        }
+
+        // read index
+        // move to offset Header.IndexPosition
+        reader.seek(io::SeekFrom::Start(header.index_position()))?;
+        let index = Index::from_reader(&mut reader)?;
+
+        // read tables
+        let mut file_entries: HashMap<u64, FileEntry> = HashMap::default();
+        for _i in 0..index.file_entry_count() {
+            let entry = FileEntry::from_reader(&mut reader)?;
+            file_entries.insert(entry.name_hash_64(), entry);
+        }
+
+        let mut file_segments = Vec::default();
+        for _i in 0..index.file_segment_count() {
+            file_segments.push(FileSegment::from_reader(&mut reader)?);
+        }
+
+        // dependencies can't be connected to individual files anymore
+        let mut dependencies = Vec::default();
+        for _i in 0..index.resource_dependency_count() {
+            dependencies.push(Dependency::from_reader(&mut reader)?);
+        }
+
+        // construct wrapper
+        let mut entries = HashMap::default();
+        for (hash, entry) in file_entries.iter() {
+            let resolved = if let Some(name) = file_names.get(hash) {
+                Some(name.to_owned())
+            } else {
+                None
+            };
+
+            let start_index = entry.segments_start();
+            let next_index = entry.segments_end();
+            if let Some(segment) = file_segments.get(start_index as usize) {
+                let mut buffers: Vec<FileSegment> = vec![];
+                for i in start_index + 1..next_index {
+                    if let Some(buffer) = file_segments.get(i as usize) {
+                        buffers.push(*buffer);
+                    }
+                }
+
+                let zip_entry = ZipEntry {
+                    hash: *hash,
+                    name: resolved,
+                    entry: *entry,
+                    segment: *segment,
+                    buffers,
+                };
+                entries.insert(*hash, zip_entry);
+            }
+        }
+
+        let archive = ZipArchiveReadonly::<R> {
+            stream: reader,
+            entries,
+            dependencies,
+        };
+        Ok(archive)
+    }
+}
diff --git a/src/archive/mod.rs b/src/archive/mod.rs
index 0e4d6c0..c53df11 100644
--- a/src/archive/mod.rs
+++ b/src/archive/mod.rs
@@ -3,10 +3,9 @@
 /////////////////////////////////////////////////////////////////////////////////////////
 
 use std::{
-    borrow::BorrowMut,
     collections::HashMap,
-    fs::{create_dir_all, File},
-    io::{self, BufWriter, Cursor, Error, ErrorKind, Read, Result, Seek, SeekFrom, Write},
+    fs::File,
+    io::{BufWriter, Cursor, Error, ErrorKind, Read, Result, Seek, SeekFrom, Write},
     path::{Path, PathBuf},
 };
 
@@ -14,11 +13,17 @@ use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
 use strum::IntoEnumIterator;
 use walkdir::WalkDir;
 
+use crate::fnv1a64_hash_string;
 use crate::kraken::*;
 use crate::{cr2w::*, *};
-use crate::{fnv1a64_hash_string, io::FromReader};
 
-use self::{dependency::*, file_entry::*, file_segment::*, header::*, index::*, lxrs::*};
+use self::{
+    archiveinmemory::*, archivereadonly::*, dependency::*, file_entry::*, file_segment::*,
+    header::*, index::*, lxrs::*,
+};
+
+mod archiveinmemory;
+mod archivereadonly;
 
 mod dependency;
 mod file_entry;
@@ -107,7 +112,7 @@ where
     P: AsRef<Path>,
     R: Read + Seek + 'static,
 {
-    let mut archive = ZipArchive::from_reader_consume(source, ArchiveMode::Read)?;
+    let mut archive = ZipArchiveReadonly::from_reader_consume(source)?;
     archive.extract_to_directory(destination_directory_name, overwrite_files, hash_map)
 }
 
@@ -139,7 +144,7 @@ where
 /// # Errors
 ///
 /// This function will return an error if any io fails.
-pub fn open<P>(archive_file_name: P, mode: ArchiveMode) -> Result<ZipArchive<File>>
+/*pub fn open<P>(archive_file_name: P, mode: ArchiveMode) -> Result<ZipArchive<File>>
 where
     P: AsRef<Path>,
 {
@@ -154,8 +159,7 @@ where
             ZipArchive::from_reader_consume(file, mode)
         }
     }
-}
-
+}*/
 // public static System.IO.Compression.ZipArchive OpenRead (string archiveFileName);
 
 /// Opens an archive for reading at the specified path.
@@ -163,12 +167,12 @@ where
 /// # Errors
 ///
 /// This function will return an error if any io fails.
-pub fn open_read<P>(archive_file_name: P) -> Result<ZipArchive<File>>
+pub fn open_read<P>(archive_file_name: P) -> Result<ZipArchiveReadonly<File>>
 where
     P: AsRef<Path>,
 {
     let file = File::open(archive_file_name)?;
-    ZipArchive::from_reader_consume(file, ArchiveMode::Read)
+    ZipArchiveReadonly::from_reader_consume(file)
 }
 
 /// Packs redengine 4 resource file in a folder to an archive
@@ -506,353 +510,39 @@ fn pad_until_page<W: Write + Seek>(writer: &mut W) -> Result<()> {
 // API
 /////////////////////////////////////////////////////////////////////////////////////////
 
-#[derive(Debug, Clone, Default, PartialEq)]
-pub enum ArchiveMode {
-    #[default]
-    Create,
-    Read,
-    Update,
-}
-
-#[derive(Debug)]
-pub struct ZipArchive<S> {
-    /// wraps a stream
-    stream: S,
-
-    /// The read-write mode of the archive
-    mode: ArchiveMode,
-    dirty: bool,
-    /// The files inside an archive
-    entries: HashMap<u64, ZipEntry>,
-    pub dependencies: Vec<Dependency>,
-}
-
-impl<S> ZipArchive<S> {
-    /// Get an entry in the archive by resource path.
-    pub fn get_entry(&self, name: &str) -> Option<&ZipEntry> {
-        self.entries.get(&fnv1a64_hash_string(&name.to_owned()))
-    }
-
-    /// Get an entry in the archive by hash (FNV1a64 of resource path).
-    pub fn get_entry_by_hash(&self, hash: &u64) -> Option<&ZipEntry> {
-        self.entries.get(hash)
-    }
-}
-
-impl<R> ZipArchive<R>
-where
-    R: Read + Seek,
-{
-    /// Extracts a single entry to a directory path.
-    ///
-    /// # Errors
-    ///
-    /// This function will return an error if the entry cannot be found or any io fails.
-    pub fn extract_entry<P: AsRef<Path>>(
-        &mut self,
-        entry: ZipEntry,
-        destination_directory_name: &P,
-        overwrite_files: bool,
-        hash_map: &HashMap<u64, String>,
-    ) -> Result<()> {
-        let Some(info) = entry.get_resolved_name(&hash_map) else {
-            return Err(io::Error::new(
-                io::ErrorKind::InvalidData,
-                "Could not get entry info from archive.",
-            ));
-        };
-
-        // name or hash is a relative path
-        let outfile = destination_directory_name.as_ref().join(info);
-        create_dir_all(outfile.parent().expect("Could not create an out_dir"))?;
+/*
 
-        // extract to stream
-        let mut fs = if overwrite_files {
-            File::create(outfile)?
-        } else {
-            File::options()
-                .read(true)
-                .write(true)
-                .create_new(true)
-                .open(outfile)?
-        };
+https://learn.microsoft.com/en-us/dotnet/api/system.io.compression.ziparchivemode?view=net-8.0
 
-        let writer = BufWriter::new(&mut fs);
-        self.extract_segments(&entry, writer)?;
+When you set the mode to Read, the underlying file or stream must support reading, but does not have to support seeking. If the underlying file or stream supports seeking, the files are read from the archive as they are requested. If the underlying file or stream does not support seeking, the entire archive is held in memory.
 
-        Ok(())
-    }
+When you set the mode to Create, the underlying file or stream must support writing, but does not have to support seeking. Each entry in the archive can be opened only once for writing. If you create a single entry, the data is written to the underlying stream or file as soon as it is available. If you create multiple entries, such as by calling the CreateFromDirectory method, the data is written to the underlying stream or file after all the entries are created.
 
-    /// Extracts a single entry by hash to a directory path.
-    ///
-    /// # Errors
-    ///
-    /// This function will return an error if the entry cannot be found or any io fails.
-    pub fn extract_entry_by_hash<P: AsRef<Path>>(
-        &mut self,
-        hash: u64,
-        destination_directory_name: &P,
-        overwrite_files: bool,
-        hash_map: &HashMap<u64, String>,
-    ) -> Result<()> {
-        if let Some(entry) = self.get_entry_by_hash(&hash) {
-            self.extract_entry(
-                entry.clone(),
-                destination_directory_name,
-                overwrite_files,
-                hash_map,
-            )
-        } else {
-            return Err(io::Error::new(
-                io::ErrorKind::InvalidData,
-                "Could not find entry.",
-            ));
-        }
-    }
+When you set the mode to Update, the underlying file or stream must support reading, writing, and seeking.
+The content of the entire archive is held in memory, and no data is written to the underlying file or stream until the archive is disposed.
 
-    /// Extracts a single entry by resource path to a directory path.
-    ///
-    /// # Errors
-    ///
-    /// This function will return an error if the entry cannot be found or any io fails.
-    pub fn extract_entry_by_name<P: AsRef<Path>>(
-        &mut self,
-        name: String,
-        destination_directory_name: &P,
-        overwrite_files: bool,
-        hash_map: &HashMap<u64, String>,
-    ) -> Result<()> {
-        if let Some(entry) = self.get_entry(&name) {
-            self.extract_entry(
-                entry.clone(),
-                destination_directory_name,
-                overwrite_files,
-                hash_map,
-            )
-        } else {
-            return Err(io::Error::new(
-                io::ErrorKind::InvalidData,
-                "Could not find entry.",
-            ));
-        }
-    }
+*/
 
-    /// Returns an open read stream to an entry of this [`ZipArchive`].
-    pub fn open_entry<W: Write>(&mut self, entry: ZipEntry, writer: W) -> Result<()> {
-        self.extract_segments(&entry, writer)?;
+// do we need this?
+// pub enum EArchive {
+//     ZipArchiveReadonly(ZipArchiveReadonly),
+//     ZipArchiveMemory(ZipArchiveMemory),
+// }
 
-        Ok(())
-    }
+// do we need this
+// pub trait IArchive {
 
-    /// Extracts all entries to the given directory.
-    ///
-    /// # Errors
-    ///
-    /// This function will return an error if io fails.
-    pub fn extract_to_directory<P: AsRef<Path>>(
-        &mut self,
-        destination_directory_name: &P,
-        overwrite_files: bool,
-        hash_map: Option<HashMap<u64, String>>,
-    ) -> Result<()> {
-        let hash_map = if let Some(hash_map) = hash_map {
-            hash_map
-        } else {
-            get_red4_hashes()
-        };
+// }
 
-        // collect info
-        let mut entries: Vec<ZipEntry> = vec![];
-        for (_hash, entry) in &self.entries {
-            entries.push(entry.clone());
-        }
-
-        for entry in entries {
-            self.extract_entry(
-                entry,
-                destination_directory_name,
-                overwrite_files,
-                &hash_map,
-            )?;
-        }
-
-        Ok(())
-    }
-
-    // getters
-
-    fn reader_mut(&mut self) -> &mut R {
-        self.stream.borrow_mut()
-    }
-
-    // methods
-
-    /// Extracts segments to a writer, expects correct offset info.
-    ///
-    /// # Errors
-    ///
-    /// This function will return an error if io fails
-    fn extract_segments<W: Write>(&mut self, entry: &ZipEntry, mut writer: W) -> Result<()> {
-        let segment = entry.segment;
-        let buffers = entry.buffers.clone();
-
-        if segment.size() == segment.z_size() {
-            // just copy
-            self.reader_mut().seek(SeekFrom::Start(segment.offset()))?;
-            let mut buffer = vec![0; segment.z_size() as usize];
-            self.reader_mut().read_exact(&mut buffer[..])?;
-            writer.write_all(&buffer)?;
-        } else {
-            decompress_segment(self.reader_mut(), &segment, &mut writer)?;
-        }
-        for segment in buffers {
-            self.reader_mut().seek(SeekFrom::Start(segment.offset()))?;
-            let mut buffer = vec![0; segment.z_size() as usize];
-            self.reader_mut().read_exact(&mut buffer[..])?;
-            writer.write_all(&buffer)?;
-        }
-
-        Ok(())
-    }
-
-    /// Opens an archive, needs to be read-only
-    fn from_reader_consume(mut reader: R, mode: ArchiveMode) -> Result<ZipArchive<R>> {
-        // checks
-        if mode == ArchiveMode::Create {
-            return Ok(ZipArchive::<R> {
-                stream: reader,
-                mode,
-                dirty: true,
-                entries: HashMap::default(),
-                dependencies: Vec::default(),
-            });
-        }
-
-        // read header
-        let header = Header::from_reader(&mut reader)?;
-
-        // read custom data
-        let mut file_names: HashMap<u64, String> = HashMap::default();
-        if let Ok(custom_data_length) = reader.read_u32::<LittleEndian>() {
-            if custom_data_length > 0 {
-                reader.seek(io::SeekFrom::Start(Header::HEADER_EXTENDED_SIZE))?;
-                if let Ok(footer) = LxrsFooter::from_reader(&mut reader) {
-                    // add files to hashmap
-                    for f in footer.files() {
-                        let hash = fnv1a64_hash_string(f);
-                        file_names.insert(hash, f.to_owned());
-                    }
-                }
-            }
-        }
-
-        // read index
-        // move to offset Header.IndexPosition
-        reader.seek(io::SeekFrom::Start(header.index_position()))?;
-        let index = Index::from_reader(&mut reader)?;
-
-        // read tables
-        let mut file_entries: HashMap<u64, FileEntry> = HashMap::default();
-        for _i in 0..index.file_entry_count() {
-            let entry = FileEntry::from_reader(&mut reader)?;
-            file_entries.insert(entry.name_hash_64(), entry);
-        }
-
-        let mut file_segments = Vec::default();
-        for _i in 0..index.file_segment_count() {
-            file_segments.push(FileSegment::from_reader(&mut reader)?);
-        }
-
-        // dependencies can't be connected to individual files anymore
-        let mut dependencies = Vec::default();
-        for _i in 0..index.resource_dependency_count() {
-            dependencies.push(Dependency::from_reader(&mut reader)?);
-        }
-
-        // construct wrapper
-        let mut entries = HashMap::default();
-        for (hash, entry) in file_entries.iter() {
-            let resolved = if let Some(name) = file_names.get(hash) {
-                Some(name.to_owned())
-            } else {
-                None
-            };
-
-            let start_index = entry.segments_start();
-            let next_index = entry.segments_end();
-            if let Some(segment) = file_segments.get(start_index as usize) {
-                let mut buffers: Vec<FileSegment> = vec![];
-                for i in start_index + 1..next_index {
-                    if let Some(buffer) = file_segments.get(i as usize) {
-                        buffers.push(*buffer);
-                    }
-                }
-
-                let zip_entry = ZipEntry {
-                    hash: *hash,
-                    name: resolved,
-                    entry: *entry,
-                    segment: *segment,
-                    buffers,
-                };
-                entries.insert(*hash, zip_entry);
-            }
-        }
-
-        let archive = ZipArchive::<R> {
-            stream: reader,
-            mode,
-            entries,
-            dependencies,
-            dirty: false,
-        };
-        Ok(archive)
-    }
-}
-
-impl<S> ZipArchive<S> {
-    fn write(&mut self) {
-        todo!()
-    }
-
-    /// Compresses and adds a file to the archive.
-    ///
-    /// # Errors
-    ///
-    /// This function will return an error if compression or io fails, or if the mode is Read.
-    pub fn create_entry<P: AsRef<Path>>(
-        &mut self,
-        _file_path: P,
-        _compression_level: CompressionLevel,
-    ) -> Result<ZipEntry> {
-        // can only add entries in update mode
-        if self.mode != ArchiveMode::Update {
-            return Err(io::Error::new(
-                io::ErrorKind::InvalidData,
-                "Archive is in read-only mode.",
-            ));
-        }
-
-        // write?
-
-        // set dirty
-        self.dirty = true;
-
-        todo!()
-    }
-
-    /// Deletes an entry from the archive
-    pub fn delete_entry(&mut self, hash: &u64) -> Option<ZipEntry> {
-        // can only delete entries in update mode
-        if self.mode != ArchiveMode::Update {
-            return None;
-        }
-
-        // Set dirty
-        self.dirty = true;
-
-        self.entries.remove(hash)
-    }
+#[derive(Debug, Clone, Default, PartialEq)]
+pub enum ArchiveMode {
+    #[default]
+    /// Only reading archive entries is permitted.
+    Read,
+    /// Only creating new archive entries is permitted.
+    Create,
+    /// Both read and write operations are permitted for archive entries.
+    Update,
 }
 
 #[derive(Debug, Clone)]
@@ -951,8 +641,8 @@ mod integration_tests {
 
     use crate::archive::open_read;
 
-    use super::FromReader;
     use super::LxrsFooter;
+    use crate::io::FromReader;
 
     #[test]
     fn read_srxl() {