diff --git a/LICENSE.txt b/LICENSE.txt index 5bb6b92..95e85c0 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -18,4 +18,38 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. \ No newline at end of file +SOFTWARE. + +---------------------------------------------------------------------- + +This software contains derivative works from https://github.com/herumi/msoffice +which is licensed under the BSD 3-Clause License. + +https://github.com/herumi/msoffice/blob/c3cdb1ea0a5285a2a1718fee2dc893fd884bdad0/COPYRIGHT + +Copyright (c) 2007-2015 Cybozu Labs, Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. +Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. +Neither the name of the Cybozu Labs, Inc. nor the names of its contributors may +be used to endorse or promote products derived from this software without +specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +THE POSSIBILITY OF SUCH DAMAGE. diff --git a/README.md b/README.md index 4d6e3e6..2dda965 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,10 @@ pip install msoffcrypto-tool ### As CLI tool (with password) +#### Decryption + +Specify the password with the `-p` flag: + ``` msoffcrypto-tool encrypted.docx decrypted.docx -p Passw0rd ``` @@ -41,16 +45,31 @@ $ msoffcrypto-tool encrypted.docx decrypted.docx -p Password: ``` -Test if the file is encrypted or not (exit code 0 or 1 is returned): +To check if the file is encrypted or not, use the `-t` flag: ``` msoffcrypto-tool document.doc --test -v ``` +It exits with status `0` if the file is encrypted, `1` if not. + +#### Encryption (OOXML only, experimental) + +> [!IMPORTANT] +> The encryption feature is experimental. Please use it at your own risk. + +To password-protect a document, use the `-e` flag along with the `-p` flag: + +``` +msoffcrypto-tool -e -p Passw0rd plain.docx encrypted.docx +``` + ### As library Password and more key types are supported with library functions. +#### Decryption + Basic usage: ```python @@ -67,7 +86,7 @@ with open("decrypted.docx", "wb") as f: encrypted.close() ``` -Basic usage (in-memory): +In-memory: ```python import msoffcrypto @@ -104,6 +123,40 @@ file.load_key(secret_key=binascii.unhexlify("AE8C36E68B4BB9EA46E5544A5FDB6693875 file.decrypt(open("decrypted.docx", "wb"), verify_integrity=True) ``` +#### Encryption (OOXML only, experimental) + +> [!IMPORTANT] +> The encryption feature is experimental. Please use it at your own risk. 
+ +Basic usage: + +```python +from msoffcrypto.format.ooxml import OOXMLFile + +plain = open("plain.docx", "rb") +file = OOXMLFile(plain) + +with open("encrypted.docx", "wb") as f: + file.encrypt("Passw0rd", f) + +plain.close() +``` + +In-memory: + +```python +from msoffcrypto.format.ooxml import OOXMLFile +import io + +encrypted = io.BytesIO() + +with open("plain.xlsx", "rb") as f: + file = OOXMLFile(f) + file.encrypt("Passw0rd", encrypted) + +# Do stuff with encrypted buffer; it contains an OLE container with an encrypted stream +``` + ## Supported encryption methods ### MS-OFFCRYPTO specs @@ -155,7 +208,8 @@ poetry run coverage run -m pytest -v * [x] Improve error types (v4.12.0) * [ ] Redesign APIs (v6.0.0) * [ ] Introduce something like `ctypes.Structure` -* [ ] Support encryption +* [x] Support OOXML encryption +* [ ] Support other encryption * [ ] Isolate parser ## Resources diff --git a/msoffcrypto/__main__.py b/msoffcrypto/__main__.py index 931e577..0e8d63d 100644 --- a/msoffcrypto/__main__.py +++ b/msoffcrypto/__main__.py @@ -8,6 +8,7 @@ import olefile from msoffcrypto import OfficeFile, exceptions +from msoffcrypto.format.ooxml import OOXMLFile logger = logging.getLogger(__name__) logger.addHandler(logging.NullHandler()) @@ -53,6 +54,7 @@ def is_encrypted(file): group = parser.add_mutually_exclusive_group(required=True) group.add_argument("-p", "--password", nargs="?", const="", dest="password", help="password text") group.add_argument("-t", "--test", dest="test_encrypted", action="store_true", help="test if the file is encrypted") +parser.add_argument("-e", dest="encrypt", action="store_true", help="encryption mode (default is false)") parser.add_argument("-v", dest="verbose", action="store_true", help="print verbose information") parser.add_argument("infile", nargs="?", type=argparse.FileType("rb"), help="input file") parser.add_argument("outfile", nargs="?", type=argparse.FileType("wb"), help="output file (if blank, stdout is used)") @@ -75,16 +77,10 
@@ def main(): logger.debug("{}: encrypted".format(args.infile.name)) return - if not olefile.isOleFile(args.infile): - raise exceptions.FileFormatError("Not OLE file") - - file = OfficeFile(args.infile) - if args.password: - file.load_key(password=args.password) + password = args.password else: password = getpass.getpass() - file.load_key(password=password) if args.outfile is None: ifWIN32SetBinary(sys.stdout) @@ -93,7 +89,19 @@ def main(): else: args.outfile = sys.stdout - file.decrypt(args.outfile) + if args.encrypt: + # OOXML is the only format we support for encryption + file = OOXMLFile(args.infile) + + file.encrypt(password, args.outfile) + else: + if not olefile.isOleFile(args.infile): + raise exceptions.FileFormatError("Not OLE file") + + file = OfficeFile(args.infile) + file.load_key(password=password) + + file.decrypt(args.outfile) if __name__ == "__main__": diff --git a/msoffcrypto/exceptions/__init__.py b/msoffcrypto/exceptions/__init__.py index 28516c1..c2b7fbb 100644 --- a/msoffcrypto/exceptions/__init__.py +++ b/msoffcrypto/exceptions/__init__.py @@ -1,22 +1,28 @@ class FileFormatError(Exception): - """Raised when the format of given file is unsupported or unrecognized. - """ + """Raised when the format of given file is unsupported or unrecognized.""" + pass class ParseError(Exception): - """Raised when the file cannot be parsed correctly. - """ + """Raised when the file cannot be parsed correctly.""" + pass class DecryptionError(Exception): - """Raised when the file cannot be decrypted. - """ + """Raised when the file cannot be decrypted.""" + + pass + + +class EncryptionError(Exception): + """Raised when the file cannot be encrypted.""" + pass class InvalidKeyError(DecryptionError): - """Raised when the given password or key is incorrect or cannot be verified. 
- """ + """Raised when the given password or key is incorrect or cannot be verified.""" + pass diff --git a/msoffcrypto/format/ooxml.py b/msoffcrypto/format/ooxml.py index 428502c..0441326 100644 --- a/msoffcrypto/format/ooxml.py +++ b/msoffcrypto/format/ooxml.py @@ -240,6 +240,26 @@ def decrypt(self, ofile, verify_integrity=False): if not zipfile.is_zipfile(io.BytesIO(obuf)): raise exceptions.InvalidKeyError("The file could not be decrypted with this password") + def encrypt(self, password, ofile): + """ + >>> from msoffcrypto.format.ooxml import OOXMLFile + >>> from io import BytesIO; ofile = BytesIO() + >>> with open("tests/outputs/example.docx", "rb") as f: + ... officefile = OOXMLFile(f) + ... officefile.encrypt("1234", ofile) + """ + if self.is_encrypted(): + raise exceptions.EncryptionError("File is already encrypted") + + self.file.seek(0) + + buf = ECMA376Agile.encrypt(password, self.file) + + if not olefile.isOleFile(buf): + raise exceptions.EncryptionError("Unable to encrypt this file") + + ofile.write(buf) + def is_encrypted(self): """ >>> with open("tests/inputs/example_password.docx", "rb") as f: diff --git a/msoffcrypto/method/container/ecma376_encrypted.py b/msoffcrypto/method/container/ecma376_encrypted.py new file mode 100644 index 0000000..b363d75 --- /dev/null +++ b/msoffcrypto/method/container/ecma376_encrypted.py @@ -0,0 +1,607 @@ +import io +from datetime import datetime +from struct import pack + +import olefile + +# An encrypted ECMA376 file is stored as an OLE container. +# +# At this point, creating an Ole file is somewhat of a chore, since +# the latest OleFile (v0.47) does not really do it. +# +# See https://github.com/decalage2/olefile/issues/6 +# +# This file is not meant to support all manners of OLE files; it creates +# what we need (an OLE file with an encrypted stream + supporting streams). +# Nothing more, nothing less. So, unlike OleFile, we can take _a lot_ of +# shortcuts. +# +# Probably very brittle. 
+# +# File format: +# +# https://github.com/libyal/libolecf/blob/main/documentation/OLE%20Compound%20File%20format.asciidoc +# +# Initial C++ code from https://github.com/herumi/msoffice (BSD-3) + + +def datetime2filetime(dt): + """ + Convert Python datetime.datetime to FILETIME (64 bits unsigned int) + + A file time is a 64-bit value that represents the number of 100-nanosecond intervals that have elapsed + since 12:00 A.M. January 1, 1601 Coordinated Universal Time (UTC). + + https://learn.microsoft.com/en-us/windows/win32/sysinfo/file-times + """ + _FILETIME_NULL_DATE = datetime(1601, 1, 1, 0, 0, 0) + return int((dt - _FILETIME_NULL_DATE).total_seconds() * 10000000) + + +class RedBlack: + RED = 0 # Note that this is per-spec; olefile.py shows the opposite + BLACK = 1 + + +class DirectoryEntryType: + EMPTY = 0 + STORAGE = 1 + STREAM = 2 + LOCK_BYTES = 3 + PROPERTY = 4 + ROOT_STORAGE = 5 + + +class SectorTypes: + MAXREGSECT = 0xFFFFFFFA + DIFSECT = 0xFFFFFFFC + FATSECT = 0xFFFFFFFD + ENDOFCHAIN = 0xFFFFFFFE + FREESECT = 0xFFFFFFFF + NOSTREAM = 0xFFFFFFFF + + +class DSPos: + # Order in the directories array; must be in sync with ECMA376Encrypted._get_directory_entries() + + iRoot = 0 + iEncryptionPackage = 1 + iDataSpaces = 2 + iVersion = 3 + iDataSpaceMap = 4 + iDataSpaceInfo = 5 + iStongEncryptionDataSpace = 6 + iTransformInfo = 7 + iStrongEncryptionTransform = 8 + iPrimary = 9 + iEncryptionInfo = 10 + dirNum = 11 + + +class DefaultContent: + # Lifted off of Herumi/msoffice (C++ package) + # https://github.com/herumi/msoffice/blob/master/include/resource.hpp + + Version = b"\x3c\x00\x00\x00\x4d\x00\x69\x00\x63\x00\x72\x00\x6f\x00\x73\x00\x6f\x00\x66\x00\x74\x00\x2e\x00\x43\x00\x6f\x00\x6e\x00\x74\x00\x61\x00\x69\x00\x6e\x00\x65\x00\x72\x00\x2e\x00\x44\x00\x61\x00\x74\x00\x61\x00\x53\x00\x70\x00\x61\x00\x63\x00\x65\x00\x73\x00\x01\x00\x00\x00\x01\x00\x00\x00\x01\x00\x00\x00" + Primary = 
b"\x58\x00\x00\x00\x01\x00\x00\x00\x4c\x00\x00\x00\x7b\x00\x46\x00\x46\x00\x39\x00\x41\x00\x33\x00\x46\x00\x30\x00\x33\x00\x2d\x00\x35\x00\x36\x00\x45\x00\x46\x00\x2d\x00\x34\x00\x36\x00\x31\x00\x33\x00\x2d\x00\x42\x00\x44\x00\x44\x00\x35\x00\x2d\x00\x35\x00\x41\x00\x34\x00\x31\x00\x43\x00\x31\x00\x44\x00\x30\x00\x37\x00\x32\x00\x34\x00\x36\x00\x7d\x00\x4e\x00\x00\x00\x4d\x00\x69\x00\x63\x00\x72\x00\x6f\x00\x73\x00\x6f\x00\x66\x00\x74\x00\x2e\x00\x43\x00\x6f\x00\x6e\x00\x74\x00\x61\x00\x69\x00\x6e\x00\x65\x00\x72\x00\x2e\x00\x45\x00\x6e\x00\x63\x00\x72\x00\x79\x00\x70\x00\x74\x00\x69\x00\x6f\x00\x6e\x00\x54\x00\x72\x00\x61\x00\x6e\x00\x73\x00\x66\x00\x6f\x00\x72\x00\x6d\x00\x00\x00\x01\x00\x00\x00\x01\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00" + DataSpaceMap = b"\x08\x00\x00\x00\x01\x00\x00\x00\x68\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x45\x00\x6e\x00\x63\x00\x72\x00\x79\x00\x70\x00\x74\x00\x65\x00\x64\x00\x50\x00\x61\x00\x63\x00\x6b\x00\x61\x00\x67\x00\x65\x00\x32\x00\x00\x00\x53\x00\x74\x00\x72\x00\x6f\x00\x6e\x00\x67\x00\x45\x00\x6e\x00\x63\x00\x72\x00\x79\x00\x70\x00\x74\x00\x69\x00\x6f\x00\x6e\x00\x44\x00\x61\x00\x74\x00\x61\x00\x53\x00\x70\x00\x61\x00\x63\x00\x65\x00\x00\x00" + StrongEncryptionDataSpace = b"\x08\x00\x00\x00\x01\x00\x00\x00\x32\x00\x00\x00\x53\x00\x74\x00\x72\x00\x6f\x00\x6e\x00\x67\x00\x45\x00\x6e\x00\x63\x00\x72\x00\x79\x00\x70\x00\x74\x00\x69\x00\x6f\x00\x6e\x00\x54\x00\x72\x00\x61\x00\x6e\x00\x73\x00\x66\x00\x6f\x00\x72\x00\x6d\x00\x00\x00" + + +class Header: + FIRSTNUMDIFAT = 109 + BUFFER_SIZE = 512 # Size taken when writing out to disk/buffer + + def __init__(self): + self.minorVersion = 0x003E + self.majorVersion = 3 + self.sectorShift = 9 + self.numDirectorySectors = 0 + self.numFatSectors = 0 + self.firstDirectorySectorLocation = SectorTypes.ENDOFCHAIN + self.transactionSignatureNumber = 0 + self.firstMiniFatSectorLocation = SectorTypes.ENDOFCHAIN + 
self.numMiniFatSectors = 0 + self.firstDifatSectorLocation = SectorTypes.ENDOFCHAIN + self.numDifatSectors = 0 + self.sectorSize = 1 << self.sectorShift + self.difat = [] + + def write_to(self, obuf): + obuf.write(olefile.MAGIC) + obuf.write(b"\0" * 16) # CLSID + + byteOrder = 0xFFFE # Little-Endian + miniSectorShift = 6 + miniStreamCutoffSize = 0x1000 + reserved = 0 + + obuf.write( + pack( + " 2 else 0)) + obuf.write(pack("> 32)) + + @property + def Name(self): + return self._Name + + @Name.setter + def Name(self, n): + if len(n) > 31: + raise ValueError("Name cannot be longer than 31 characters") + + if set("!:/").intersection(n): + raise ValueError("Name contains invalid characters (!:/)") + + self._Name = n + + @property + def CLSID(self): + return self._CLSID + + @CLSID.setter + def CLSID(self, c): + if c and len(c) != 16: + raise ValueError("CLSID must be blank, or 16 characters long") + + self._CLSID = c + + @property + def LeftSiblingId(self): + return self._LeftSiblingId + + @LeftSiblingId.setter + def LeftSiblingId(self, id): + self._valid_id(id) + self._LeftSiblingId = id + + @property + def RightSiblingId(self): + return self._RightSiblingId + + @RightSiblingId.setter + def RightSiblingId(self, id): + self._valid_id(id) + self._RightSiblingId = id + + @property + def ChildId(self): + return self._ChildId + + @ChildId.setter + def ChildId(self, id): + self._valid_id(id) + self._ChildId = id + + def _valid_id(self, id): + if not ((id <= SectorTypes.MAXREGSECT) or (id == SectorTypes.NOSTREAM)): + raise ValueError("Invalid id received") + + +class ECMA376EncryptedLayout: + def __init__(self, sectorSize): + self.sectorSize = sectorSize + self.miniFatNum = 0 + self.miniFatDataSectorNum = 0 + self.miniFatSectors = 0 + self.numMiniFatSectors = 1 + self.difatSectorNum = 0 + self.fatSectorNum = 0 + self.difatPos = 0 + self.directoryEntrySectorNum = 0 + self.encryptionPackageSectorNum = 0 + + @property + def fatPos(self): + return self.difatPos + 
self.difatSectorNum + + @property + def miniFatPos(self): + return self.fatPos + self.fatSectorNum + + @property + def directoryEntryPos(self): + return self.miniFatPos + self.numMiniFatSectors + + @property + def miniFatDataPos(self): + return self.directoryEntryPos + self.directoryEntrySectorNum + + @property + def contentSectorNum(self): + return self.numMiniFatSectors + self.directoryEntrySectorNum + self.miniFatDataSectorNum + self.encryptionPackageSectorNum + + @property + def miniFatDataPos(self): + return self.directoryEntryPos + self.directoryEntrySectorNum + + @property + def encryptionPackagePos(self): + return self.miniFatDataPos + self.miniFatDataSectorNum + + @property + def totalSectors(self): + return self.difatSectorNum + self.fatSectorNum + self.contentSectorNum + + @property + def totalSize(self): + return Header.BUFFER_SIZE + self.totalSectors * self.sectorSize + + @property + def offsetDirectoryEntries(self): + return Header.BUFFER_SIZE + self.directoryEntryPos * self.sectorSize + + @property + def offsetMiniFatData(self): + return Header.BUFFER_SIZE + self.miniFatDataPos * self.sectorSize + + @property + def offsetFat(self): + return Header.BUFFER_SIZE + self.fatPos * self.sectorSize + + @property + def offsetMiniFat(self): + return Header.BUFFER_SIZE + self.miniFatPos * self.sectorSize + + def offsetDifat(self, n): + return Header.BUFFER_SIZE + (self.difatPos + n) * self.sectorSize + + def offsetData(self, startingSectorLocation): + return Header.BUFFER_SIZE + startingSectorLocation * self.sectorSize + + def offsetMiniData(self, startingSectorLocation): + return self.offsetMiniFatData + startingSectorLocation * 64 + + +class ECMA376Encrypted: + def __init__(self, encryptedPackage=b"", encryptionInfo=b""): + self._header = self._get_default_header() + self._dirs = self._get_directory_entries() + + self.set_payload(encryptedPackage, encryptionInfo) + + def write_to(self, obuf): + """ + Writes the encrypted data to obuf + """ + + # Create a 
temporary buffer with seek/tell capabilities, we do not want to assume the passed-in buffer has such + # capabilities (ie: piping to stdout). + _obuf = io.BytesIO() + + self._write_to(_obuf) + + # Finalize and write to client buffer. + obuf.write(_obuf.getvalue()) + + def set_payload(self, encryptedPackage, encryptionInfo): + self._dirs[DSPos.iEncryptionPackage].Content = encryptedPackage + self._dirs[DSPos.iEncryptionInfo].Content = encryptionInfo + + def _get_default_header(self): + return Header() + + def _get_directory_entries(self): + ft = datetime2filetime(datetime.now()) + + directories = [ # Must follow DSPos ordering + DirectoryEntry("Root Entry", DirectoryEntryType.ROOT_STORAGE, RedBlack.RED, ct=ft, mt=ft, childId=DSPos.iEncryptionInfo), + DirectoryEntry("EncryptedPackage", DirectoryEntryType.STREAM, RedBlack.RED, ct=ft, mt=ft), + DirectoryEntry("\x06DataSpaces", DirectoryEntryType.STORAGE, RedBlack.RED, ct=ft, mt=ft, childId=DSPos.iDataSpaceMap), + DirectoryEntry("Version", DirectoryEntryType.STREAM, RedBlack.BLACK, ct=ft, mt=ft, content=DefaultContent.Version), + DirectoryEntry( + "DataSpaceMap", + DirectoryEntryType.STREAM, + RedBlack.BLACK, + ct=ft, + mt=ft, + leftId=DSPos.iVersion, + rightId=DSPos.iDataSpaceInfo, + content=DefaultContent.DataSpaceMap, + ), + DirectoryEntry( + "DataSpaceInfo", + DirectoryEntryType.STORAGE, + RedBlack.BLACK, + ct=ft, + mt=ft, + rightId=DSPos.iTransformInfo, + childId=DSPos.iStongEncryptionDataSpace, + ), + DirectoryEntry( + "StrongEncryptionDataSpace", + DirectoryEntryType.STREAM, + RedBlack.BLACK, + ct=ft, + mt=ft, + content=DefaultContent.StrongEncryptionDataSpace, + ), + DirectoryEntry( + "TransformInfo", DirectoryEntryType.STORAGE, RedBlack.RED, ct=ft, mt=ft, childId=DSPos.iStrongEncryptionTransform + ), + DirectoryEntry("StrongEncryptionTransform", DirectoryEntryType.STORAGE, RedBlack.BLACK, ct=ft, mt=ft, childId=DSPos.iPrimary), + DirectoryEntry("\x06Primary", DirectoryEntryType.STREAM, RedBlack.BLACK, ct=ft, 
mt=ft, content=DefaultContent.Primary), + DirectoryEntry( + "EncryptionInfo", + DirectoryEntryType.STREAM, + RedBlack.BLACK, + ct=ft, + mt=ft, + leftId=DSPos.iDataSpaces, + rightId=DSPos.iEncryptionPackage, + ), + ] + + return directories + + def _write_to(self, obuf): + layout = ECMA376EncryptedLayout(self._header.sectorSize) + + self._set_sector_locations_of_streams(layout) + self._detect_sector_num(layout) + + self._header.firstDirectorySectorLocation = layout.directoryEntryPos + self._header.firstMiniFatSectorLocation = layout.miniFatPos + self._header.numMiniFatSectors = layout.numMiniFatSectors + + self._dirs[DSPos.iRoot].StartingSectorLocation = layout.miniFatDataPos + self._dirs[DSPos.iRoot].Content = b"\0" * (64 * layout.miniFatNum) + self._dirs[DSPos.iEncryptionPackage].StartingSectorLocation = layout.encryptionPackagePos + + for i in range(min(layout.fatSectorNum, Header.FIRSTNUMDIFAT)): + self._header.difat.append(layout.fatPos + i) + + self._header.numFatSectors = layout.fatSectorNum + self._header.numDifatSectors = layout.difatSectorNum + + if layout.difatSectorNum > 0: + self._header.firstDifatSectorLocation = layout.difatPos + + # Zero out the output buffer; some sections pad, some sections don't ... but we need the buffer to have the proper size + # so we can jump around + obuf.write(b"\0" * layout.totalSize) + obuf.seek(0) + + self._header.write_to(obuf) + + self._write_DIFAT(obuf, layout) + self._write_FAT_start(obuf, layout) + self._write_MiniFAT(obuf, layout) + + self._write_directory_entries(obuf, layout) + self._write_Content(obuf, layout) + + def _write_directory_entries(self, obuf, layout: ECMA376EncryptedLayout): + obuf.seek(layout.offsetDirectoryEntries) + + for d in self._dirs: + d.write_header_to(obuf) # This must write 128 bytes, no more, no less. 
+ + if obuf.tell() != (layout.offsetDirectoryEntries + len(self._dirs) * 128): + # TODO: Use appropriate custom exception + raise Exception("Buffer did not advance as expected when writing out directory entries") + + def _write_Content(self, obuf, layout: ECMA376EncryptedLayout): + for d in self._dirs: + size = len(d.Content) + + if size: + if size <= 4096: # Small content goes in the minifat section + obuf.seek(layout.offsetMiniData(d.StartingSectorLocation)) + obuf.write(d.Content) + else: + obuf.seek(layout.offsetData(d.StartingSectorLocation)) + obuf.write(d.Content) + + def _write_FAT_start(self, obuf, layout: ECMA376EncryptedLayout): + v = ([SectorTypes.DIFSECT] * layout.difatSectorNum) + ([SectorTypes.FATSECT] * layout.fatSectorNum) + v += [layout.numMiniFatSectors, layout.directoryEntrySectorNum, layout.miniFatDataSectorNum, layout.encryptionPackageSectorNum] + + obuf.seek(layout.offsetFat) + self._write_FAT(obuf, v, layout.fatSectorNum * layout.sectorSize) + + def _write_MiniFAT(self, obuf, layout: ECMA376EncryptedLayout): + obuf.seek(layout.offsetMiniFat) + self._write_FAT(obuf, layout.miniFatSectors, layout.numMiniFatSectors * layout.sectorSize) + + def _write_FAT(self, obuf, entries, blockSize): + v = 0 + + startPos = obuf.tell() + max_n = blockSize // 4 # 4 bytes per entry with max_n: + raise Exception("Attempting to write beyond block size") + + obuf.write(pack(" layout.difatSectorNum + layout.fatSectorNum: + for k in range(j, layout.sectorSize // 4 - 1): + obuf.write(pack(" 1: + raise ValueError("Unexpected layout size; too large") + + layout.miniFatNum = miniFatNum + layout.miniFatDataSectorNum = miniFatDataSectorNum + layout.miniFatSectors = miniFatSectors + + layout.directoryEntrySectorNum = self._get_block_num(len(self._dirs), 4) + layout.encryptionPackageSectorNum = self._get_block_num(len(self._dirs[DSPos.iEncryptionPackage].Content), layout.sectorSize) + + def _get_MiniFAT_sector_number(self, size): + return self._get_block_num(size, 64) + + 
def _get_block_num(self, x, block): + return (x + block - 1) // block diff --git a/msoffcrypto/method/ecma376_agile.py b/msoffcrypto/method/ecma376_agile.py index f7c2643..dcc5a3c 100644 --- a/msoffcrypto/method/ecma376_agile.py +++ b/msoffcrypto/method/ecma376_agile.py @@ -1,7 +1,9 @@ +import base64 import functools import hmac import io import logging +import secrets from hashlib import sha1, sha256, sha384, sha512 from struct import pack, unpack @@ -10,6 +12,9 @@ from cryptography.hazmat.primitives.asymmetric import padding from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes +from msoffcrypto import exceptions +from msoffcrypto.method.container.ecma376_encrypted import ECMA376Encrypted + logger = logging.getLogger(__name__) logger.addHandler(logging.NullHandler()) @@ -20,6 +25,35 @@ "SHA512": sha512, } +blkKey_VerifierHashInput = bytearray([0xFE, 0xA7, 0xD2, 0x76, 0x3B, 0x4B, 0x9E, 0x79]) +blkKey_encryptedVerifierHashValue = bytearray([0xD7, 0xAA, 0x0F, 0x6D, 0x30, 0x61, 0x34, 0x4E]) +blkKey_encryptedKeyValue = bytearray([0x14, 0x6E, 0x0B, 0xE7, 0xAB, 0xAC, 0xD0, 0xD6]) +blkKey_dataIntegrity1 = bytearray([0x5F, 0xB2, 0xAD, 0x01, 0x0C, 0xB9, 0xE1, 0xF6]) +blkKey_dataIntegrity2 = bytearray([0xA0, 0x67, 0x7F, 0x02, 0xB2, 0x2C, 0x84, 0x33]) + + +def _random_buffer(sz): + return secrets.token_bytes(sz) + + +def _get_num_blocks(sz, block): + return (sz + block - 1) // block + + +def _round_up(sz, block): + return _get_num_blocks(sz, block) * block + + +def _resize_buffer(buf, n, c=b"\0"): + if len(buf) >= n: + return buf[:n] + + return buf + c * (n - len(buf)) + + +def _normalize_key(key, n): + return _resize_buffer(key, n, b"\x36") + def _get_hash_func(algorithm): return ALGORITHM_HASH.get(algorithm, sha1) @@ -32,6 +66,96 @@ def _decrypt_aes_cbc(data, key, iv): return decrypted +def _encrypt_aes_cbc(data, key, iv): + aes = Cipher(algorithms.AES(key), modes.CBC(iv), backend=default_backend()) + + encryptor = aes.encryptor() + encrypted = 
encryptor.update(data) + encryptor.finalize() + + return encrypted + + +def _encrypt_aes_cbc_padded(data, key, iv, blockSize): + buf = data + + if len(buf) % blockSize: + buf = _resize_buffer(buf, _round_up(len(buf), blockSize)) + + return _encrypt_aes_cbc(buf, key, iv) + + +def _get_salt(salt_value=None, salt_size=16): + if not salt_value is None: + if len(salt_value) != salt_size: + raise exceptions.EncryptionError(f"Invalid salt value size, should be {salt_size}") + + return salt_value + + return _random_buffer(salt_size) + + +# Hardcoded to AES256 + SHA512 for OOXML. +class ECMA376AgileCipherParams: + def __init__(self): + self.cipherName = "AES" + self.hashName = "SHA512" + self.saltSize = 16 + self.blockSize = 16 + self.keyBits = 256 + self.hashSize = 64 + self.saltValue = None + + +def _enc64(b): + return base64.b64encode(b).decode("UTF-8") + + +class ECMA376AgileEncryptionInfo: + def __init__(self): + self.spinCount = 100000 + self.keyData = ECMA376AgileCipherParams() + self.encryptedHmacKey = None + self.encryptedHmacValue = None + + self.encryptedKey = ECMA376AgileCipherParams() + self.encryptedVerifierHashInput = None + self.encryptedVerifierHashValue = None + self.encryptedKeyValue = None + + def getEncryptionDescriptorHeader(self): + # https://learn.microsoft.com/en-us/openspecs/office_file_formats/ms-offcrypto/87020a34-e73f-4139-99bc-bbdf6cf6fa55 + return pack(" + + + + + + + + + +""" + + +def _generate_iv(params: ECMA376AgileCipherParams, blkKey, salt_value): + if not blkKey: + return _normalize_key(salt_value, params.blockSize) + + hashCalc = _get_hash_func(params.hashName) + + return _normalize_key(hashCalc(salt_value + blkKey).digest(), params.blockSize) + + class ECMA376Agile: def __init__(self): pass @@ -108,6 +232,143 @@ def decrypt(key, keyDataSalt, hashAlgorithm, ibuf): break return obuf.getvalue() # return obuf.getbuffer() + @staticmethod + def encrypt(key, ibuf, salt_value=None, spin_count=100000): + """ + Return an OLE compound file buffer 
(complete with headers) which contains ibuf encrypted into a single stream. + + When salt_value is not specified (the default), we generate a random one. + """ + + # Encryption ported from C++ (https://github.com/herumi/msoffice, BSD-3) + + info, secret_key = ECMA376Agile.generate_encryption_parameters(key, salt_value, spin_count) + encrypted_data = ECMA376Agile.encrypt_payload(ibuf, info.encryptedKey, secret_key, info.keyData.saltValue) + encryption_info = ECMA376Agile.get_encryption_information(info, encrypted_data, secret_key) + + obuf = io.BytesIO() + ECMA376Encrypted(encrypted_data, encryption_info).write_to(obuf) + + return obuf.getvalue() + + @staticmethod + def get_encryption_information(info: ECMA376AgileEncryptionInfo, encrypted_data, secretKey): + """ + Return the content of an EncryptionInfo Stream, including the short header, per the specifications at + + https://learn.microsoft.com/en-us/openspecs/office_file_formats/ms-offcrypto/87020a34-e73f-4139-99bc-bbdf6cf6fa55 + """ + hmacKey, hmacValue = ECMA376Agile.generate_integrity_parameter(encrypted_data, info.keyData, secretKey, info.keyData.saltValue) + + info.encryptedHmacKey = hmacKey + info.encryptedHmacValue = hmacValue + + xml_descriptor = info.toEncryptionDescriptor().encode("UTF-8") + header_descriptor = info.getEncryptionDescriptorHeader() + + return header_descriptor + xml_descriptor + + @staticmethod + def generate_encryption_parameters(key, salt_value=None, spin_count=100000): + """ + Generates encryption parameters used to encrypt a payload. + + Returns the information + a secret key. 
+ """ + info = ECMA376AgileEncryptionInfo() + info.spinCount = spin_count + + info.encryptedKey.saltValue = _get_salt(salt_value, info.encryptedKey.saltSize) + + h = ECMA376Agile._derive_iterated_hash_from_password( + key, info.encryptedKey.saltValue, info.encryptedKey.hashName, info.spinCount + ).digest() + + key1 = ECMA376Agile._derive_encryption_key(h, blkKey_VerifierHashInput, info.encryptedKey.hashName, info.encryptedKey.keyBits) + key2 = ECMA376Agile._derive_encryption_key( + h, blkKey_encryptedVerifierHashValue, info.encryptedKey.hashName, info.encryptedKey.keyBits + ) + key3 = ECMA376Agile._derive_encryption_key(h, blkKey_encryptedKeyValue, info.encryptedKey.hashName, info.encryptedKey.keyBits) + + verifierHashInput = _random_buffer(info.encryptedKey.saltSize) + verifierHashInput = _resize_buffer(verifierHashInput, _round_up(len(verifierHashInput), info.encryptedKey.blockSize)) + + info.encryptedVerifierHashInput = _encrypt_aes_cbc(verifierHashInput, key1, info.encryptedKey.saltValue) + + hashedVerifier = _get_hash_func(info.encryptedKey.hashName)(verifierHashInput).digest() + hashedVerifier = _resize_buffer(hashedVerifier, _round_up(len(hashedVerifier), info.encryptedKey.blockSize)) + + info.encryptedVerifierHashValue = _encrypt_aes_cbc(hashedVerifier, key2, info.encryptedKey.saltValue) + + secret_key = _random_buffer(info.encryptedKey.saltSize) + secret_key = _normalize_key(secret_key, info.encryptedKey.keyBits // 8) + + info.encryptedKeyValue = _encrypt_aes_cbc(secret_key, key3, info.encryptedKey.saltValue) + + info.keyData.saltValue = _get_salt(salt_size=info.keyData.saltSize) + + return info, secret_key + + @staticmethod + def encrypt_payload(ibuf, params: ECMA376AgileCipherParams, secret_key, salt_value): + """ + Encrypts a payload using the params and secrets passed in. + + Returns the encrypted data as a byte array. 
+ """ + # Specifications calls for storing the original (unpadded) size as a 64 bit little-endian + # number at the start of the buffer. We'll loop while there's data, and come back at the + # end to update the total size, instead of seeking to the end of ibuf to get the size, + # just in case ibuf is a streaming buffer... + total_size = 0 + obuf = io.BytesIO() + obuf.write(pack(">> ECMA376Agile.makekey_from_password(password, saltValue, hashAlgorithm, encryptedKeyValue, spinValue, keyBits) == expected True """ - block3 = bytearray([0x14, 0x6E, 0x0B, 0xE7, 0xAB, 0xAC, 0xD0, 0xD6]) h = ECMA376Agile._derive_iterated_hash_from_password(password, saltValue, hashAlgorithm, spinValue) - encryption_key = ECMA376Agile._derive_encryption_key(h.digest(), block3, hashAlgorithm, keyBits) + encryption_key = ECMA376Agile._derive_encryption_key(h.digest(), blkKey_encryptedKeyValue, hashAlgorithm, keyBits) skey = _decrypt_aes_cbc(encryptedKeyValue, encryption_key, saltValue) diff --git a/tests/test_cli.sh b/tests/test_cli.sh index 0abc29e..0881407 100755 --- a/tests/test_cli.sh +++ b/tests/test_cli.sh @@ -2,7 +2,13 @@ set -ev -cd tests +cd "$(dirname "$0")" + +msoffcrypto-tool () { + python ../msoffcrypto "$@" +} + +# Decryption msoffcrypto-tool --test inputs/example_password.docx && : ; [ $? = 0 ] msoffcrypto-tool --test outputs/example.docx && : ; [ $? = 1 ] @@ -33,3 +39,15 @@ msoffcrypto-tool --test inputs/rc4cryptoapi_password.ppt && : ; [ $? = 0 ] msoffcrypto-tool --test outputs/rc4cryptoapi_password_plain.ppt && : ; [ $? = 1 ] msoffcrypto-tool -p Password1234_ inputs/rc4cryptoapi_password.ppt /tmp/rc4cryptoapi_password_plain.ppt diff /tmp/rc4cryptoapi_password_plain.ppt outputs/rc4cryptoapi_password_plain.ppt + +# Encryption + +msoffcrypto-tool -e -p Password1234_ outputs/example.docx /tmp/example_password.docx +msoffcrypto-tool --test /tmp/example_password.docx && : ; [ $? 
= 0 ] +msoffcrypto-tool -p Password1234_ /tmp/example_password.docx /tmp/example.docx +diff /tmp/example.docx outputs/example.docx + +msoffcrypto-tool -e -p Password1234_ outputs/example.xlsx /tmp/example_password.xlsx +msoffcrypto-tool --test /tmp/example_password.xlsx && : ; [ $? = 0 ] +msoffcrypto-tool -p Password1234_ /tmp/example_password.xlsx /tmp/example.xlsx +diff /tmp/example.xlsx outputs/example.xlsx