-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Try to extract creation time of generic media from internal metadata
The generic media provider pulls images from the filesystem, but file creation times are unreliable when working from copies of the source, so try to examine built-in metadata. Start with creation time as it's very important for RIME but plan to extend to other metadata such as location.
- Loading branch information
Nicholas FitzRoy-Dale
committed
Aug 12, 2024
1 parent
992d8d9
commit 4edd9fe
Showing
6 changed files
with
202 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
""" | ||
Attempt to read metadata from a file | ||
""" | ||
import dataclasses | ||
import datetime | ||
import json | ||
import struct | ||
import shutil | ||
import subprocess | ||
|
||
# Upper bound (10 MiB) on the file offset reached while scanning MP4 boxes for
# the moov box; the scan is abandoned once the read position passes it.
BAIL_AFTER_BYTES = 10 * 1024 * 1024
|
||
|
||
@dataclasses.dataclass | ||
class Metadata: | ||
ctime: datetime.datetime | None = None | ||
|
||
|
||
def from_mp4(handle):
    """
    Attempt to read metadata from an MP4 file.

    Walks the top-level boxes from the start of the file until the ``moov``
    box is found, then reads the creation time from the ``mvhd`` box, which
    must be the first box inside ``moov``.

    Args:
        handle: Seekable binary file object. It is rewound to the start
            before parsing.

    Returns:
        A Metadata whose ``ctime`` is a timezone-aware UTC datetime.

    Raises:
        ValueError: If the data is truncated or malformed, the file does not
            start with an ``ftyp`` box, the ``moov`` box is not found before
            BAIL_AFTER_BYTES, ``mvhd`` is not the first child of ``moov``,
            the ``mvhd`` version is unsupported, or no creation time is set.
    """
    def read_exact(count):
        # A short read means the data ended in the middle of a box. Report it
        # as ValueError instead of letting struct.unpack raise struct.error.
        data = handle.read(count)
        if len(data) != count:
            raise ValueError('Unexpected end of data while reading MP4 box')
        return data

    def read_box():
        # Returns (payload_size, box_type) and leaves the handle at the start
        # of the payload. payload_size is None for a box that extends to the
        # end of the file (size field of 0).
        box_size, raw_type = struct.unpack('>I4s', read_exact(8))
        box_type = raw_type.decode('ascii')
        header_size = 8

        if box_size == 1:
            # A size of 1 means the real size follows as a 64-bit integer.
            box_size = struct.unpack('>Q', read_exact(8))[0]
            header_size = 16
        elif box_size == 0:
            return None, box_type

        if box_size < header_size:
            raise ValueError(f'Invalid size {box_size} for box {box_type}')

        return box_size - header_size, box_type

    handle.seek(0, 0)
    payload_size, typ = read_box()
    if typ != 'ftyp':
        raise ValueError(f'Expected ftyp, got {typ}')

    # Find the moov box
    while typ != 'moov':
        if payload_size is None:
            # The current box runs to the end of the file, so nothing follows.
            raise ValueError('No moov box found')
        handle.seek(payload_size, 1)
        payload_size, typ = read_box()
        # NOTE(review): this also rejects a moov box whose header lies beyond
        # the limit, which is common when the media data precedes moov.
        if handle.tell() >= BAIL_AFTER_BYTES:
            raise ValueError('Bailed out of searching for moov box')

    payload_size, typ = read_box()
    if typ != 'mvhd':
        raise ValueError(f'Expected mvhd, got {typ}')

    # The top byte of this word is the box version; the low 24 bits are flags.
    version = struct.unpack('>I', read_exact(4))[0] >> 24

    if version == 0:
        ctime_secs = struct.unpack('>I', read_exact(4))[0]
    elif version == 1:
        # Version 1 stores the timestamps as 64-bit values.
        ctime_secs = struct.unpack('>Q', read_exact(8))[0]
    else:
        raise ValueError(f'Expected version 0 or 1, got {version}')

    if ctime_secs == 0:
        raise ValueError('No creation time found')

    # The stored value counts seconds since 1904-01-01 UTC.
    epoch = datetime.datetime(1904, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc)
    try:
        ctime = epoch + datetime.timedelta(seconds=ctime_secs)
    except OverflowError as exc:
        # Only reachable with a corrupt 64-bit timestamp.
        raise ValueError(f'Creation time out of range: {ctime_secs}') from exc

    return Metadata(ctime=ctime)
|
||
|
||
def from_ffprobe(ffprobe, handle):
    """
    Attempt to read metadata from a file using ffprobe.

    Args:
        ffprobe: Path of the ffprobe executable to run.
        handle: Seekable binary file object. It is rewound and its entire
            contents are piped to ffprobe on standard input.

    Returns:
        A Metadata whose ``ctime`` is parsed from the ``creation_time``
        format tag reported by ffprobe.

    Raises:
        ValueError: If ffprobe exits with an error, its output cannot be
            parsed, or no creation time is reported.
    """
    handle.seek(0)
    # The trailing '-' makes ffprobe read the media from stdin, so the whole
    # file is held in memory for the duration of the call.
    p = subprocess.run(
        [ffprobe, '-v', 'quiet', '-print_format', 'json', '-show_format', '-'],
        input=handle.read(), stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    if p.returncode != 0:
        # Decode leniently so a stray byte cannot mask the real failure.
        raise ValueError(f'ffprobe failed: {p.stderr.decode(errors="replace")}')

    data = json.loads(p.stdout)

    # 'format' and 'tags' are both absent when ffprobe found nothing to
    # report; treat that the same as a missing creation_time tag.
    try:
        ctime = data['format']['tags'].get('creation_time')
    except (KeyError, TypeError, AttributeError):
        ctime = None

    if not isinstance(ctime, str):
        raise ValueError('No creation time found')

    # ffprobe writes UTC timestamps with a trailing 'Z', which
    # datetime.fromisoformat only accepts from Python 3.11 onwards.
    if ctime.endswith(('Z', 'z')):
        ctime = ctime[:-1] + '+00:00'

    return Metadata(ctime=datetime.datetime.fromisoformat(ctime))
|
||
|
||
def try_extract_video_metadata(mime_type, handle):
    """
    Best-effort extraction of metadata from a video file.

    Tries the built-in MP4 parser first when the MIME type is ``video/mp4``,
    then falls back to ffprobe if it is available on PATH. Failures are
    logged at debug level and never propagate.

    Args:
        mime_type: MIME type of the file.
        handle: Binary file object passed through to the extractors.

    Returns:
        The Metadata from the first extractor that succeeds, or None.
    """
    import logging

    log = logging.getLogger(__name__)

    # Try to extract using native decoder if possible.
    if mime_type == 'video/mp4':
        try:
            return from_mp4(handle)
        except Exception:
            # Deliberately broad: any failure just means "try the fallback".
            log.debug('Native MP4 metadata extraction failed', exc_info=True)

    # Only look ffprobe up once the native path has not produced a result.
    ffprobe = shutil.which('ffprobe')
    if ffprobe is None:
        return None

    try:
        return from_ffprobe(ffprobe, handle)
    except Exception:
        log.debug('ffprobe metadata extraction failed', exc_info=True)
        return None
|
||
|
||
def try_extract_metadata(mime_type, handle):
    """
    Best-effort extraction of metadata from a media file.

    Only ``video/`` MIME types are handled; every other type, including a
    missing one, yields None.

    Args:
        mime_type: MIME type of the file, or None if unknown.
        handle: Binary file object passed through to the video extractor.

    Returns:
        A Metadata on success, otherwise None.
    """
    # A missing MIME type cannot be dispatched, so treat it as unsupported
    # rather than failing on None.startswith.
    if not mime_type or not mime_type.startswith('video/'):
        return None

    try:
        return try_extract_video_metadata(mime_type, handle)
    except ValueError:
        return None
|
||
|
||
def can_try_extract_metadata(mime_type):
    """
    Report whether metadata extraction is worth attempting for a MIME type.

    Args:
        mime_type: MIME type of the file, or None if unknown.

    Returns:
        True for ``video/``, ``image/`` and ``audio/`` types, otherwise False
        (including when the MIME type is None or empty).
    """
    if not mime_type:
        return False
    return mime_type.startswith(('video/', 'image/', 'audio/'))
|
||
|
||
if __name__ == '__main__':
    # Manual check: print the metadata extracted from the file named on the
    # command line, treating it as an MP4 video.
    import sys

    if len(sys.argv) < 2:
        # Exit with a usage message instead of an IndexError traceback.
        sys.exit(f'usage: {sys.argv[0]} <video-file>')

    with open(sys.argv[1], 'rb') as handle:
        metadata = try_extract_metadata('video/mp4', handle)

    print(metadata)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters