[utils] Recognize units with full names in parse_filename

Reference: https://en.wikipedia.org/wiki/Template:Quantities_of_bytes
This commit is contained in:
Yen Chi Hsuan 2016-08-20 00:12:32 +08:00
parent a9a3b4a081
commit 70852b47ca
No known key found for this signature in database
GPG Key ID: 3FDDD575826C5C30
3 changed files with 21 additions and 1 deletions

View File

@ -1,7 +1,9 @@
version <unreleased> version <unreleased>
Core Core
* Support m3u8 manifests in HTML5 multimedia tags + Recognize file size strings with full unit names (for example "8.5
megabytes")
+ Support m3u8 manifests in HTML5 multimedia tags
* Fix js_to_json(): correct octal or hexadecimal number detection * Fix js_to_json(): correct octal or hexadecimal number detection
Extractors Extractors

View File

@ -823,6 +823,7 @@ def test_parse_filesize(self):
self.assertEqual(parse_filesize('1.2tb'), 1200000000000) self.assertEqual(parse_filesize('1.2tb'), 1200000000000)
self.assertEqual(parse_filesize('1,24 KB'), 1240) self.assertEqual(parse_filesize('1,24 KB'), 1240)
self.assertEqual(parse_filesize('1,24 kb'), 1240) self.assertEqual(parse_filesize('1,24 kb'), 1240)
self.assertEqual(parse_filesize('8.5 megabytes'), 8500000)
def test_parse_count(self): def test_parse_count(self):
self.assertEqual(parse_count(None), None) self.assertEqual(parse_count(None), None)

View File

@ -1504,46 +1504,63 @@ def parse_filesize(s):
_UNIT_TABLE = { _UNIT_TABLE = {
'B': 1, 'B': 1,
'b': 1, 'b': 1,
'bytes': 1,
'KiB': 1024, 'KiB': 1024,
'KB': 1000, 'KB': 1000,
'kB': 1024, 'kB': 1024,
'Kb': 1000, 'Kb': 1000,
'kb': 1000, 'kb': 1000,
'kilobytes': 1000,
'kibibytes': 1024,
'MiB': 1024 ** 2, 'MiB': 1024 ** 2,
'MB': 1000 ** 2, 'MB': 1000 ** 2,
'mB': 1024 ** 2, 'mB': 1024 ** 2,
'Mb': 1000 ** 2, 'Mb': 1000 ** 2,
'mb': 1000 ** 2, 'mb': 1000 ** 2,
'megabytes': 1000 ** 2,
'mebibytes': 1024 ** 2,
'GiB': 1024 ** 3, 'GiB': 1024 ** 3,
'GB': 1000 ** 3, 'GB': 1000 ** 3,
'gB': 1024 ** 3, 'gB': 1024 ** 3,
'Gb': 1000 ** 3, 'Gb': 1000 ** 3,
'gb': 1000 ** 3, 'gb': 1000 ** 3,
'gigabytes': 1000 ** 3,
'gibibytes': 1024 ** 3,
'TiB': 1024 ** 4, 'TiB': 1024 ** 4,
'TB': 1000 ** 4, 'TB': 1000 ** 4,
'tB': 1024 ** 4, 'tB': 1024 ** 4,
'Tb': 1000 ** 4, 'Tb': 1000 ** 4,
'tb': 1000 ** 4, 'tb': 1000 ** 4,
'terabytes': 1000 ** 4,
'tebibytes': 1024 ** 4,
'PiB': 1024 ** 5, 'PiB': 1024 ** 5,
'PB': 1000 ** 5, 'PB': 1000 ** 5,
'pB': 1024 ** 5, 'pB': 1024 ** 5,
'Pb': 1000 ** 5, 'Pb': 1000 ** 5,
'pb': 1000 ** 5, 'pb': 1000 ** 5,
'petabytes': 1000 ** 5,
'pebibytes': 1024 ** 5,
'EiB': 1024 ** 6, 'EiB': 1024 ** 6,
'EB': 1000 ** 6, 'EB': 1000 ** 6,
'eB': 1024 ** 6, 'eB': 1024 ** 6,
'Eb': 1000 ** 6, 'Eb': 1000 ** 6,
'eb': 1000 ** 6, 'eb': 1000 ** 6,
'exabytes': 1000 ** 6,
'exbibytes': 1024 ** 6,
'ZiB': 1024 ** 7, 'ZiB': 1024 ** 7,
'ZB': 1000 ** 7, 'ZB': 1000 ** 7,
'zB': 1024 ** 7, 'zB': 1024 ** 7,
'Zb': 1000 ** 7, 'Zb': 1000 ** 7,
'zb': 1000 ** 7, 'zb': 1000 ** 7,
'zettabytes': 1000 ** 7,
'zebibytes': 1024 ** 7,
'YiB': 1024 ** 8, 'YiB': 1024 ** 8,
'YB': 1000 ** 8, 'YB': 1000 ** 8,
'yB': 1024 ** 8, 'yB': 1024 ** 8,
'Yb': 1000 ** 8, 'Yb': 1000 ** 8,
'yb': 1000 ** 8, 'yb': 1000 ** 8,
'yottabytes': 1000 ** 8,
'yobibytes': 1024 ** 8,
} }
return lookup_unit_table(_UNIT_TABLE, s) return lookup_unit_table(_UNIT_TABLE, s)