Since The Pirate Bay reports file sizes in human-readable form, it is hard to sort results by file size. We now have a de-humanize size function in our utils that converts these sizes to a number of bytes.

This commit is contained in:
2017-11-19 00:07:40 +01:00
parent 8ec95cf216
commit cc672efed2
2 changed files with 45 additions and 3 deletions

View File

@@ -3,11 +3,20 @@
# @Author: KevinMidboe
# @Date: 2017-11-01 15:57:23
# @Last Modified by: KevinMidboe
# @Last Modified time: 2017-11-02 16:20:29
# @Last Modified time: 2017-11-19 00:05:10
import re
from datetime import datetime
# Unit symbols recognised when parsing human-readable sizes, grouped by
# naming scheme. Position matters: deHumansize treats the symbol at index i
# of a tuple as a multiplier of 2 ** (10 * i), so index 0 means plain bytes.
#
# NOTE(review): 'iotta' looks like a misspelling of the SI prefix 'yotta',
# and 'BiB' is not a standard IEC unit name. Both are kept exactly as they
# are because these strings are matched against input at runtime.
SYMBOLS = dict(
    # Single-letter abbreviations.
    customary=(
        'B', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y',
    ),
    # Spelled-out decimal-style prefix names.
    customary_ext=(
        'byte', 'kilo', 'mega', 'giga', 'tera',
        'peta', 'exa', 'zetta', 'iotta',
    ),
    # Binary-prefixed unit abbreviations.
    iec=(
        'BiB', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB',
    ),
    # Spelled-out binary prefix names.
    iec_ext=(
        'byte', 'kibi', 'mebi', 'gibi', 'tebi',
        'pebi', 'exbi', 'zebi', 'yobi',
    ),
)
def sanitize(string, ignore_characters=None, replace_characters=None):
"""Sanitize a string to strip special characters.
@@ -49,6 +58,38 @@ def pagesToCount(multiple, total):
return total
return multiple
def deHumansize(s):
    """
    Convert a human readable file size into a number of bytes.

    The input is expected to be a non-negative decimal number (digits and
    '.' only) optionally followed by whitespace and a unit symbol, for
    example ``'1.5 GiB'``, ``'700MiB'`` or ``'12 K'``. Whitespace around
    the whole string is ignored. The unit must match one of the symbols in
    the module-level SYMBOLS table exactly (case-sensitive); a lowercase
    ``'k'`` is additionally accepted as an alias for ``'K'``. Every unit
    step is a factor of 1024.

    When unable to recognize the format ValueError is raised. This
    includes a missing or malformed number and a missing or unknown unit.

    :param str s: human file size that we want to convert
    :return: the guessed number of bytes for the human file size,
        truncated to a whole number
    :rtype: int
    """
    text = s.strip()

    # Find where the leading numeric part (digits and dots) ends.
    split_at = 0
    while split_at < len(text) and (
            text[split_at].isdigit() or text[split_at] == '.'):
        split_at += 1

    try:
        num = float(text[:split_at])
    except ValueError:
        # Empty number or something like '1.2.3': report it the same way as
        # any other unrecognized input instead of leaking float()'s message.
        raise ValueError("can't interpret %r" % (s,))

    letter = text[split_at:].strip()

    # Pick the symbol set that contains the unit.
    for symbols in SYMBOLS.values():
        if letter in symbols:
            break
    else:
        if letter != 'k':
            raise ValueError("can't interpret %r" % (s,))
        # treat 'k' as an alias for 'K' as per: http://goo.gl/kTQMs
        symbols = SYMBOLS['customary']
        letter = 'K'

    # The symbol at index i stands for 2 ** (10 * i) bytes; index 0 is bytes.
    multiplier = 1 << (10 * symbols.index(letter))
    return int(num * multiplier)
def humansize(nbytes):
suffixes = ['B', 'KB', 'MB', 'GB', 'TB', 'PB']