Since The Pirate Bay reports file sizes in human-readable form, it is hard to sort results by file size. We now have a `deHumansize` function in our utils that converts these human-readable sizes to a number of bytes.
This commit is contained in:
@@ -4,7 +4,7 @@ import re, logging
|
|||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
from http_utils import convert_query_to_percent_encoded_octets, build_url, fetch_url
|
from http_utils import convert_query_to_percent_encoded_octets, build_url, fetch_url
|
||||||
from utils import return_re_match
|
from utils import return_re_match, deHumansize
|
||||||
from torrent import Torrent
|
from torrent import Torrent
|
||||||
|
|
||||||
class Piratebay(object):
|
class Piratebay(object):
|
||||||
@@ -85,6 +85,7 @@ class Piratebay(object):
|
|||||||
|
|
||||||
date = return_re_match(info_text, r"(\d+\-\d+\s\d+)|(Y\-day\s\d{2}\:\d{2})")
|
date = return_re_match(info_text, r"(\d+\-\d+\s\d+)|(Y\-day\s\d{2}\:\d{2})")
|
||||||
size = return_re_match(info_text, r"(\d+(\.\d+)?\s[a-zA-Z]+)")
|
size = return_re_match(info_text, r"(\d+(\.\d+)?\s[a-zA-Z]+)")
|
||||||
|
byteSize = deHumansize(size)
|
||||||
|
|
||||||
# COULD NOT FIND HREF!
|
# COULD NOT FIND HREF!
|
||||||
if (magnet is None):
|
if (magnet is None):
|
||||||
@@ -94,7 +95,7 @@ class Piratebay(object):
|
|||||||
seed = seed_and_leech[0].get_text()
|
seed = seed_and_leech[0].get_text()
|
||||||
leech = seed_and_leech[1].get_text()
|
leech = seed_and_leech[1].get_text()
|
||||||
|
|
||||||
torrent = Torrent(name, magnet['href'], size, uploader, date, seed, leech, url)
|
torrent = Torrent(name, magnet['href'], byteSize, uploader, date, seed, leech, url)
|
||||||
|
|
||||||
torrents_found.append(torrent)
|
torrents_found.append(torrent)
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -3,11 +3,20 @@
|
|||||||
# @Author: KevinMidboe
|
# @Author: KevinMidboe
|
||||||
# @Date: 2017-11-01 15:57:23
|
# @Date: 2017-11-01 15:57:23
|
||||||
# @Last Modified by: KevinMidboe
|
# @Last Modified by: KevinMidboe
|
||||||
# @Last Modified time: 2017-11-02 16:20:29
|
# @Last Modified time: 2017-11-19 00:05:10
|
||||||
|
|
||||||
import re
|
import re
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
|
# Unit symbols understood by deHumansize, grouped by naming scheme.
# Within each tuple the position is the power of 1024 that the symbol
# stands for: index 0 is a single byte, index 1 is 1024 bytes, and so on.
SYMBOLS = {
    # Single-letter symbols.
    'customary' : ('B', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y'),
    # Spelled-out prefixes.
    # NOTE(review): 'iotta' looks like a misspelling of 'yotta' -- confirm
    # before changing, since the string is matched literally at runtime.
    'customary_ext' : ('byte', 'kilo', 'mega', 'giga', 'tera', 'peta', 'exa',
                       'zetta', 'iotta'),
    # IEC-style symbols as they appear in size strings (e.g. 'MiB', 'GiB').
    'iec' : ('BiB', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'),
    # Spelled-out IEC prefixes.
    'iec_ext' : ('byte', 'kibi', 'mebi', 'gibi', 'tebi', 'pebi', 'exbi',
                 'zebi', 'yobi'),
}
|
||||||
|
|
||||||
def sanitize(string, ignore_characters=None, replace_characters=None):
|
def sanitize(string, ignore_characters=None, replace_characters=None):
|
||||||
"""Sanitize a string to strip special characters.
|
"""Sanitize a string to strip special characters.
|
||||||
|
|
||||||
@@ -49,6 +58,38 @@ def pagesToCount(multiple, total):
|
|||||||
return total
|
return total
|
||||||
return multiple
|
return multiple
|
||||||
|
|
||||||
|
def deHumansize(s):
    """
    Convert a human readable file size into a number of bytes.

    The leading number (digits and dots) is separated from the unit symbol
    and the symbol is looked up in the module-level SYMBOLS table. The
    position of a symbol inside its set is the power of 1024 it stands
    for, so 'K', 'kilo' and 'KiB' all mean 1024 bytes.

    Two spellings that are not in SYMBOLS are accepted as aliases of the
    customary single-letter symbols:

    * a lone lowercase 'k' (alias for 'K'), and
    * a customary symbol followed by 'B' ('KB', 'MB', 'GB', ...), which is
      the spelling used by the suffixes listed in humansize.

    :param str s: human file size that we want to convert, e.g. '1.5 GiB'
    :return: the guessed number of bytes, truncated to an integer
    :rtype: int
    :raises ValueError: when s is not a string or when its number or unit
        symbol cannot be interpreted
    """
    try:
        text = s.strip()
    except AttributeError:
        # e.g. None handed over from a regex that did not match: fail with
        # the documented exception instead of a TypeError.
        raise ValueError("can't interpret %r" % (s,))

    # Split the numeric prefix from the unit symbol.
    cut = 0
    while cut < len(text) and (text[cut].isdigit() or text[cut] == '.'):
        cut += 1
    letter = text[cut:].strip()
    try:
        num = float(text[:cut])
    except ValueError:
        # Empty or malformed number such as '' or '1.2.3'.
        raise ValueError("can't interpret %r" % (s,))

    for sset in SYMBOLS.values():
        if letter in sset:
            break
    else:
        # Not a known symbol: try the aliases of the customary letters.
        alias = letter
        if len(alias) == 2 and alias[1] == 'B':
            alias = alias[0]
        if alias == 'k':
            # treat 'k' as an alias for 'K' as per: http://goo.gl/kTQMs
            alias = 'K'
        sset = SYMBOLS['customary']
        if alias not in sset[1:]:
            raise ValueError("can't interpret %r" % (s,))
        letter = alias

    # Index 0 is plain bytes; every further position is another factor 1024.
    return int(num * (1 << (sset.index(letter) * 10)))
|
||||||
|
|
||||||
def humansize(nbytes):
|
def humansize(nbytes):
|
||||||
suffixes = ['B', 'KB', 'MB', 'GB', 'TB', 'PB']
|
suffixes = ['B', 'KB', 'MB', 'GB', 'TB', 'PB']
|
||||||
|
|||||||
Reference in New Issue
Block a user