Because The Pirate Bay reports file sizes in human-readable form (e.g. "1.4 GiB"), it is hard to sort results by file size. This commit adds a `deHumansize` function to our utils that converts these human-readable sizes into a byte count.
This commit is contained in:
		@@ -4,7 +4,7 @@ import re, logging
 | 
				
			|||||||
from bs4 import BeautifulSoup
 | 
					from bs4 import BeautifulSoup
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from http_utils import convert_query_to_percent_encoded_octets, build_url, fetch_url
 | 
					from http_utils import convert_query_to_percent_encoded_octets, build_url, fetch_url
 | 
				
			||||||
from utils import return_re_match
 | 
					from utils import return_re_match, deHumansize
 | 
				
			||||||
from torrent import Torrent
 | 
					from torrent import Torrent
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class Piratebay(object):
 | 
					class Piratebay(object):
 | 
				
			||||||
@@ -85,6 +85,7 @@ class Piratebay(object):
 | 
				
			|||||||
				
 | 
									
 | 
				
			||||||
				date = return_re_match(info_text, r"(\d+\-\d+\s\d+)|(Y\-day\s\d{2}\:\d{2})")
 | 
									date = return_re_match(info_text, r"(\d+\-\d+\s\d+)|(Y\-day\s\d{2}\:\d{2})")
 | 
				
			||||||
				size = return_re_match(info_text, r"(\d+(\.\d+)?\s[a-zA-Z]+)")
 | 
									size = return_re_match(info_text, r"(\d+(\.\d+)?\s[a-zA-Z]+)")
 | 
				
			||||||
 | 
									byteSize = deHumansize(size)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
				# COULD NOT FIND HREF!
 | 
									# COULD NOT FIND HREF!
 | 
				
			||||||
				if (magnet is None):
 | 
									if (magnet is None):
 | 
				
			||||||
@@ -94,7 +95,7 @@ class Piratebay(object):
 | 
				
			|||||||
				seed = seed_and_leech[0].get_text()
 | 
									seed = seed_and_leech[0].get_text()
 | 
				
			||||||
				leech = seed_and_leech[1].get_text()
 | 
									leech = seed_and_leech[1].get_text()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
				torrent = Torrent(name, magnet['href'], size, uploader, date, seed, leech, url)
 | 
									torrent = Torrent(name, magnet['href'], byteSize, uploader, date, seed, leech, url)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
				torrents_found.append(torrent)
 | 
									torrents_found.append(torrent)
 | 
				
			||||||
			else:
 | 
								else:
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -3,11 +3,20 @@
 | 
				
			|||||||
# @Author: KevinMidboe
 | 
					# @Author: KevinMidboe
 | 
				
			||||||
# @Date:   2017-11-01 15:57:23
 | 
					# @Date:   2017-11-01 15:57:23
 | 
				
			||||||
# @Last Modified by:   KevinMidboe
 | 
					# @Last Modified by:   KevinMidboe
 | 
				
			||||||
# @Last Modified time: 2017-11-02 16:20:29
 | 
					# @Last Modified time: 2017-11-19 00:05:10
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import re
 | 
					import re
 | 
				
			||||||
from datetime import datetime
 | 
					from datetime import datetime
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SYMBOLS = {
 | 
				
			||||||
 | 
						'customary'     : ('B', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y'),
 | 
				
			||||||
 | 
						'customary_ext' : ('byte', 'kilo', 'mega', 'giga', 'tera', 'peta', 'exa',
 | 
				
			||||||
 | 
										   'zetta', 'iotta'),
 | 
				
			||||||
 | 
						'iec'           : ('BiB', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'),
 | 
				
			||||||
 | 
						'iec_ext'       : ('byte', 'kibi', 'mebi', 'gibi', 'tebi', 'pebi', 'exbi',
 | 
				
			||||||
 | 
										   'zebi', 'yobi'),
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def sanitize(string, ignore_characters=None, replace_characters=None):
 | 
					def sanitize(string, ignore_characters=None, replace_characters=None):
 | 
				
			||||||
	"""Sanitize a string to strip special characters.
 | 
						"""Sanitize a string to strip special characters.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -49,6 +58,38 @@ def pagesToCount(multiple, total):
 | 
				
			|||||||
		return total
 | 
							return total
 | 
				
			||||||
	return multiple
 | 
						return multiple
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def deHumansize(s):
						"""
						Convert a human readable file size into a number of bytes.

						The string must start with a number (digits and an optional
						decimal point) followed by a unit symbol; whitespace around the
						symbol is ignored. The symbol is looked up in the SYMBOLS table.
						Two spellings outside that table are also accepted:

						* a lone 'k', treated as an alias for 'K'
						* a customary letter followed by 'B' ('KB', 'MB', 'GB', ...),
						  the suffix style listed in humansize

						Every step between units is a factor of 1024, whichever symbol
						set matched.

						:param str s: human file size that we want to convert, e.g. '1.5 GiB'
						:return: the guessed bytes from the human file size, truncated
							to a whole number
						:rtype: int
						:raises ValueError: when the number or the unit cannot be interpreted
						"""
						init = s

						# Find where the leading run of digits and dots ends.
						split_at = 0
						while split_at < len(s) and (s[split_at].isdigit() or s[split_at] == '.'):
							split_at += 1

						try:
							num = float(s[:split_at])
						except ValueError:
							# No number at all, or something like '1.2.3': report it with
							# the same message used for an unknown unit.
							raise ValueError("can't interpret %r" % init)

						letter = s[split_at:].strip()

						for sset in SYMBOLS.values():
							if letter in sset:
								break
						else:
							customary = SYMBOLS['customary']
							if letter == 'k':
								# treat 'k' as an alias for 'K' as per: http://goo.gl/kTQMs
								sset = customary
								letter = 'K'
							elif len(letter) == 2 and letter[1] == 'B' and letter[0] in customary[1:]:
								# 'KB', 'MB', ...: drop the trailing 'B' and use the
								# single-letter customary symbol.
								sset = customary
								letter = letter[0]
							else:
								raise ValueError("can't interpret %r" % init)

						# The position of the symbol in its set is its power of 1024;
						# position 0 is the plain byte.
						exponent = sset.index(letter)
						return int(num * (1 << exponent * 10))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def humansize(nbytes):
 | 
					def humansize(nbytes):
 | 
				
			||||||
	suffixes = ['B', 'KB', 'MB', 'GB', 'TB', 'PB']
 | 
						suffixes = ['B', 'KB', 'MB', 'GB', 'TB', 'PB']
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user