Skip to content

Commit a9c5dab

Browse files
committed
[Search engine] Update extratorrent plugin. Closes qbittorrent#6261
1 parent 5b35981 commit a9c5dab

4 files changed

Lines changed: 40 additions & 192 deletions

File tree

Lines changed: 19 additions & 95 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
#VERSION: 2.06
1+
#VERSION: 3.00
22
#AUTHORS: Christophe Dumez (chris@qbittorrent.org)
33
#CONTRIBUTORS: Diego de las Heras (ngosang@hotmail.es)
44

@@ -26,7 +26,7 @@
2626
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
2727
# POSSIBILITY OF SUCH DAMAGE.
2828

29-
from HTMLParser import HTMLParser
29+
from xml.dom import minidom
3030
#qBt
3131
from novaprinter import prettyPrinter
3232
from helpers import download_file, retrieve_url
@@ -49,102 +49,26 @@ def download_torrent(self, info):
4949
""" Downloader """
5050
print(download_file(info))
5151

52-
class MyHtmlParseWithBlackJack(HTMLParser):
53-
""" Parser class """
54-
def __init__(self, list_searches, url):
55-
HTMLParser.__init__(self)
56-
self.url = url
57-
self.list_searches = list_searches
58-
self.current_item = None
59-
self.cur_item_name = None
60-
self.pending_size = False
61-
self.next_queries = True
62-
self.pending_next_queries = False
63-
self.next_queries_set = set()
64-
65-
def handle_starttag(self, tag, attrs):
66-
if self.current_item:
67-
if tag == "a":
68-
params = dict(attrs)
69-
link = params['href']
70-
71-
if link.startswith("/torrent/"):
72-
#description
73-
self.current_item["desc_link"] = "".join((self.url, link))
74-
#remove view at the beginning
75-
self.current_item["name"] = params["title"][5:-8].replace("&amp;", "&")
76-
self.pending_size = True
77-
elif link.startswith("magnet"):
78-
#magnet link
79-
self.current_item["link"] = link
80-
81-
elif tag == "td":
82-
if self.pending_size:
83-
self.cur_item_name = "size"
84-
self.current_item["size"] = ""
85-
self.pending_size = False
86-
87-
for attr in attrs:
88-
if attr[0] == "class":
89-
if attr[1][0] == "s":
90-
self.cur_item_name = "seeds"
91-
self.current_item["seeds"] = ""
92-
elif attr[1][0] == "l":
93-
self.cur_item_name = "leech"
94-
self.current_item["leech"] = ""
95-
break
96-
97-
98-
elif tag == "tr":
99-
for attr in attrs:
100-
if attr[0] == "class" and attr[1].startswith("tl"):
101-
self.current_item = dict()
102-
self.current_item["engine_url"] = self.url
103-
break
104-
105-
elif self.pending_next_queries:
106-
if tag == "a":
107-
params = dict(attrs)
108-
if params["title"] in self.next_queries_set:
109-
return
110-
self.list_searches.append(params['href'])
111-
self.next_queries_set.add(params["title"])
112-
if params["title"] == "10":
113-
self.pending_next_queries = False
114-
else:
115-
self.pending_next_queries = False
116-
117-
elif self.next_queries:
118-
if tag == "b" and ("class", "pager_no_link") in attrs:
119-
self.next_queries = False
120-
self.pending_next_queries = True
121-
122-
def handle_data(self, data):
123-
if self.cur_item_name:
124-
self.current_item[self.cur_item_name] = data
125-
if not self.cur_item_name == "size":
126-
self.cur_item_name = None
127-
128-
def handle_endtag(self, tag):
129-
if self.current_item:
130-
if tag == "tr":
131-
prettyPrinter(self.current_item)
132-
self.current_item = None
133-
13452
def search(self, what, cat="all"):
13553
""" Performs search """
136-
query = "".join((self.url, "/advanced_search/?with=", what, "&s_cat=", self.supported_categories[cat]))
137-
54+
query = "".join((self.url, "/rss.xml?type=search&search=", what, "&cid=", self.supported_categories[cat]))
13855
response = retrieve_url(query)
13956

140-
list_searches = []
141-
parser = self.MyHtmlParseWithBlackJack(list_searches, self.url)
142-
parser.feed(response)
143-
parser.close()
144-
145-
for search_query in list_searches:
146-
response = retrieve_url(self.url + search_query)
147-
parser.feed(response)
148-
parser.close()
57+
xmldoc = minidom.parseString(response)
58+
itemlist = xmldoc.getElementsByTagName('item')
59+
for item in itemlist:
60+
current_item = current_item = {"engine_url" : self.url}
61+
current_item['name'] = item.getElementsByTagName('title')[0].childNodes[0].data
62+
current_item["link"] = item.getElementsByTagName('enclosure')[0].attributes['url'].value
63+
current_item["desc_link"] = item.getElementsByTagName('link')[0].childNodes[0].data
64+
current_item["size"] = item.getElementsByTagName('size')[0].childNodes[0].data
65+
current_item["leech"] = item.getElementsByTagName('leechers')[0].childNodes[0].data
66+
if not current_item["leech"].isdigit():
67+
current_item["leech"] = ''
68+
current_item["seeds"] = item.getElementsByTagName('seeders')[0].childNodes[0].data
69+
if not current_item["seeds"].isdigit():
70+
current_item["seeds"] = ''
71+
72+
prettyPrinter(current_item)
14973

15074
return

src/searchengine/nova/engines/versions.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
demonoid: 1.22
2-
extratorrent: 2.06
2+
extratorrent: 3.00
33
legittorrents: 2.01
44
mininova: 2.02
55
piratebay: 2.15
Lines changed: 19 additions & 95 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
#VERSION: 2.06
1+
#VERSION: 3.00
22
#AUTHORS: Christophe Dumez (chris@qbittorrent.org)
33
#CONTRIBUTORS: Diego de las Heras (ngosang@hotmail.es)
44

@@ -26,7 +26,7 @@
2626
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
2727
# POSSIBILITY OF SUCH DAMAGE.
2828

29-
from html.parser import HTMLParser
29+
from xml.dom import minidom
3030
#qBt
3131
from novaprinter import prettyPrinter
3232
from helpers import download_file, retrieve_url
@@ -49,102 +49,26 @@ def download_torrent(self, info):
4949
""" Downloader """
5050
print(download_file(info))
5151

52-
class MyHtmlParseWithBlackJack(HTMLParser):
53-
""" Parser class """
54-
def __init__(self, list_searches, url):
55-
HTMLParser.__init__(self)
56-
self.url = url
57-
self.list_searches = list_searches
58-
self.current_item = None
59-
self.cur_item_name = None
60-
self.pending_size = False
61-
self.next_queries = True
62-
self.pending_next_queries = False
63-
self.next_queries_set = set()
64-
65-
def handle_starttag(self, tag, attrs):
66-
if self.current_item:
67-
if tag == "a":
68-
params = dict(attrs)
69-
link = params['href']
70-
71-
if link.startswith("/torrent/"):
72-
#description
73-
self.current_item["desc_link"] = "".join((self.url, link))
74-
#remove view at the beginning
75-
self.current_item["name"] = params["title"][5:-8].replace("&amp;", "&")
76-
self.pending_size = True
77-
elif link.startswith("magnet"):
78-
#magnet link
79-
self.current_item["link"] = link
80-
81-
elif tag == "td":
82-
if self.pending_size:
83-
self.cur_item_name = "size"
84-
self.current_item["size"] = ""
85-
self.pending_size = False
86-
87-
for attr in attrs:
88-
if attr[0] == "class":
89-
if attr[1][0] == "s":
90-
self.cur_item_name = "seeds"
91-
self.current_item["seeds"] = ""
92-
elif attr[1][0] == "l":
93-
self.cur_item_name = "leech"
94-
self.current_item["leech"] = ""
95-
break
96-
97-
98-
elif tag == "tr":
99-
for attr in attrs:
100-
if attr[0] == "class" and attr[1].startswith("tl"):
101-
self.current_item = dict()
102-
self.current_item["engine_url"] = self.url
103-
break
104-
105-
elif self.pending_next_queries:
106-
if tag == "a":
107-
params = dict(attrs)
108-
if params["title"] in self.next_queries_set:
109-
return
110-
self.list_searches.append(params['href'])
111-
self.next_queries_set.add(params["title"])
112-
if params["title"] == "10":
113-
self.pending_next_queries = False
114-
else:
115-
self.pending_next_queries = False
116-
117-
elif self.next_queries:
118-
if tag == "b" and ("class", "pager_no_link") in attrs:
119-
self.next_queries = False
120-
self.pending_next_queries = True
121-
122-
def handle_data(self, data):
123-
if self.cur_item_name:
124-
self.current_item[self.cur_item_name] = data
125-
if not self.cur_item_name == "size":
126-
self.cur_item_name = None
127-
128-
def handle_endtag(self, tag):
129-
if self.current_item:
130-
if tag == "tr":
131-
prettyPrinter(self.current_item)
132-
self.current_item = None
133-
13452
def search(self, what, cat="all"):
13553
""" Performs search """
136-
query = "".join((self.url, "/advanced_search/?with=", what, "&s_cat=", self.supported_categories[cat]))
137-
54+
query = "".join((self.url, "/rss.xml?type=search&search=", what, "&cid=", self.supported_categories[cat]))
13855
response = retrieve_url(query)
13956

140-
list_searches = []
141-
parser = self.MyHtmlParseWithBlackJack(list_searches, self.url)
142-
parser.feed(response)
143-
parser.close()
144-
145-
for search_query in list_searches:
146-
response = retrieve_url(self.url + search_query)
147-
parser.feed(response)
148-
parser.close()
57+
xmldoc = minidom.parseString(response)
58+
itemlist = xmldoc.getElementsByTagName('item')
59+
for item in itemlist:
60+
current_item = current_item = {"engine_url" : self.url}
61+
current_item['name'] = item.getElementsByTagName('title')[0].childNodes[0].data
62+
current_item["link"] = item.getElementsByTagName('enclosure')[0].attributes['url'].value
63+
current_item["desc_link"] = item.getElementsByTagName('link')[0].childNodes[0].data
64+
current_item["size"] = item.getElementsByTagName('size')[0].childNodes[0].data
65+
current_item["leech"] = item.getElementsByTagName('leechers')[0].childNodes[0].data
66+
if not current_item["leech"].isdigit():
67+
current_item["leech"] = ''
68+
current_item["seeds"] = item.getElementsByTagName('seeders')[0].childNodes[0].data
69+
if not current_item["seeds"].isdigit():
70+
current_item["seeds"] = ''
71+
72+
prettyPrinter(current_item)
14973

15074
return

src/searchengine/nova3/engines/versions.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
demonoid: 1.22
2-
extratorrent: 2.06
2+
extratorrent: 3.00
33
legittorrents: 2.01
44
mininova: 2.02
55
piratebay: 2.15

0 commit comments

Comments
 (0)