[webpages] Fix bad title scrapes
This commit is contained in:
@@ -161,9 +161,11 @@ class WebPage(ScrobblableMixin):
|
||||
if not raw_text:
|
||||
return
|
||||
|
||||
self.title = raw_text[raw_text.find("<title>") + 7 : raw_text.find("</title>")]
|
||||
|
||||
if not self.title and self.extract:
|
||||
soup = BeautifulSoup(raw_text, "html.parser")
|
||||
title_tag = soup.find("title")
|
||||
if title_tag and title_tag.string:
|
||||
self.title = title_tag.string
|
||||
elif not self.title and self.extract:
|
||||
first_line = self.extract.split("\n")[0]
|
||||
self.title = first_line[:254]
|
||||
|
||||
|
||||
Reference in New Issue
Block a user