Skip to content

Commit

Permalink
Merge branch 'master' into remove_core_except
Browse files: Browse the repository at this point in the history
  • Loading branch information
adbar authored Oct 2, 2024
2 parents f8807e6 + a882c61 commit 8cbffdd
Showing 1 changed file with 6 additions and 5 deletions.
11 changes: 6 additions & 5 deletions trafilatura/core.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -147,11 +147,6 @@ def bare_extraction(filecontent, url=None, no_fallback=False, # fast=False,

# load data
try:
- tree = load_html(filecontent)
- if tree is None:
- LOGGER.error('empty HTML tree: %s', url)
- raise ValueError
-
# regroup extraction options
if not options or not isinstance(options, Extractor):
options = Extractor(
Expand All @@ -164,6 +159,12 @@ def bare_extraction(filecontent, url=None, no_fallback=False, # fast=False,
author_blacklist=author_blacklist, url_blacklist=url_blacklist,
date_params=date_extraction_params
)
+
+ # load the HTML tree
+ tree = load_html(filecontent)
+ if tree is None:
+ LOGGER.error('empty HTML tree: %s', url)
+ raise ValueError

# quick and dirty HTML lang check
if options.lang and (options.fast or not LANGID_FLAG):
Expand Down

0 comments on commit 8cbffdd

Please sign in to comment.