Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
if url_parts.fragment:
path_with_query_and_frag += '#{}'.format(url_parts.fragment)
for path in self.hidden_keyword_paths:
if not isinstance(path, string_types):
if path.search(path_with_query_and_frag):
keyword = False
break
elif path == path_with_query_and_frag:
keyword = False
break
if keyword is not None:
# Replace special placeholder with blank string
if keyword is False:
keyword = ''
return ExtractResult(engine_name, keyword, self)
result = None
if parser is None:
parser = get_parser(url_parts)
if parser is None:
if not use_naive_method:
return None # Tried to get keyword from non SERP URL
# Try to use naive method of detection
if _naive_re.search(url_parts.netloc):
query = _unicode_parse_qs(url_parts.query, keep_blank_values=True)
for param in _naive_params:
if param in query:
tld_res = tldextract.extract(url_parts.netloc)
return ExtractResult(tld_res.domain,
query[param][0],
None)
return None # Naive method could not detect a keyword either
result = parser.parse(url_parts)
if result is None:
return None
# Post-process the extracted keyword according to the caller's options.
if lower_case:
    result.keyword = result.keyword.lower()
if trimmed:
    result.keyword = result.keyword.strip()
if collapse_whitespace:
    # re.sub's fourth positional parameter is ``count``, not ``flags``.
    # Passing re.UNICODE positionally would cap the number of replacements
    # at the flag's integer value and never enable the flag, so it must be
    # supplied by keyword.
    result.keyword = re.sub(r'\s+', ' ', result.keyword, flags=re.UNICODE)
keyword = ' '.join(keys).strip()
if engine_name == 'Google':
# Check for usage of Google's top bar menu
tbm = query.get('tbm', [None])[0]
if tbm == 'isch':
engine_name = 'Google Images'
elif tbm == 'vid':
engine_name = 'Google Video'
elif tbm == 'shop':
engine_name = 'Google Shopping'
if keyword is not None:
# Edge case found a keyword, exit quickly
return ExtractResult(engine_name, keyword, self)
# Otherwise we keep looking through the defined extractors
for extractor in self.keyword_extractor:
if not isinstance(extractor, string_types):
# Regular expression extractor
match = extractor.search(url_parts.path)
if match:
keyword = match.group(1)
break
else:
# Search for keywords in query string
if extractor in query:
# Take the last param in the qs because it should be the
# most recent
keyword = query[extractor][-1]