Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
:raises QueryReturnedNotFoundException: When the server responds with a 404.
:raises ConnectionException: When download repeatedly failed."""
try:
resp = self._get_anonymous_session().get(url, stream=True)
if resp.status_code == 200:
self._log(filename, end=' ', flush=True)
with open(filename, 'wb') as file:
resp.raw.decode_content = True
shutil.copyfileobj(resp.raw, file)
else:
if resp.status_code == 403:
# suspected invalid URL signature
raise QueryReturnedForbiddenException("403 when accessing {}.".format(url))
if resp.status_code == 404:
# 404 not worth retrying.
raise QueryReturnedNotFoundException("404 when accessing {}.".format(url))
raise ConnectionException("HTTP error code {}.".format(resp.status_code))
except (urllib3.exceptions.HTTPError, requests.exceptions.RequestException, ConnectionException) as err:
error_string = "URL {}: {}".format(url, err)
if _attempt == self.max_connection_attempts:
raise ConnectionException(error_string)
self.error(error_string + " [retrying; skip with ^C]", repeat_at_end=False)
try:
self._sleep()
self._get_and_write_raw(url, filename, _attempt + 1)
except KeyboardInterrupt:
self.error("[skipped by user]", repeat_at_end=False)
raise ConnectionException(error_string)
def get_profile_metadata(self, profile_name: str) -> Tuple[Dict[str, Any], Dict[str, Any]]:
    """Retrieves a profile's metadata, for use with e.g. :meth:`get_profile_posts` and :meth:`check_profile_id`.

    :param profile_name: Name of the profile; the path ``'<profile_name>/'`` is requested via :meth:`get_json`.
    :return: Tuple of the ``graphql`` object of the first ``ProfilePage`` entry and the complete metadata.
    :raises ProfileNotExistsException: When the query raises :class:`QueryReturnedNotFoundException`."""
    # Only the request itself is guarded; extracting the result below cannot raise the not-found error.
    try:
        metadata = self.get_json('{}/'.format(profile_name), params={})
    except QueryReturnedNotFoundException as err:
        # Chain explicitly so the original not-found error is kept as __cause__ of the translated one.
        raise ProfileNotExistsException('Profile {} does not exist.'.format(profile_name)) from err
    return metadata['entry_data']['ProfilePage'][0]['graphql'], metadata
is_graphql_query = 'graphql/query' in path
if is_graphql_query:
waittime = graphql_query_waittime()
if waittime > 0:
self._log('\nToo many queries in the last time. Need to wait {} seconds.'.format(waittime))
time.sleep(waittime)
if self.query_timestamps is not None:
self.query_timestamps.append(time.monotonic())
else:
self.query_timestamps = [time.monotonic()]
sess = session if session else self.session
try:
self._sleep()
resp = sess.get('https://{0}/{1}'.format(host, path), params=params)
if resp.status_code == 404:
raise QueryReturnedNotFoundException("404")
if resp.status_code == 429:
raise TooManyRequests("429 - Too Many Requests")
if resp.status_code != 200:
raise ConnectionException("HTTP error code {}.".format(resp.status_code))
if not is_graphql_query and not "__a" in params and host == "www.instagram.com":
match = re.search(r'window\._sharedData = (.*);', resp.text)
if match is None:
raise ConnectionException("Could not find \"window._sharedData\" in html response.")
return json.loads(match.group(1))
else:
resp_json = resp.json()
if 'status' in resp_json and resp_json['status'] != "ok":
if 'message' in resp_json:
raise ConnectionException("Returned \"{}\" status, message \"{}\".".format(resp_json['status'],
resp_json['message']))
else: