import json

import pandas as pd


def interest_over_time(self):
    """Request data from Google's Interest Over Time section and return a dataframe"""
    over_time_payload = {
        # convert to string as requests will mangle
        "req": json.dumps(self.interest_over_time_widget["request"]),
        "token": self.interest_over_time_widget["token"],
        "tz": self.tz,
    }
    # make the request and parse the returned json
    req_json = self._get_data(
        url=TrendReq.INTEREST_OVER_TIME_URL,
        method=TrendReq.GET_METHOD,
        trim_chars=5,
        params=over_time_payload,
    )
    df = pd.DataFrame(req_json["default"]["timelineData"])
    if df.empty:
        return df
    df["date"] = pd.to_datetime(df["time"].astype(dtype="float64"), unit="s")
    df = df.set_index(["date"]).sort_index()
    # split list columns into separate ones, remove brackets and split on comma
    result_df = df["value"].apply(
        lambda x: pd.Series(str(x).replace("[", "").replace("]", "").split(","))
    )
    # rename each column with its search term, relying on order that google provides...
    # (the snippet is truncated here; a plausible completion maps the
    # positional columns back onto self.kw_list)
    for idx, kw in enumerate(self.kw_list):
        result_df[kw] = result_df[idx].astype("int")
    return result_df
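# A minimal usage sketch for the method above, assuming the standard pytrends
# entry point (TrendReq plus build_payload); the keywords and timeframe are
# examples only.
from pytrends.request import TrendReq

pytrends = TrendReq(hl="en-US", tz=360)
pytrends.build_payload(["python", "rust"], timeframe="today 3-m")
df = pytrends.interest_over_time()
print(df.head())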
import requests
from bs4 import BeautifulSoup


def bank_page_list(page=5):
    """
    Fetch the given number of pages of content.
    Note the path: http://www.cbirc.gov.cn/cn/list/9103/910305/ybjfjcf/1.html
    :param page: int, fetch content from page 1 up to page all_page
    :return: pd.DataFrame, also saved as a csv file
    """
    big_url_list = []
    big_title_list = []
    flag = True
    cbirc_headers = cbirc_headers_without_cookie_2019.copy()
    for i_page in range(1, page):
        # i_page = 1
        print(i_page)
        main_url = "http://www.cbirc.gov.cn/cn/list/9103/910305/ybjfjcf/{}.html".format(
            i_page
        )
        if flag:
            res = requests.get(main_url, headers=cbirc_headers)
            temp_cookie = res.headers["Set-Cookie"].split(";")[0]
            cbirc_headers.update({"Cookie": res.headers["Set-Cookie"].split(";")[0]})
        res = requests.get(main_url, headers=cbirc_headers)
        soup = BeautifulSoup(res.text, "lxml")
        res_html = (
            "function getClearance(){"
            + soup.find_all("script")[0].get_text()
            + "};"
        )  # the original snippet is truncated here
"""
if indicator:
if source or topic:
raise ValueError(INDIC_ERROR)
query_url = "/".join((INDICATOR_URL, parse_value_or_iterable(indicator)))
elif source:
if topic:
raise ValueError(INDIC_ERROR)
query_url = "/".join(
(SOURCES_URL, parse_value_or_iterable(source), "indicators")
)
elif topic:
query_url = "/".join((TOPIC_URL, parse_value_or_iterable(topic), "indicators"))
else:
query_url = INDICATOR_URL
return WBSearchResult(fetcher.fetch(query_url, cache=cache))
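# A hedged usage sketch, assuming this is the module-level get_indicator that
# older wbdata releases expose; source id 2 (World Development Indicators) is
# only an example.
import wbdata

indicators = wbdata.get_indicator(source=2)
for row in list(indicators)[:3]:
    print(row["id"], row["name"])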
def id_only_query(query_url, query_id, cache):
    """
    Retrieve information when ids are the only arguments
    :query_url: the base url to use for the query
    :query_id: an id or sequence of ids
    :cache: use the cache
    :returns: WBSearchResult containing dictionary objects describing results
    """
    if query_id:
        query_url = "/".join((query_url, parse_value_or_iterable(query_id)))
    # pass the documented cache flag through; the original snippet dropped it
    return WBSearchResult(fetcher.fetch(query_url, cache=cache))
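# parse_value_or_iterable is not shown in this snippet. A plausible stand-in,
# assuming the World Bank API convention of joining multiple ids with ";"
# (a hypothetical reconstruction, not the library's actual code):
def parse_value_or_iterable(arg):
    # a bare string or number is used as-is; an iterable is ";"-joined
    if isinstance(arg, str):
        return arg
    if isinstance(arg, (int, float)):
        return str(arg)
    return ";".join(str(a) for a in arg)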
# NOTE: the snippet is truncated above this point; the signature and the
# start of the docstring are reconstructed from the names used below.
def get_country(country_id=None, incomelevel=None, lendingtype=None, cache=True):
    """
    Retrieve information on a country or regional aggregate. Specify either
    country_id or the aggregate filters, but not both.
    :country_id: a country id or sequence of ids
    :incomelevel: desired incomelevel id or ids.
    :lendingtype: desired lendingtype id or ids.
    :cache: use the cache
    :returns: WBSearchResult containing dictionary objects representing each
        country
    """
    if country_id:
        if incomelevel or lendingtype:
            raise ValueError("Can't specify country_id and aggregates")
        return id_only_query(COUNTRIES_URL, country_id, cache=cache)
    args = {}
    if incomelevel:
        args["incomeLevel"] = parse_value_or_iterable(incomelevel)
    if lendingtype:
        args["lendingType"] = parse_value_or_iterable(lendingtype)
    return WBSearchResult(fetcher.fetch(COUNTRIES_URL, args, cache=cache))
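# Usage sketch, assuming the module-level wbdata API; "HIC" (high income) is
# an example incomeLevel id.
import wbdata

high_income = wbdata.get_country(incomelevel="HIC")
print(len(list(high_income)))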
import collections.abc


# NOTE: the snippet is truncated above this point; the signature is
# reconstructed from the names used in the body below.
def get_data(indicator, country="all", data_date=None, source=None,
             convert_date=False, cache=True):
    query_url = COUNTRIES_URL
    try:
        c_part = parse_value_or_iterable(country)
    except TypeError:
        raise TypeError("'country' must be a string or iterable")
    query_url = "/".join((query_url, c_part, "indicators", indicator))
    args = {}
    if data_date:
        # collections.Sequence was removed in Python 3.10; use collections.abc
        if isinstance(data_date, collections.abc.Sequence):
            data_date_str = ":".join(i.strftime("%Y") for i in data_date)
            args["date"] = data_date_str
        else:
            args["date"] = data_date.strftime("%Y")
    if source:
        args["source"] = source
    data = fetcher.fetch(query_url, args, cache=cache)
    if convert_date:
        data = convert_dates_to_datetime(data)
    return data
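# Usage sketch for the date handling above: a single datetime becomes one
# year, a sequence becomes a "start:end" range. The indicator code and dates
# are examples only.
import datetime

import wbdata

gdp = wbdata.get_data(
    "NY.GDP.MKTP.CD",
    country="USA",
    data_date=(datetime.datetime(2010, 1, 1), datetime.datetime(2015, 1, 1)),
)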
from urllib.parse import quote


def suggestions(self, keyword):
    """Request data from Google's Keyword Suggestion dropdown and return a dictionary"""
    # make the request
    kw_param = quote(keyword)
    parameters = {"hl": self.hl}
    req_json = self._get_data(
        url=TrendReq.SUGGESTIONS_URL + kw_param,
        params=parameters,
        method=TrendReq.GET_METHOD,
        trim_chars=5,
    )["default"]["topics"]
    return req_json
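# Usage sketch: suggestions() returns the raw "topics" list; each entry is a
# dict of topic metadata (keys such as "mid", "title", "type" in current
# responses, though Google does not guarantee them).
from pytrends.request import TrendReq

pytrends = TrendReq(hl="en-US", tz=360)
for topic in pytrends.suggestions("bitcoin"):
    print(topic)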
def _tokens(self):
    """Makes request to Google to get API tokens for interest over time, interest by region and related queries"""
    # make the request and parse the returned json
    widget_dict = self._get_data(
        url=TrendReq.GENERAL_URL,
        method=TrendReq.GET_METHOD,
        params=self.token_payload,
        trim_chars=4,
    )["widgets"]
    # order of the json matters...
    first_region_token = True
    # clear self.related_queries_widget_list and self.related_topics_widget_list
    # of old keywords' widgets
    self.related_queries_widget_list[:] = []
    self.related_topics_widget_list[:] = []
    # assign requests
    for widget in widget_dict:
        if widget["id"] == "TIMESERIES":
            self.interest_over_time_widget = widget
        if widget["id"] == "GEO_MAP" and first_region_token:
            self.interest_by_region_widget = widget
            # only the first GEO_MAP widget is wanted, so drop the flag; the
            # original snippet is truncated here, and the related-widget
            # handling below is a plausible completion given the lists
            # cleared above
            first_region_token = False
        if "RELATED_TOPICS" in widget["id"]:
            self.related_topics_widget_list.append(widget)
        if "RELATED_QUERIES" in widget["id"]:
            self.related_queries_widget_list.append(widget)
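# The trim_chars arguments used throughout these methods exist because Google
# prefixes its JSON responses with junk such as ")]}'" to defeat JSON
# hijacking. A minimal sketch of what a helper like _get_data has to do before
# parsing (a hypothetical reconstruction, not pytrends' actual implementation):
import json

import requests

def get_trimmed_json(url, params, trim_chars):
    resp = requests.get(url, params=params)
    # drop the anti-hijacking prefix, then parse the remainder as JSON
    return json.loads(resp.text[trim_chars:])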
def categories(self):
    """Request available categories data from Google's API and return a dictionary"""
    params = {"hl": self.hl}
    req_json = self._get_data(
        url=TrendReq.CATEGORIES_URL,
        params=params,
        method=TrendReq.GET_METHOD,
        trim_chars=5,
    )
    return req_json
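# Usage sketch: the returned dictionary is a nested tree of category names
# and ids (nested under "children" in current responses) whose ids feed
# build_payload's cat parameter.
from pytrends.request import TrendReq

pytrends = TrendReq(hl="en-US", tz=360)
cats = pytrends.categories()
print(cats.keys())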
def trending_searches(self, pn="united_states"):
    """Request data from Google's Hot Searches section and return a dataframe"""
    # make the request
    # forms became obsolete due to the new TRENDING_SEARCHES_URL
    # forms = {'ajax': 1, 'pn': pn, 'htd': '', 'htv': 'l'}
    req_json = self._get_data(
        url=TrendReq.TRENDING_SEARCHES_URL, method=TrendReq.GET_METHOD
    )[pn]
    result_df = pd.DataFrame(req_json)
    return result_df
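# Usage sketch: pn selects the locale key in Google's trending feed, e.g.
# "united_states" or "japan".
from pytrends.request import TrendReq

pytrends = TrendReq(hl="en-US", tz=360)
trending = pytrends.trending_searches(pn="japan")
print(trending.head())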