Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
#!/usr/bin/env python
from ruia import Middleware
# Two separate Middleware instances; they are used below as decorator targets
# (`.request` / `.response`) for the hook coroutines.
middleware01 = Middleware()
middleware02 = Middleware()
@middleware01.request
async def print_on_request01(spider_ins, request):
    """Request hook on ``middleware01``: force a fixed User-Agent header.

    Args:
        spider_ins: First hook argument; unused here (presumably the running
            spider instance -- confirm against ruia).
        request: Object whose ``headers`` attribute is overwritten.
    """
    # Plain assignment replaces the whole headers mapping instead of merging.
    request.headers = {"User-Agent": "ruia ua"}
@middleware01.response
async def print_on_response01(spider_ins, request, response):
    """Response hook on ``middleware01``: check that ``response.html`` is text.

    Args:
        spider_ins: First hook argument; unused here.
        request: Second hook argument; unused here.
        response: Object whose ``html`` attribute is inspected.

    Raises:
        AssertionError: If ``response.html`` is not a ``str``.
    """
    assert isinstance(response.html, str)
@middleware02.request
async def print_on_request02(spider_ins, request):
    """Request hook on ``middleware02`` that leaves the request untouched.

    Args:
        spider_ins: First hook argument; unused.
        request: Second hook argument; unused.
    """
    pass
def test_spider_with_error_middleware():
    """Start a spider with a middleware whose hooks misbehave.

    The test contains no assertions, so it passes as long as
    ``SpiderDemo.start`` returns without raising.
    """
    error_middleware = Middleware()

    # NOTE(review): unlike the other request hooks in this file, this one is
    # a plain ``def`` and takes a third ``response`` parameter. That looks
    # intentional for an error-path test -- confirm before "fixing" it.
    @error_middleware.request
    def error_request(spider_ins, request, response):
        pass

    @error_middleware.response
    async def error_response(spider_ins, request, response):
        # Response hook that always fails.
        raise TypeError("error")

    class SpiderDemo(Spider):
        start_urls = ["https://httpbin.org/get?p=0"]

        async def parse(self, response):
            # The response content is irrelevant to this test.
            pass

    SpiderDemo.start(middleware=error_middleware)
#!/usr/bin/env python
import asyncio
import os
from ruia import Item, Middleware, Response, Request, Spider, TextField
# Path of the HTML fixture: <directory of this file>/data/for_spider_testing.html
html_path = os.path.join(
    os.path.dirname(os.path.realpath(__file__)),
    "data",
    "for_spider_testing.html",
)

# Read the fixture once at import time; HTML holds its full text.
with open(html_path, mode="r", encoding="utf-8") as file:
    HTML = file.read()
# Middleware instance used below as the decorator target for the request and
# response hooks.
middleware = Middleware()
async def retry_func(request):
    """Set the request's ``TIMEOUT`` config entry to 10.

    NOTE(review): presumably handed to ruia as a retry callback; no call
    site is visible in this chunk.

    Args:
        request: Object exposing a mutable ``request_config`` mapping.

    Returns:
        None. ``request`` is modified in place.
    """
    request.request_config["TIMEOUT"] = 10
@middleware.request
async def print_on_request(spider_ins, request):
    """Request hook on ``middleware``: force a fixed User-Agent header.

    Args:
        spider_ins: First hook argument; unused here.
        request: Object whose ``headers`` attribute is overwritten.
    """
    # Replaces the whole headers mapping; the response hook in this file
    # asserts on exactly this value.
    request.headers = {"User-Agent": "ruia ua"}
@middleware.response
async def print_on_response(spider_ins, request, response):
    """Response hook on ``middleware``: verify the response and request headers.

    Args:
        spider_ins: First hook argument; unused here.
        request: Object whose ``headers`` attribute is compared.
        response: Object whose ``html`` attribute is type-checked.

    Raises:
        AssertionError: If ``response.html`` is not a ``str`` or the request
            headers differ from ``{"User-Agent": "ruia ua"}``.
    """
    assert isinstance(response.html, str)
    assert request.headers == {"User-Agent": "ruia ua"}
#!/usr/bin/env python
from ruia import Middleware
# Two separate Middleware instances; they are used below as decorator targets
# (`.request` / `.response`) for the hook coroutines.
middleware01 = Middleware()
middleware02 = Middleware()
@middleware01.request
async def print_on_request01(spider_ins, request):
    """Request hook on ``middleware01``: force a fixed User-Agent header.

    Args:
        spider_ins: First hook argument; unused here (presumably the running
            spider instance -- confirm against ruia).
        request: Object whose ``headers`` attribute is overwritten.
    """
    # Plain assignment replaces the whole headers mapping instead of merging.
    request.headers = {"User-Agent": "ruia ua"}
@middleware01.response
async def print_on_response01(spider_ins, request, response):
    """Response hook on ``middleware01``: check that ``response.html`` is text.

    Args:
        spider_ins: First hook argument; unused here.
        request: Second hook argument; unused here.
        response: Object whose ``html`` attribute is inspected.

    Raises:
        AssertionError: If ``response.html`` is not a ``str``.
    """
    assert isinstance(response.html, str)
@middleware02.request
async def print_on_request02(spider_ins, request):
    """Request hook on ``middleware02`` that leaves the request untouched.

    Args:
        spider_ins: First hook argument; unused.
        request: Second hook argument; unused.
    """
    # The header had no body here; ``pass`` matches the identical definition
    # that appears earlier in this file.
    pass
#!/usr/bin/env python
from ruia import Spider, Middleware
# Middleware instance used below as the decorator target for the request and
# response hooks.
middleware = Middleware()
@middleware.request
async def print_on_request(spider_ins, request):
    """Request hook on ``middleware``: record the URL as metadata and print it.

    Args:
        spider_ins: First hook argument; unused here.
        request: Object whose ``url`` is read and whose ``metadata``
            attribute is overwritten.
    """
    request.metadata = {"url": request.url}
    print(f"request: {request.metadata}")
    # Just operate on the request object; do not return anything.
@middleware.response
async def print_on_response(spider_ins, request, response):
    """Response hook on ``middleware``: print the response's metadata.

    Args:
        spider_ins: First hook argument; unused here.
        request: Second hook argument; unused here.
        response: Object whose ``metadata`` attribute is printed.
    """
    print(f"response: {response.metadata}")
class MiddlewareSpiderDemo(Spider):
    """Spider subclass whose only visible setting is its start URL list.

    NOTE(review): no ``parse`` method is visible in this chunk -- confirm
    that the rest of the class was not lost.
    """

    start_urls = ["https://httpbin.org/get"]
#!/usr/bin/env python
from ruia import Middleware
# Middleware instance used below as the decorator target for the request hook.
middleware = Middleware()
@middleware.request
async def print_on_request(spider_ins, request):
    """Request hook on ``middleware``: set the User-Agent header.

    Unlike a plain assignment, ``headers.update`` leaves any other entries
    already present in ``request.headers`` in place.

    Args:
        spider_ins: First hook argument; unused here.
        request: Object whose ``headers`` mapping is updated in place.
    """
    ua = "ruia user-agent"
    request.headers.update({"User-Agent": ua})
    # Optional proxy example, left disabled:
    # request.kwargs.update({"proxy": "http://0.0.0.0:8118"})