ConcurrentWebScraper constructor
ConcurrentWebScraper({
  required ProxyManager proxyManager,
  int maxConcurrentTasks = 5,
  ProxyHttpClient? httpClient,
  String? defaultUserAgent,
  Map<String, String>? defaultHeaders,
  int defaultTimeout = 30000,
  int maxRetries = 3,
  ScrapingLogger? logger,
  RobotsTxtHandler? robotsTxtHandler,
  StreamingHtmlParser? streamingParser,
  bool respectRobotsTxt = true,
})
Creates a new ConcurrentWebScraper with the given parameters; a brief usage sketch follows the parameter descriptions below.
proxyManager is the proxy manager used to obtain proxies.
maxConcurrentTasks is the maximum number of tasks that may run concurrently.
httpClient is the HTTP client to use for requests.
defaultUserAgent is the default user agent to send with requests.
defaultHeaders are the default headers to send with requests.
defaultTimeout is the default timeout for requests, in milliseconds.
maxRetries is the maximum number of retry attempts.
logger is the logger for scraping operations.
robotsTxtHandler is the handler for robots.txt files.
streamingParser is the streaming HTML parser.
respectRobotsTxt determines whether robots.txt rules are respected.
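For example, a minimal construction might look like the sketch below. The import path and the no-argument ProxyManager constructor are assumptions made for illustration, and the parameter values are arbitrary; consult the package's own documentation for the real setup.

// Import path is an assumption; substitute the package's actual entry point.
import 'package:web_scraper_package/web_scraper_package.dart';

void main() {
  // ProxyManager setup is package-specific; a no-argument constructor is
  // assumed here purely for illustration.
  final proxyManager = ProxyManager();

  final scraper = ConcurrentWebScraper(
    proxyManager: proxyManager,
    maxConcurrentTasks: 10,             // allow up to 10 tasks in flight
    defaultUserAgent: 'ExampleBot/1.0',
    defaultTimeout: 15000,              // 15-second request timeout
    maxRetries: 2,
    respectRobotsTxt: true,
  );
  // The scraper's fetch/scrape methods are documented elsewhere in the class.
}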
Implementation
ConcurrentWebScraper({
  required ProxyManager proxyManager,
  int maxConcurrentTasks = 5,
  ProxyHttpClient? httpClient,
  String? defaultUserAgent,
  Map<String, String>? defaultHeaders,
  int defaultTimeout = 30000,
  int maxRetries = 3,
  ScrapingLogger? logger,
  RobotsTxtHandler? robotsTxtHandler,
  StreamingHtmlParser? streamingParser,
  bool respectRobotsTxt = true,
})  : _webScraper = WebScraper(
        proxyManager: proxyManager,
        httpClient: httpClient,
        defaultUserAgent: defaultUserAgent,
        defaultHeaders: defaultHeaders,
        defaultTimeout: defaultTimeout,
        maxRetries: maxRetries,
        logger: logger,
        robotsTxtHandler: robotsTxtHandler,
        streamingParser: streamingParser,
        respectRobotsTxt: respectRobotsTxt,
      ),
      _taskQueue = ScrapingTaskQueue(
        maxConcurrentTasks: maxConcurrentTasks,
        logger: logger,
      ),
      _logger = logger ?? ScrapingLogger();
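The initializer list composes two collaborators: a WebScraper that performs the actual requests, and a ScrapingTaskQueue that caps how many tasks run at once (maxConcurrentTasks is forwarded straight to the queue). The queue's implementation is not shown on this page; the sketch below illustrates one common way such a limit can be enforced. SimpleTaskQueue and everything inside it are hypothetical stand-ins, not the package's actual ScrapingTaskQueue.

import 'dart:async';
import 'dart:collection';

// Illustrative stand-in for ScrapingTaskQueue, NOT its actual
// implementation: runs at most maxConcurrentTasks asynchronous
// tasks at the same time, starting pending tasks as slots free up.
class SimpleTaskQueue {
  SimpleTaskQueue({required this.maxConcurrentTasks});

  final int maxConcurrentTasks;
  int _running = 0;
  final Queue<Future<void> Function()> _pending = Queue();

  // Enqueue a task; the returned future completes when the task does.
  Future<T> add<T>(Future<T> Function() task) {
    final completer = Completer<T>();
    _pending.add(() async {
      try {
        completer.complete(await task());
      } catch (error, stackTrace) {
        completer.completeError(error, stackTrace);
      } finally {
        _running--; // free the slot and let the next pending task start
        _drain();
      }
    });
    _drain();
    return completer.future;
  }

  // Start pending tasks while free slots remain.
  void _drain() {
    while (_running < maxConcurrentTasks && _pending.isNotEmpty) {
      _running++;
      _pending.removeFirst()();
    }
  }
}

Because the limit lives in the queue rather than in each task, callers can enqueue any number of URLs and at most maxConcurrentTasks of them will be in flight at once.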