fetchHtmlStream method
Fetches HTML content as a stream from the given URL
url is the URL to fetch
headers are additional headers to send with the request
timeout is the request timeout in milliseconds; when omitted, the client's default timeout is used
retries is the number of retry attempts (note: the implementation shown below accepts this value but does not use it)
priority is the priority of the request (higher values = higher priority)
Implementation
/// Fetches the content at [url] and returns the response body as a byte
/// stream.
///
/// A `GET` request is scheduled on the task queue with the given [priority]
/// (higher values = higher priority). A `User-Agent` header obtained from
/// the user-agent rotator is sent by default; entries in [headers] are
/// applied afterwards and therefore take precedence.
///
/// [timeout] is in milliseconds and falls back to the default timeout when
/// omitted. It is applied to the future returned by the client's `send`
/// call, so consuming the returned stream is not covered by it.
///
/// NOTE(review): [retries] is accepted but not used by this method; any
/// retry behaviour has to come from the task queue or the caller — confirm
/// whether it should be forwarded.
///
/// A response with a status code outside `200..299` makes the task throw a
/// `ScrapingException`:
///
/// * `429` — rate limit, retryable.
/// * `403` — permission, not retryable.
/// * `401` — authentication, not retryable.
/// * `>= 500` — HTTP server error, retryable.
/// * anything else — HTTP error, not retryable.
Future<Stream<List<int>>> fetchHtmlStream({
  required String url,
  Map<String, String>? headers,
  int? timeout,
  int? retries,
  int priority = 0,
}) async {
  // Caller-supplied headers are spread last so that they override the
  // default User-Agent when they provide one.
  final effectiveHeaders = {
    'User-Agent': _userAgentRotator.getRandomUserAgent(),
    ...?headers,
  };

  return _taskQueue.addTask<Stream<List<int>>>(
    task: () async {
      final request = http.Request('GET', Uri.parse(url))
        ..headers.addAll(effectiveHeaders);
      final response = await _httpClient
          .send(request)
          .timeout(Duration(milliseconds: timeout ?? _defaultTimeout));

      final statusCode = response.statusCode;
      if (statusCode >= 200 && statusCode < 300) {
        return response.stream;
      }

      // The error body is never handed to the caller, so nobody else will
      // listen to it. Cancel it here so the underlying connection is not
      // left waiting for a consumer.
      try {
        await response.stream.listen(null).cancel();
      } catch (_) {
        // Best-effort cleanup: a failure while discarding the body must not
        // replace the HTTP error reported below.
      }

      // Map the status code to the matching exception type.
      if (statusCode == 429) {
        throw ScrapingException.rateLimit(
          'Rate limit exceeded',
          url: url,
          statusCode: statusCode,
          isRetryable: true,
        );
      } else if (statusCode == 403) {
        throw ScrapingException.permission(
          'Access forbidden',
          url: url,
          statusCode: statusCode,
          isRetryable: false,
        );
      } else if (statusCode == 401) {
        throw ScrapingException.authentication(
          'Authentication required',
          url: url,
          statusCode: statusCode,
          isRetryable: false,
        );
      } else if (statusCode >= 500) {
        throw ScrapingException.http(
          'Server error',
          url: url,
          statusCode: statusCode,
          isRetryable: true,
        );
      } else {
        // 429 and 5xx were handled above, so nothing that reaches this
        // branch is retryable.
        throw ScrapingException.http(
          'HTTP error: $statusCode',
          url: url,
          statusCode: statusCode,
          isRetryable: false,
        );
      }
    },
    priority: priority,
    taskName: 'FetchHTMLStream-$url',
  );
}