extractContentWithPagination method
Future<List<TextExtractionResult>>
extractContentWithPagination({
- required String url,
- required PaginationConfig paginationConfig,
- LazyLoadConfig? lazyLoadConfig,
- TextExtractionOptions textExtractionOptions = const TextExtractionOptions(),
- Map<String, String>? headers,
- int? timeout,
- int? retries,
})
Extracts the main content from multiple pages with pagination.
`url` is the starting URL.
`paginationConfig` is the pagination configuration.
`lazyLoadConfig` is the lazy loading configuration (optional).
`textExtractionOptions` are the text extraction options (optional).
`headers` are additional headers to send with the request (optional).
`timeout` is the timeout for the request, in milliseconds (optional).
`retries` is the number of retry attempts (optional).
Implementation
/// Extracts text content from a paginated series of pages, starting at [url].
///
/// Pagination is delegated to `paginationHandler.scrapeWithPagination`, which
/// receives [paginationConfig], [headers], [timeout] and [retries] and calls a
/// per-page extractor with each page's HTML and URL.
///
/// For each page, if [lazyLoadConfig] is non-null and its `handleLazyLoading`
/// flag is set, the page URL is first passed to
/// `lazyLoadHandler.handleLazyLoading` (with [headers]) and the HTML it
/// returns replaces the HTML supplied by the pagination handler. Note that
/// [timeout] and [retries] are forwarded to the pagination handler only, not
/// to the lazy-load handler.
///
/// The resulting HTML is run through `textExtractor.extractText` using
/// [textExtractionOptions]. [timeout] is documented as milliseconds.
///
/// Returns the `results` list of the pagination handler's result.
Future<List<TextExtractionResult>> extractContentWithPagination({
  required String url,
  required PaginationConfig paginationConfig,
  LazyLoadConfig? lazyLoadConfig,
  TextExtractionOptions textExtractionOptions = const TextExtractionOptions(),
  Map<String, String>? headers,
  int? timeout,
  int? retries,
}) async {
  // Per-page callback handed to the pagination handler.
  Future<TextExtractionResult> extractPage(String html, String pageUrl) async {
    // Pick the HTML to extract from: the lazy-loaded version when lazy
    // loading is enabled, otherwise the HTML passed in as-is.
    final String effectiveHtml;
    if (lazyLoadConfig == null || !lazyLoadConfig.handleLazyLoading) {
      effectiveHtml = html;
    } else {
      final lazyLoaded = await lazyLoadHandler.handleLazyLoading(
        url: pageUrl,
        config: lazyLoadConfig,
        headers: headers,
      );
      effectiveHtml = lazyLoaded.html;
    }

    return textExtractor.extractText(
      effectiveHtml,
      options: textExtractionOptions,
    );
  }

  // Walk the pages and collect one extraction result per page.
  final paginated = await paginationHandler.scrapeWithPagination(
    url: url,
    config: paginationConfig,
    extractor: extractPage,
    headers: headers,
    timeout: timeout,
    retries: retries,
  );

  return paginated.results;
}