extractStructuredData method
Extracts structured data from a URL, running the work as a queued task with the given priority
url is the URL to fetch
selectors is a map of field names to CSS selectors
attributes is a map of field names to attributes to extract (optional)
priority is the priority of the task (higher values = higher priority)
headers are additional headers to send with the request
timeout is the timeout for the request in milliseconds
retries is the number of retry attempts
ignoreRobotsTxt is whether to ignore robots.txt rules (default: false)
Implementation
/// Queues a task that downloads [url] and extracts structured records from it.
///
/// The queued task first fetches the page HTML via `_webScraper.fetchHtml`,
/// forwarding [headers], [timeout], [retries] and [ignoreRobotsTxt], and then
/// passes that HTML to `_webScraper.extractStructuredData` together with
/// [selectors] and [attributes].
///
/// * [selectors] maps field names to CSS selectors.
/// * [attributes] optionally maps field names to the attribute to extract.
/// * [priority] is handed to the task queue; higher values mean higher
///   priority.
/// * [timeout] is the request timeout in milliseconds.
/// * [retries] is the number of retry attempts.
/// * [ignoreRobotsTxt] is whether robots.txt rules are ignored.
///
/// Returns whatever future the task queue produces for the scheduled task.
Future<List<Map<String, String>>> extractStructuredData({
  required String url,
  required Map<String, String> selectors,
  Map<String, String?>? attributes,
  int priority = 0,
  Map<String, String>? headers,
  int? timeout,
  int? retries,
  bool ignoreRobotsTxt = false,
}) {
  // The unit of work handed to the queue: download the page, then parse it.
  Future<List<Map<String, String>>> fetchThenExtract() async {
    final pageHtml = await _webScraper.fetchHtml(
      url: url,
      headers: headers,
      timeout: timeout,
      retries: retries,
      ignoreRobotsTxt: ignoreRobotsTxt,
    );

    return _webScraper.extractStructuredData(
      html: pageHtml,
      selectors: selectors,
      attributes: attributes,
    );
  }

  return _taskQueue.addTask<List<Map<String, String>>>(
    task: fetchThenExtract,
    priority: priority,
    taskName: 'ExtractStructuredData-$url',
  );
}