extractDataStream method
Extracts data from a URL using streaming for memory efficiency
url is the URL to fetch
selector is the CSS selector to use
attribute is the attribute to extract (optional)
asText indicates whether to extract the text content (default: true)
headers are additional headers to send with the request
timeout is the timeout for the request in milliseconds
retries is the number of retry attempts
ignoreRobotsTxt indicates whether to ignore robots.txt rules (default: false)
chunkSize is the size of each chunk to process (default: 1024 * 1024 bytes)
Implementation
/// Streams the values matched by [selector] in the page fetched from [url].
///
/// The HTML is requested through `fetchHtmlStream` and the resulting stream
/// is passed to the streaming parser's `extractDataStream`; every item the
/// parser produces is re-emitted on the returned stream in the same order.
///
/// * [url] is the URL to fetch.
/// * [selector] is the CSS selector to use.
/// * [attribute] is the attribute to extract (optional).
/// * [asText] is whether to extract the text content (defaults to `true`).
/// * [headers] are additional headers to send with the request.
/// * [timeout] is the timeout for the request in milliseconds.
/// * [retries] is the number of retry attempts.
/// * [ignoreRobotsTxt] is whether to ignore robots.txt rules (defaults to
///   `false`).
/// * [chunkSize] is the size of each chunk to process (defaults to
///   `1024 * 1024` bytes).
Stream<String> extractDataStream({
  required String url,
  required String selector,
  String? attribute,
  bool asText = true,
  Map<String, String>? headers,
  int? timeout,
  int? retries,
  bool ignoreRobotsTxt = false,
  int chunkSize = 1024 * 1024, // 1 MiB per chunk
}) async* {
  // Fetch first: the request options only concern the download step.
  final source = await fetchHtmlStream(
    url: url,
    headers: headers,
    timeout: timeout,
    retries: retries,
    ignoreRobotsTxt: ignoreRobotsTxt,
  );

  // Parse incrementally and pass each extracted value straight through.
  await for (final value in _streamingParser.extractDataStream(
    htmlStream: source,
    selector: selector,
    attribute: attribute,
    asText: asText,
    chunkSize: chunkSize,
  )) {
    yield value;
  }
}