PHP/HTML/tidy parse file
Содержание
Beautifying HTML Using Tidy
<source lang="html4strict">
<?php
// Beautify a remote page: fetch it, let Tidy repair the markup, and print the
// re-indented result.
$options = [
    'indent'        => true,  // re-indent nested elements
    'indent-spaces' => 4,     // width of one indentation level
    'wrap'          => 4096,  // wrap column, set high so long lines are left mostly intact
];

// tidy_parse_file() returns false when the document cannot be loaded, so stop
// here with a clear message instead of handing false to tidy_clean_repair().
$tidy = tidy_parse_file('http://www.php.net/', $options);
if ($tidy === false) {
    throw new RuntimeException('Tidy could not load or parse http://www.php.net/');
}

tidy_clean_repair($tidy);

// Echoing the tidy object prints the (repaired) document markup.
echo $tidy;
?>
</source>
Extracting URLs Using Tidy
<source lang="html4strict">
<?php
/**
 * Recursively collect the href value of every <a> element at or below a node.
 *
 * @param tidyNode   $node Node whose subtree is searched (the node itself included).
 * @param array|null $urls Accumulator, passed by reference. Any non-array value
 *                         is replaced with an empty array before collecting.
 *
 * @return array The accumulated href values.
 */
function dump_urls(tidyNode $node, &$urls = null): array
{
    if (!is_array($urls)) {
        $urls = [];
    }

    // Only anchors that actually carry an href are recorded; the attribute list
    // can be missing entirely or lack that key (e.g. <a name="top">).
    if (isset($node->id) && $node->id === TIDY_TAG_A && isset($node->attribute['href'])) {
        $urls[] = $node->attribute['href'];
    }

    if ($node->hasChildren()) {
        foreach ($node->child as $child) {
            // The same accumulator is shared by reference across the recursion.
            dump_urls($child, $urls);
        }
    }

    return $urls;
}

// tidy_parse_file() returns false when the document cannot be loaded.
$tidy = tidy_parse_file('http://www.php.net/');
if ($tidy === false) {
    throw new RuntimeException('Tidy could not load or parse http://www.php.net/');
}

// body() may return null when no <body> node is available; treat that as
// "no links" rather than passing null to dump_urls().
$body = $tidy->body();
$urls = ($body === null) ? [] : dump_urls($body);

print_r($urls);
?>
</source>
Reducing Bandwidth Usage Using Tidy
<source lang="html4strict">
<?php
// Shrink a page before sending it: ask Tidy to strip presentational and
// redundant markup, then print the cleaned document.
// NOTE(review): "drop-font-tags" appears to have been removed from newer HTML
// Tidy (tidy-html5) releases - confirm it is accepted by the installed libtidy.
$options = [
    'clean'                       => true,
    'drop-proprietary-attributes' => true,
    'drop-font-tags'              => true,
    'drop-empty-paras'            => true,
    'hide-comments'               => true,
    'join-classes'                => true,
    'join-styles'                 => true,
];

// tidy_parse_file() returns false when the document cannot be loaded, so stop
// here with a clear message instead of handing false to tidy_clean_repair().
$tidy = tidy_parse_file('http://www.php.net/', $options);
if ($tidy === false) {
    throw new RuntimeException('Tidy could not load or parse http://www.php.net/');
}

tidy_clean_repair($tidy);

// Echoing the tidy object prints the (cleaned) document markup.
echo $tidy;
?>
</source>
Retrieving the Root (Entry) Node in Tidy
<source lang="html4strict">
<?php
// Parse the remote page and grab the root node, the starting point for
// walking the parsed document tree.
$tidy = tidy_parse_file('http://www.php.net/');

// tidy_parse_file() returns false when the document cannot be loaded; calling
// ->root() on false would abort with an unhelpful "member function on bool" error.
if ($tidy === false) {
    throw new RuntimeException('Tidy could not load or parse http://www.php.net/');
}

$root = $tidy->root();
?>
</source>