PHP DOM: Probleme mit der UTF-8-Codierung. Ich habe Probleme mit der Zeichencodierung, nachdem queryXpath ausgeführt wurde.
<?php
use Zend\Debug\Debug;
use Zend\Dom\Query;

// Bootstrap: declare UTF-8 for everything the script emits and processes,
// then make the bundled Zend Framework 2 classes loadable.
header ('Content-Type: text/html; charset=utf-8');
mb_internal_encoding ('utf-8');
mb_http_output ('utf-8');
// mb_http_input() is intentionally not called: it only reports the detected
// HTTP input encoding, and its argument is a one-letter input type
// ("G", "P", "C", "S", "L" or "I"), not an encoding name. Passing 'utf-8'
// configures nothing and is rejected with a ValueError on PHP 8.
mb_regex_encoding ('utf-8');

// NOTE: this replaces the whole include path, and the backslash makes the
// directory name Windows-specific.
ini_set ('include_path', 'ZendFramework-2.4.9\library');
require_once 'Zend/Loader/StandardAutoloader.php';
$autoloader = new Zend\Loader\StandardAutoloader (array (
    'fallback_autoloader' => true
));
$autoloader->register();
// Download the product page into $content, following redirects and keeping
// the response headers out of the returned body.
$url = "http://expert.com.pt/115-5-programas/14865-02-809-002-00263-meireles-maq-lavar-loica-mll-125-w-5604409141651.html";
$ch = curl_init ($url);
if ($ch === false) {
    exit ('Could not initialise cURL for ' . htmlspecialchars ($url, ENT_QUOTES, 'UTF-8'));
}
curl_setopt ($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt ($ch, CURLOPT_HEADER, 0);
curl_setopt ($ch, CURLOPT_FOLLOWLOCATION, true);
$content = curl_exec ($ch);
// The error text has to be read before the handle is closed.
$curl_error = ($content === false) ? curl_error ($ch) : '';
curl_close ($ch);
// curl_exec() returns false on failure; an empty body cannot be parsed
// either, so stop here instead of handing nothing to the DOM parser.
if ($content === false || $content === '') {
    exit ('Could not download ' . htmlspecialchars ($url, ENT_QUOTES, 'UTF-8') . ': ' . htmlspecialchars ($curl_error, ENT_QUOTES, 'UTF-8'));
}
// Parse the downloaded page and collect the product fields into $_arr.
// Non-ASCII characters are rewritten as HTML entities before parsing so the
// HTML parser does not depend on a charset declaration inside the markup.
$pdom = new Query (mb_convert_encoding ($content, 'HTML-ENTITIES', "UTF-8"));
$result = $pdom->queryXpath ('//*[@itemtype="http://schema.org/Product"]');
// The breadcrumb lives in the full page, not in the product fragment, and
// does not change between iterations, so it is queried once.
$categoria = $pdom->queryXpath ('//*[contains(@class,"breadcrumb clearfix")]/a[4]');

// Initialised up front so the dump below also works when nothing matches.
$_arr = array ();

foreach ($result as $r) {
    if (! $r->hasChildNodes()) {
        continue;
    }

    // C14N() serialises the product node as UTF-8, but the fragment carries
    // no <meta> charset declaration. Re-parsed as HTML it would be decoded
    // with the wrong charset and accented characters would come out garbled,
    // so it gets the same entity conversion as the full page.
    $lbHtml = $r->C14N();
    $dom2 = new Query (mb_convert_encoding ($lbHtml, 'HTML-ENTITIES', "UTF-8"));

    $nome_produto = $dom2->queryXpath ('//*[@itemprop="name"]');
    $ref_expert = $dom2->queryXpath ('//*[@itemprop="sku"]');
    $preco = $dom2->queryXpath ('//*[@itemprop="price"]');
    // Large picture; //*[@itemprop="image"] would select the small one.
    $imagem = $dom2->queryXpath ('//*[@id="bigpic"]');
    // Short description; the long one is //*[contains(@class,"product-desc")].
    $peq_desc = $dom2->queryXpath ('//*[@itemprop="description"]');
    $url_prod = $dom2->queryXpath ('//*[contains(@class,"pb-center-column col-xs-12 col-sm-4")]/p[4]/a');

    // For every field the last matching node wins. Iterating an empty node
    // list is a no-op, so no separate count() guard is needed.
    foreach ($nome_produto as $name) {
        $_arr ['name'] = $name->nodeValue;
    }
    foreach ($ref_expert as $ref) {
        $_arr ['ref'] = $ref->nodeValue;
    }
    foreach ($preco as $_preco) {
        // Take the first number in the text and use "." as decimal separator.
        // The price is only stored when a number was actually found.
        if (preg_match ("/((?:[0-9]+,)*[0-9]+(?:\.[0-9]+)?)/", $_preco->nodeValue, $price_match)) {
            $_arr ['price'] = (float) str_replace (",", ".", $price_match [0]);
        }
    }
    foreach ($imagem as $_image) {
        $_arr ['image'] = $_image->getAttribute ('src');
    }
    foreach ($peq_desc as $_peqdesc) {
        // Keeps the markup of the description, not just its text.
        $_arr ['description_small'] = $_peqdesc->C14N();
    }
    foreach ($url_prod as $_url_prod) {
        $_arr ['url_prod'] = $_url_prod->getAttribute ('href');
    }
    foreach ($categoria as $_categoria) {
        $_arr ['categoria'] = $_categoria->nodeValue;
    }
}

// Debug dump. The values come from a remote page and may contain markup, so
// they are escaped before being written into the HTML response.
echo "<pre>";
echo htmlspecialchars (print_r ($_arr, true), ENT_QUOTES, 'UTF-8');
echo "</pre>";
Der Code gibt Folgendes zurück:
Array
(
[name] =>MEIRELES - Máq. Lavar Loiça MLL 125 W
[ref] => 02.809.002.00263
[price] => 289.99
[image] => http://expert.com.pt/180503-large_default/02-809-002-00263-
[categoria] => 5 Programas
)
Nur damit es mir klar wird: Was ist Ihre erwartete Ausgabe? –
[name] => MEIRELES - Máq. Lavar Loiça MLL 125 W, also mit korrekt dargestellter Zeichencodierung –
Das Problem war, dass die von mir herausgefilterte HTML-Ausgabe das Meta-Tag mit der Zeichensatzangabe nicht mehr enthielt. –