1. 乱码解决
毫无疑问, 一上来就碰到了乱码问题, 虽然我已按文档所述, 所有的字符都使用 UTF-8 编码:
原来 loadHTML 会依据 HTML 中声明的 meta 标签来确定编码. 假如没有这样的标签, 就当作 iso-8859-1 字符集处理, 所以出现乱码. 要解决, 就在字符串头部加上这样的一个标签:
// Charset declaration to prepend, so that loadHTML() does not fall back to iso-8859-1.
$meta = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>';
// "@" silences the warnings raised while loading; $dom and $html are assumed
// to be defined by the surrounding code (not shown in this snippet).
@$dom->loadHTML($meta . $html);
PHP官网解释:
<?php
// Example: load an HTML string as UTF-8 and print the re-serialised document.
// NOTE(review): $html is not defined in this snippet; it is assumed to hold the markup.
$doc = new DOMDocument();
// The XML encoding declaration is prepended so the input is read as UTF-8.
$doc->loadHTML('<?xml encoding="UTF-8">' . $html); // Alternative with fixed markup: $doc->loadHTML("<html><body>Test<br></body></html>");
echo $doc->saveHTML();
?>
vendor\phpoffice\phpspreadsheet\src\PhpSpreadsheet\Helper\Html.php
/**
 * Build a rich-text object from an HTML string.
 *
 * The markup is loaded into a DOMDocument with an XML encoding declaration
 * prepended (so it is read as UTF-8), then passed to parseElements() and
 * tidied by cleanWhitespace().
 *
 * @param string $html markup to convert; it is concatenated onto the encoding prefix
 *
 * @return RichText the value held in $this->richTextObject once parsing has finished
 */
public function toRichTextObject($html)
{
    // Run the helper's initialisation step first.
    $this->initialise();

    // Declaration prepended to the markup so that loadHTML() treats it as UTF-8.
    $encodingPrefix = '<?xml encoding="UTF-8">';

    // Load the markup into a fresh DOM document. Errors are suppressed on purpose:
    // the input is typically an HTML fragment, so it is not fully valid markup.
    $document = new DOMDocument();
    @$document->loadHTML($encodingPrefix . $html);
    // Debugging aid: returning $document->saveHTML() at this point shows the parsed markup.

    // Discard excess white space.
    // NOTE(review): this flag is set after loadHTML() has already run - confirm it has any effect here.
    $document->preserveWhiteSpace = false;

    $this->richTextObject = new RichText();
    // Parse the DOM; this is the step where the encoding problem showed up.
    $this->parseElements($document);

    // Clean any further spurious whitespace.
    $this->cleanWhitespace();

    // Sample string used while debugging: 'html h很好<strong><strike>pizza</strike></strong>'
    return $this->richTextObject;
}