1. 乱码解决
毫无疑问, 一上来就碰到了乱码问题, 虽然我已按文档所述, 所有的字符都使用 UTF-8 编码:

 

    原来 loadHTML 会依靠 HTML 中声明的 meta 标签来判断编码. 如果没有这样的标签, 就当成 iso-8859-1 字符集处理, 所以出现乱码. 要解决, 就在字符串头部加上这样一个标签:

// Without a charset declaration loadHTML() treats the markup as ISO-8859-1,
// so prepend a <meta> tag declaring UTF-8 before handing the string over.
$meta = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>';
$markup = $meta . $html;
@$dom->loadHTML($markup);

 

PHP官网解释:

<?php
// Prefix the markup with an XML encoding declaration so that loadHTML()
// reads $html as UTF-8.
// A literal document works the same way, e.g.
//   $doc->loadHTML("<html><body>Test<br></body></html>");
$doc = new DOMDocument();
$doc->loadHTML('<?xml encoding="UTF-8">' . $html);

// Print the parsed document serialised back to HTML.
echo $doc->saveHTML();
?>

 

 

vendor\phpoffice\phpspreadsheet\src\PhpSpreadsheet\Helper\Html.php

 

  public function toRichTextObject($html)
    {
        $this->initialise(); //初始化

        //    Create a new DOM object
        $dom = new DOMDocument();
        //    Load the HTML file into the DOM object
        //  Note the use of error suppression, because typically this will be an html fragment, so not fully valid markup
        @$dom->loadHTML('<?xml encoding="UTF-8">' .$html);
 

        //return $dom->saveHTML();
        //    Discard excess white space //丢弃多余的空格
        $dom->preserveWhiteSpace = false;

        $this->richTextObject = new RichText();
        
        
        $this->parseElements($dom);  //解析,出现问题
        
        
        // Clean any further spurious whitespace
        $this->cleanWhitespace();

        return $this->richTextObject;
        //return ('html h很好<strong><strike>pizza</strike></strong>');
    }