php解析word文件
阿新 • • 發佈:2018-12-10
一個簡單的word文件閱讀類,使用正則實現簡單的docx文件閱讀,下面是程式碼
<?php

/**
 * Minimal regex-based reader that turns a .docx file into plain HTML.
 *
 * Only paragraphs, tables and text-box text are kept; all styling is dropped.
 *
 * by sdxjwkq
 */
class Lib
{
    /**
     * Read a .docx file and convert its main document part to HTML.
     *
     * Tables are first swapped out for placeholder tokens so that the
     * paragraphs they contain are not picked up as top-level paragraphs,
     * then paragraphs and placeholders are rendered in document order.
     * All document text is HTML-escaped before being emitted.
     *
     * Limitation: tables are located with a lazy regex, so a table nested
     * inside another table is not extracted correctly.
     *
     * @param string $file Path to the .docx file.
     *
     * @return string Generated HTML (a <style> block followed by <p>/<table> elements).
     *
     * @throws \RuntimeException If the archive cannot be opened, does not contain
     *                           word/document.xml, or a regex step fails.
     */
    public function docxToHtml($file)
    {
        $xml = $this->readDocumentXml($file);

        // Token that temporarily stands in for each extracted table.
        $placeholder = '@TABLECONTENT@';

        // Stash every table and leave a numbered placeholder in its place.
        $tables = array();
        $xml = preg_replace_callback(
            '/<w:tbl>[\s\S]*?<\/w:tbl>/',
            function ($match) use (&$tables, $placeholder) {
                $key = $placeholder . count($tables);
                $tables[$key] = $match[0];

                return $key;
            },
            $xml
        );
        if ($xml === null) {
            throw new \RuntimeException('Table extraction failed (PCRE error ' . preg_last_error() . ')');
        }

        // Collapse each text box (<w:pict>) to just its <w:t> elements so the
        // text is rendered as part of the paragraph that hosts the box.
        $xml = preg_replace_callback(
            '/<w:pict>[\s\S]*?<\/w:pict>/',
            function ($match) {
                preg_match_all('/<w:t(?:\s[^>]*)?>[\s\S]*?<\/w:t>/', $match[0], $texts);

                return implode('', $texts[0]);
            },
            $xml
        );
        if ($xml === null) {
            throw new \RuntimeException('Text box extraction failed (PCRE error ' . preg_last_error() . ')');
        }

        // Split into paragraphs and table placeholders, in document order.
        // The self-closing alternative must come first: otherwise an empty
        // <w:p .../> would be treated as an opening tag and swallow the
        // paragraph that follows it.
        $pattern = '/<w:p(?:\s[^>]*)?\/>'
            . '|<w:p(?:\s[^>]*)?>[\s\S]*?<\/w:p>'
            . '|' . preg_quote($placeholder, '/') . '\d+/';
        if (preg_match_all($pattern, $xml, $matches) === false) {
            throw new \RuntimeException('Paragraph extraction failed (PCRE error ' . preg_last_error() . ')');
        }

        $html = implode("\n", array(
            '<style>',
            'table{',
            '    background-color:#000;',
            '}',
            'table td{',
            '    padding:5px 5px 5px 5px;',
            '}',
            'table tr{',
            '    background-color:#fff;',
            '}',
            '</style>',
        ));

        foreach ($matches[0] as $item) {
            $isTable = isset($tables[$item]);
            $node = $this->parseFragment($isTable ? $tables[$item] : $item);

            if ($node === null) {
                // Fragment was not well-formed XML; keep an empty paragraph
                // so the surrounding content still renders.
                $html .= '<p></p>';
                continue;
            }

            $html .= $isTable
                ? $this->renderTable($node)
                : '<p>' . $this->paragraphText($node) . '</p>';
        }

        return $html;
    }

    /**
     * Extract the raw XML of word/document.xml from the .docx archive.
     *
     * @param string $file Path to the .docx file.
     *
     * @return string Contents of word/document.xml.
     *
     * @throws \RuntimeException If the archive or the document part cannot be read.
     */
    private function readDocumentXml($file)
    {
        $zip = new \ZipArchive();
        $status = $zip->open($file);
        if ($status !== true) {
            throw new \RuntimeException(
                sprintf('Unable to open docx file "%s" (ZipArchive error code %s)', $file, var_export($status, true))
            );
        }

        $xml = $zip->getFromName('word/document.xml');
        $zip->close();

        if ($xml === false) {
            throw new \RuntimeException(
                sprintf('"%s" does not contain word/document.xml', $file)
            );
        }

        return $xml;
    }

    /**
     * Parse one extracted <w:p>/<w:tbl> fragment into a SimpleXMLElement.
     *
     * The fragment has no namespace declarations of its own, so the "w:"
     * prefix is removed from element and attribute names. The replacement is
     * restricted to the inside of tags so that document text which happens
     * to contain "w:" is left untouched.
     *
     * @param string $fragment Raw XML fragment cut out of document.xml.
     *
     * @return \SimpleXMLElement|null Parsed element, or null if the fragment is not well-formed.
     */
    private function parseFragment($fragment)
    {
        $stripped = preg_replace_callback(
            '/<[^>]*>/',
            function ($tag) {
                return preg_replace('/\bw:/', '', $tag[0]);
            },
            $fragment
        );

        // Other prefixes (w14:, wp:, ...) are still undeclared in the
        // fragment; collect libxml's complaints instead of emitting warnings.
        $previous = libxml_use_internal_errors(true);
        $node = simplexml_load_string((string) $stripped);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        return $node === false ? null : $node;
    }

    /**
     * Render a parsed table element as an HTML table.
     *
     * Every row and cell is visited regardless of how many there are;
     * multiple paragraphs inside one cell are separated by <br>.
     *
     * @param \SimpleXMLElement $table Parsed <tbl> element.
     *
     * @return string HTML <table> markup.
     */
    private function renderTable(\SimpleXMLElement $table)
    {
        $html = "<table border='0' cellspacing='1' cellpadding='0'>";
        foreach ($table->tr as $row) {
            $html .= '<tr>';
            foreach ($row->tc as $cell) {
                $paragraphs = array();
                foreach ($cell->p as $paragraph) {
                    $paragraphs[] = $this->paragraphText($paragraph);
                }
                $html .= '<td>' . implode('<br>', $paragraphs) . '</td>';
            }
            $html .= '</tr>';
        }

        return $html . '</table>';
    }

    /**
     * Concatenate the HTML-escaped text of all runs directly inside a paragraph.
     *
     * @param \SimpleXMLElement $paragraph Parsed <p> element.
     *
     * @return string Escaped text content (empty string when the paragraph has no text runs).
     */
    private function paragraphText(\SimpleXMLElement $paragraph)
    {
        $text = '';
        foreach ($paragraph->r as $run) {
            foreach ($run->t as $t) {
                $text .= htmlspecialchars((string) $t, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
            }
        }

        return $text;
    }
}