관리-도구
편집 파일: class-export.php
<?php /** * Export to HTML. * * @package documentor */ /** * Export to HTML Class */ class Documentor_Export { /** * Image extensions. * * @var array */ public $img_extensions = array( 'gif', 'jpg', 'jpeg', 'png', 'tiff', 'tif', 'bmp', 'svg' ); /** * Regexp to remove from final html. * * @var string */ public $clean_html_regexp = '/<link.*(rel=(\"|\')profile(\"|\'))[^>]*?>/i /<link.*(rel=(\"|\')pingback(\"|\'))[^>]*?>/i /<link.*(rel=(\"|\')dns-prefetch(\"|\'))[^>]*?>/i /<link.*(rel=(\"|\')alternate(\"|\'))[^>]*?>/i /<link.*(rel=(\"|\')https:\/\/api.w.org\/(\"|\'))[^>]*?>/i /<link.*(rel=(\"|\')EditURI(\"|\'))[^>]*?>/i /<link.*(rel=(\"|\')wlwmanifest(\"|\'))[^>]*?>/i /<link.*(rel=(\"|\')next(\"|\'))[^>]*?>/i /<link.*(rel=(\"|\')prev(\"|\'))[^>]*?>/i /<link.*(rel=(\"|\')canonical(\"|\'))[^>]*?>/i /<link.*(rel=(\"|\')shortlink(\"|\'))[^>]*?>/i /<meta.*(name=(\"|\')generator(\"|\'))[^>]*?>/i'; /** * Custom CSS. * * @var string */ public $custom_css = '.documentor-single-feedback, .documentor-search-form, .documentor-search-form-result { display: none; } body { background-color: #fff; }'; /** * Custom JS. * * @var string */ public $custom_js = ''; /** * Max delta. * * @var integer */ public $max_delta = 1; /** * Construct. */ public function __construct() { if ( function_exists( 'domain_mapping_siteurl' ) ) { $this->wp_site_url = domain_mapping_siteurl( get_current_blog_id() ); $this->wp_content_url = str_replace( get_original_url( $this->wp_site_url ), $this->wp_site_url, content_url() ); } else { $this->wp_site_url = site_url(); $this->wp_content_url = content_url(); } $this->wp_content_name = '/' . wp_basename( WP_CONTENT_DIR ); $this->wp_root_url = str_replace( $this->wp_content_name, '', $this->wp_content_url ); $this->wp_root_dir = str_replace( $this->wp_content_name, '', WP_CONTENT_DIR ); $export_folder = 'documentor-export'; $wp_upload_dir = wp_upload_dir(); $this->export_path = $wp_upload_dir['basedir'] . '/' . $export_folder; $this->export_url = $wp_upload_dir['baseurl'] . '/' . $export_folder; } /** * Array with saved URLs. * * @var array */ protected $saved_urls = array(); /** * Save URL. * * @param string $old_url - old URL. * @param string $new_url - new URL. */ public function save_url( $old_url, $new_url ) { $this->saved_urls[ $old_url ] = $new_url; } /** * Get URL. * * @param string $url - url. */ public function get_url( $url ) { return isset( $this->saved_urls[ $url ] ) ? $this->saved_urls[ $url ] : false; } /** * Replace URLs. * * @param string $content - content. */ public function replace_urls( $content ) { foreach ( $this->saved_urls as $old_url => $new_url ) { if ( preg_match( '/\.html$/', $new_url ) ) { $content = str_replace( '"' . $old_url . '"', '"' . $new_url . '"', $content ); $content = str_replace( '\'' . $old_url . '\'', '\'' . $new_url . '\'', $content ); } else { $content = str_replace( $old_url, $new_url, $content ); } } return $content; } /** * Run exporter. * * @param int $doc_id - documenta ID. */ public function run( $doc_id ) { // Turn off PHP output compression. // phpcs:disable $previous = error_reporting( error_reporting() ^ E_WARNING ); ini_set( 'output_buffering', 'off' ); ini_set( 'zlib.output_compression', false ); error_reporting( $previous ); // phpcs:enable if ( $GLOBALS['is_nginx'] ) { // Setting this header instructs Nginx to disable fastcgi_buffering // and disable gzip for this request. header( 'X-Accel-Buffering: no' ); header( 'Content-Encoding: none' ); } // Start the event stream. header( 'Content-Type: text/event-stream' ); // 2KB padding for IE // phpcs:ignore echo ':' . str_repeat( ' ', 2048 ) . "\n\n"; // Time to run the import! set_time_limit( 0 ); // Ensure we're not buffered. wp_ob_end_flush_all(); flush(); // get all pages. $docs = get_pages( array( 'post_type' => 'docs', 'child_of' => $doc_id, 'post_status' => array( 'publish', 'draft' ), 'posts_per_page' => '-1', // phpcs:ignore 'sort_column' => 'menu_order', 'order' => 'ASC', ) ); $docs = $this->build_tree( $docs, $doc_id ); // get main doc. array_unshift( $docs, array( 'id' => $doc_id, 'file_name' => 'index.html', 'permalink' => get_permalink( $doc_id ), ) ); // save urls. foreach ( $docs as $doc ) { $this->save_url( $doc['permalink'], $doc['file_name'] ); } $this->max_delta += count( $docs ); // export folder path. $export_folder_name = get_post_field( 'post_name', $doc_id ); $export_path = trailingslashit( $this->export_path . '/' . $export_folder_name ); // remove export folder. $this->rimraf_dir( $export_path ); // run export. foreach ( $docs as $doc ) { $this->emit_sse_message( $doc['file_name'] ); $this->run_single( $doc['permalink'], $doc['file_name'], $export_path ); } // zip. $this->emit_sse_message( 'ZIP Documentation' ); $result = $this->zip( $doc_id ); $this->emit_sse_message( $result, 'complete' ); } /** * Run single document. * * @param string $permalink - page link. * @param string $file_name - file name. * @param string $export_path - export path. */ public function run_single( $permalink, $file_name, $export_path ) { // request page. $content = wp_remote_get( $permalink ); if ( is_wp_error( $content ) ) { return 'Can\'t get post content.'; } // get page html content. $content = wp_remote_retrieve_body( $content ); // check if html. if ( ( ( stripos( $content, '<html' ) === false ) && ( stripos( $content, '<!DOCTYPE html' ) === false ) ) || preg_match( '/<html[^>]*(?:amp|⚡)/', $content ) === 1 || stripos( $content, '<xsl:stylesheet' ) !== false ) { return 'Failed page request.'; } // parse and save all js files. if ( preg_match_all( '#<script.*</script>#Usmi', $content, $matches ) ) { foreach ( $matches[0] as $tag ) { if ( preg_match( '#<script[^>]*src=("|\')([^>]*)("|\')#Usmi', $tag, $source ) ) { $url = $this->normalize_url( $source[2] ); $path = $this->get_path( $url ); // copy file. if ( $path && ! $this->get_url( $url ) ) { $relative_path = $this->get_assets_relative_path( $path ); $copy_to = $export_path . '/' . $relative_path; $this->copy_file( $path, $copy_to ); $this->save_url( $url, '.' . $relative_path ); } } } } // parse and save all css files. if ( preg_match_all( '#(<link[^>]*stylesheet[^>]*>)#Usmi', $content, $matches ) ) { foreach ( $matches[0] as $tag ) { if ( preg_match( '#<link.*href=("|\')(.*)("|\')#Usmi', $tag, $source ) ) { $url = $this->normalize_url( $source[2] ); $path = $this->get_path( $url ); // copy file. if ( $path && ! $this->get_url( $url ) ) { $relative_path = $this->get_assets_relative_path( $path ); $copy_to = $export_path . '/' . $relative_path; if ( $this->copy_file( $path, $copy_to ) ) { // find imported images, fonts and other files in css content. $this->parse_css( $url, $export_path ); } $this->save_url( $url, '.' . $relative_path ); } } } } // find all links in html. preg_match_all( '#\bhttps?://[^\s()<>]+(?:\([\w\d]+\)|([^[:punct:]\s]|/))#', $content, $matches ); if ( is_array( $matches ) ) { foreach ( $matches[0] as $url ) { $url = $this->normalize_url( $url ); // save images. $url_ext = pathinfo( $url, PATHINFO_EXTENSION ); if ( in_array( $url_ext, $this->img_extensions, true ) && ! $this->get_url( $url ) ) { $path = $this->get_path( $url ); // copy file. if ( $path ) { $relative_path = $this->get_assets_relative_path( $path ); $copy_to = $export_path . '/' . $relative_path; $this->copy_file( $path, $copy_to ); $this->save_url( $url, '.' . $relative_path ); } } } } // replace all document links. $content = $this->replace_urls( $content ); // add custom js and css. $custom_js = documentor()->get_option( 'custom_js', 'documentor_export', $this->custom_js ); if ( $custom_js ) { $content = str_replace( '</head>', "\n<script>" . $custom_js . "</script>\n\n</head>", $content ); } $custom_css = documentor()->get_option( 'custom_css', 'documentor_export', $this->custom_css ); if ( $custom_css ) { if ( false !== stripos( $custom_css, '</style>' ) ) { $custom_css = trim( preg_replace( '#<style[^>]*>(.*)</style>#is', '$1', $custom_css ) ); } $content = str_replace( '</head>', '<style type="text/css">' . $custom_css . "</style>\n\n</head>", $content ); } $content = $this->clean_html( $content ); // save html file. $html_path = $export_path . '/' . $file_name; if ( ! file_exists( $html_path ) ) { wp_mkdir_p( pathinfo( $html_path, PATHINFO_DIRNAME ) ); file_put_contents( $html_path, $content, 0644 ); // phpcs:ignore } unset( $content ); return 'Exported'; } /** * Build tree. * * @param array $docs - all documents array. * @param int $parent - parent doc id. * @param string $name_pre - name of parent doc. */ public function build_tree( $docs, $parent = 0, $name_pre = '' ) { $result = array(); if ( ! $docs ) { return $result; } $i = 1; foreach ( $docs as $key => $doc ) { if ( (int) $doc->post_parent === (int) $parent ) { $name_pre_cur = $name_pre . ( $i++ ) . '.'; unset( $docs[ $key ] ); $result[] = array( 'id' => $doc->ID, 'file_name' => $name_pre_cur . $doc->post_name . '.html', 'permalink' => get_permalink( $doc->ID ), ); // get childs. $result = array_merge( $result, $this->build_tree( $docs, $doc->ID, $name_pre_cur ) ); } } return $result; } /** * Parse CSS. * * @param string $css_url - css url. * @param string $export_path - export path. */ public function parse_css( $css_url, $export_path ) { $css_path = $this->get_path( $css_url ); $assets = array(); if ( ! $css_path ) { return; } // get css content and remove comments. // phpcs:ignore $css_content_nocomments = preg_replace( '#/\*.*\*/#Um', '', file_get_contents( $css_path ) ); // find imported files in css content. preg_match_all( "/@import[ ]*['\"]{0,}(url\()*['\"]*([^;'\"\)]*)['\"\)]*/ui", $css_content_nocomments, $matches ); if ( is_array( $matches ) ) { foreach ( $matches[2] as $import ) { $url = $this->normalize_url( $import ); $path = $this->get_path( $url, $css_url ); $assets[] = $path; if ( $path && file_exists( $path ) && is_readable( $path ) ) { $assets[] = $path; // copy CSS file. $relative_path = $this->get_assets_relative_path( $path ); $copy_to = $export_path . '/' . $relative_path; if ( $this->copy_file( $path, $copy_to ) ) { // find imported images, fonts and other files in css content. $this->parse_css( $url, $export_path ); } } } } // find images. preg_match_all( '#(background[^;{}]*url\((?!\s?"?\'?\s?data)(.*)\)[^;}]*)(?:;|$|})#Usm', $css_content_nocomments, $matches ); if ( is_array( $matches ) ) { foreach ( $matches[2] as $count => $url ) { $path = $this->get_path( $url, $css_url ); if ( $path ) { $assets[] = $path; } } } // find fonts. $fonturl_regex = $this->get_fonts_regex(); preg_match_all( $fonturl_regex, $css_content_nocomments, $matches ); if ( is_array( $matches ) ) { foreach ( $matches[0] as $count => $url ) { $path = $this->get_path( $url, $css_url ); if ( $path ) { $assets[] = $path; } } } // copy all assets to destination folder. $assets = array_unique( $assets ); foreach ( $assets as $asset ) { $relative_path = $this->get_assets_relative_path( $asset ); $copy_to = $export_path . '/' . $relative_path; $this->copy_file( $asset, $copy_to ); } unset( $css_content_nocomments ); unset( $assets ); } /** * Get fonts. * * @return string */ public function get_fonts_regex() { // regex to find fonts, externalised to avoid nasty errors for php < 5.3 // http://stackoverflow.com/questions/21392684/extracting-urls-from-font-face-by-searching-within-font-face-for-replacement. return <<<'LOD' ~ (?(DEFINE) (?<quoted_content> (["']) (?>[^"'\\]++ | \\{2} | \\. | (?!\g{-1})["'] )*+ \g{-1} ) (?<comment> /\* .*? \*/ ) (?<url_skip> (?: https?: | data: ) [^"'\s)}]*+ ) (?<other_content> (?> [^u}/"']++ | \g<quoted_content> | \g<comment> | \Bu | u(?!rl\s*+\() | /(?!\*) | \g<url_start> \g<url_skip> ["']?+ )++ ) (?<anchor> \G(?<!^) ["']?+ | @font-face \s*+ { ) (?<url_start> url\( \s*+ ["']?+ ) ) \g<comment> (*SKIP)(*FAIL) | \g<anchor> \g<other_content>?+ \g<url_start> \K [./]*+ ( [^"'\s)}]*+ ) # url ~xs LOD; } /** * Get path from URL. * * @param string $url - url. * @param boolean $base_url - base url. * @return string */ public function get_path( $url, $base_url = false ) { $url = $this->normalize_url( $url ); // phpcs:ignore $site_host = parse_url( $this->wp_site_url, PHP_URL_HOST ); // normalize. if ( strpos( $url, '//' ) === 0 ) { if ( is_ssl() ) { $url = 'https:' . $url; } else { $url = 'http:' . $url; } } elseif ( ( strpos( $url, '//' ) === false ) && ( strpos( $url, $site_host ) === false ) ) { if ( $this->wp_site_url === $site_host ) { $url = $this->wp_site_url . $url; } elseif ( $base_url && strpos( $url, 'http' ) !== 0 ) { if ( strpos( $url, './' ) === 0 ) { $url = preg_replace( '^/.\//', '', $url ); } $base_url_folder = pathinfo( $base_url, PATHINFO_DIRNAME ); $url = trailingslashit( $base_url_folder ) . $url; } else { $subdir_levels = substr_count( preg_replace( '/https?:\/\//', '', $this->wp_site_url ), '/' ); $url = $this->wp_site_url . str_repeat( '/..', $subdir_levels ) . $url; } } // first check; hostname wp site should be hostname of url. // phpcs:ignore $this_host = @parse_url( $url, PHP_URL_HOST ); if ( $this_host !== $site_host ) { return false; } // try to remove "wp root url" from url while not minding http<>https. $tmp_ao_root = preg_replace( '/https?:/', '', $this->wp_root_url ); $tmp_url = preg_replace( '/https?:/', '', $url ); $path = str_replace( $tmp_ao_root, '', $tmp_url ); // final check; if path starts with :// or //, this is not a URL in the WP context and we have to assume we can't aggregate. if ( preg_match( '#^:?//#', $path ) ) { /** External script/css (adsense, etc) */ return false; } $path = str_replace( '//', '/', $this->wp_root_dir . $path ); return $path; } /** * Get assets relative path. * * @param string $path - path. * @return string */ public function get_assets_relative_path( $path ) { $path = str_replace( $this->wp_root_dir, '', $path ); $path = preg_replace( '/^\/wp-content/', '/assets/', $path ); $path = preg_replace( '/^\/wp-includes/', '/assets/', $path ); $path = str_replace( '//', '/', $path ); return $path; } /** * Copy file. * * @param string $from - from path. * @param string $to - to path. * @return boolean */ public function copy_file( $from, $to ) { if ( ! file_exists( $to ) && file_exists( $from ) ) { wp_mkdir_p( pathinfo( $to, PATHINFO_DIRNAME ) ); // phpcs:ignore @copy( $from, $to ); return true; } return false; } /** * Normalize url. * * @param string $url - url. * @return string */ public function normalize_url( $url ) { $url = trim( $url, " \t\n\r\0\x0B\"'" ); if ( strpos( $url, '%' ) !== false ) { $url = urldecode( $url ); } if ( strpos( $url, '?' ) !== false ) { $url = strtok( $url, '?' ); } return $url; } /** * Clean HTML string. * * @param string $html - html. * @return string */ public function clean_html( $html ) { $regexp_array = explode( "\n", documentor()->get_option( 'clean_html', 'documentor_export', $this->clean_html_regexp ) ); foreach ( $regexp_array as $reg ) { $reg = trim( $reg ); if ( $reg && preg_match( $reg, null ) !== false ) { $html = preg_replace( $reg, '', $html ); } } $html = preg_replace( "/(\r?\n){2,}/", "\n\n", $html ); return $html; } /** * Zip document. * * @param int $main_doc_id - post id. * @return string */ public function zip( $main_doc_id ) { $export_folder_name = get_post_field( 'post_name', $main_doc_id ); $source = $this->export_path . '/' . $export_folder_name . '/'; $destination = $this->export_path . '/' . $export_folder_name . '.zip'; $dest_url = $this->export_url . '/' . $export_folder_name . '.zip'; if ( ! extension_loaded( 'zip' ) || ! file_exists( $source ) ) { return false; } // remove old zip. if ( file_exists( $destination ) ) { unlink( $destination ); // phpcs:ignore } $zip = new ZipArchive(); if ( ! $zip->open( $destination, ZIPARCHIVE::CREATE ) ) { return false; } $source = str_replace( '\\', '/', realpath( $source ) ); if ( is_dir( $source ) === true ) { $files = new RecursiveIteratorIterator( new RecursiveDirectoryIterator( $source ), RecursiveIteratorIterator::SELF_FIRST ); foreach ( $files as $file ) { $file = str_replace( '\\', '/', $file ); // Ignore "." and ".." folders. if ( in_array( substr( $file, strrpos( $file, '/' ) + 1 ), array( '.', '..' ), true ) ) { continue; } $file = realpath( $file ); if ( is_dir( $file ) === true ) { $zip->addEmptyDir( str_replace( $source . '/', '', $file . '/' ) ); } elseif ( is_file( $file ) === true ) { // phpcs:ignore $zip->addFromString( str_replace( $source . '/', '', $file ), file_get_contents( $file ) ); } } } elseif ( is_file( $source ) === true ) { // phpcs:ignore $zip->addFromString( basename( $source ), file_get_contents( $source ) ); } $zip->close(); return $dest_url; } /** * Remove dir. * * @param string $dir - dir path. * @return boolean */ public function rimraf_dir( $dir ) { $files = array_diff( scandir( $dir ), array( '.', '..' ) ); foreach ( $files as $file ) { ( is_dir( "$dir/$file" ) ) ? $this->rimraf_dir( "$dir/$file" ) : unlink( "$dir/$file" ); // phpcs:ignore } return rmdir( $dir ); // phpcs:ignore } /** * Emit a Server-Sent Events message. * * @param string $message - message. * @param string $action - action. */ protected function emit_sse_message( $message, $action = 'message' ) { $data = array( 'action' => $action, 'message' => $message, 'max_delta' => $this->max_delta, ); echo "event: message\n"; echo 'data: ' . wp_json_encode( $data ) . "\n\n"; // Extra padding. echo ':' . str_repeat( ' ', 2048 ) . "\n\n"; // phpcs:ignore flush(); } }