189 lines
7.5 KiB
PHP
189 lines
7.5 KiB
PHP
<?php
|
|
|
|
/**
 * Resolve a URL reference against a base URL and apply the site-specific
 * path rewrites.
 *
 * Absolute URLs (those with a scheme) are rewritten directly. An empty
 * reference resolves to the base itself. Anything else is resolved against
 * $base first: the base's last path segment is replaced by the reference,
 * and "//", "/./" and "/segment/../" sequences are collapsed.
 *
 * @param string $url  URL reference as found in the document (may be relative or empty)
 * @param string $base URL the reference is resolved against
 * @param string $type Kind of attribute the URL came from (e.g. 'imgSrc', 'aHref',
 *                     'embedSrc', 'videoSrc'); currently only reserved for
 *                     per-type overrides
 *
 * @return string|false Rewritten URL, or false when a xoonips
 *                      "download.php?file_id=N" link has no file under
 *                      MYDUMPTOOL_OUTPUTDIR (a message is echoed in that case)
 */
function normalizeUrl($url, $base, $type)
{
    // Applies the host/path rewrites to an already absolute URL.
    $localpath = function ($url, $type) {
        $purl1 = parse_url($url);
        $purl2 = parse_url(XOOPS_URL);
        // URLs on the same host as XOOPS_URL are forced to https.
        if (isset($purl1['host']) && isset($purl2['host']) && $purl1['host'] === $purl2['host']) {
            $url = preg_replace('/^http:/', 'https:', $url);
        }

        // Strip the site origin so that the remaining rules see a root-relative
        // path. Every dot is escaped so that only the literal host name matches.
        $url = preg_replace('/^https?:\/\/nimg\.neuroinf\.jp\/?/', '/', $url);
        // fmanager: drop the "?_t=..." cache buster and use the index.php form.
        $url = preg_replace('/^\/fmanager\/(view|tmb)\/(.*)(?:\?_t=.*)?$/U', '/modules/fmanager/index.php/\1/\2', $url);
        // xoonips detail page: drop the trailing "&download=..." part.
        $url = preg_replace('/^\/modules\/xoonips\/detail\.php\?((?:item_)?id=.+)&download=.*$/U', '/modules/xoonips/detail.php?\1', $url);
        // xoonips thumbnails and previews map to "<file id>.png".
        $url = preg_replace('/^\/modules\/xoonips\/image\.php\/thumbnail\/(\d+)\/(.*)$/U', '/modules/xoonips/file/\1.png', $url);
        $url = preg_replace('/^\/modules\/xoonips\/preview\.php\/\d+\/(\d+)\/(.*)$/U', '/modules/xoonips/file/\1.png', $url);
        // xoonips downloads map to "<file id>/<file name>".
        $url = preg_replace('/^\/modules\/xoonips\/download\.php\/\d+\/(\d+)\/(.*)$/U', '/modules/xoonips/file/\1/\2', $url);
        $url = preg_replace('/^\/modules\/xoonips\/download\.php\/([^\/]+)\?file_id=(\d+)$/U', '/modules/xoonips/file/\2/\1', $url);

        // A download link that carries only the file id: the file name has to
        // be looked up in the output directory.
        if (preg_match('/^\/modules\/xoonips\/download\.php\?file_id=(\d+)$/U', $url, $matches)) {
            $fpath = MYDUMPTOOL_OUTPUTDIR.'/public/modules/xoonips/file/'.$matches[1];
            $found = false;
            $fnames = glob($fpath.'/*');
            if (false === $fnames) {
                // glob() reports errors as false; treat that as "no file".
                $fnames = [];
            }
            foreach ($fnames as $fname) {
                // glob() yields full paths, so compare the base name only.
                $bname = basename($fname);
                if ('.htaccess' === $bname) {
                    continue;
                }
                // When several files exist the last one wins.
                $url = '/modules/xoonips/file/'.$matches[1].'/'.urlencode($bname);
                $found = true;
            }
            if (false === $found) {
                echo 'Failed to replace xoonips download url:'.$url.PHP_EOL;

                return false;
            }
        }

        // Placeholder for hand-maintained per-type overrides; intentionally empty.
        if ('videoSrc' === $type) {
        }

        return $url;
    };

    if ('' === $url) {
        return $localpath($base, $type);
    }
    $purl = parse_url($url);
    if (isset($purl['scheme'])) {
        return $localpath($url, $type);
    }

    $pburl = parse_url($base);
    if (!is_array($pburl)) {
        // parse_url() returns false for seriously malformed input.
        $pburl = [];
    }

    // Protocol-relative reference ("//host/path"): it only inherits the scheme.
    if (0 === strpos($url, '//') && isset($pburl['scheme'])) {
        return $localpath($pburl['scheme'].':'.$url, $type);
    }

    $path = isset($pburl['path']) ? $pburl['path'] : '';
    if ('/' === $url[0]) {
        // Root-relative reference replaces the whole base path.
        $path = '';
    } elseif ('#' !== $url[0] && '?' !== $url[0]) {
        // Relative path replaces the last segment of the base path.
        $path = preg_replace('#/[^/]*$#', '', $path).'/';
    }
    $path .= $url;

    // Keep query string and fragment out of the dot-segment clean-up so that
    // URLs embedded in them (e.g. "?u=http://...") are left intact.
    $suffix = '';
    $cut = strcspn($path, '?#');
    if ($cut < strlen($path)) {
        $suffix = substr($path, $cut);
        $path = substr($path, 0, $cut);
    }

    // Collapse "//", "/./" and "/segment/../" until nothing changes.
    $pattern = ['#(/\.?/)#', '#/(?!\.\.)[^/]+/\.\./#'];
    do {
        $path = preg_replace($pattern, '/', $path, -1, $count);
    } while (0 < $count);

    // Rebuild the origin from the base, keeping an explicit port. A base
    // without scheme/host yields a plain path instead of a broken "://" URL.
    $origin = '';
    if (isset($pburl['scheme']) && isset($pburl['host'])) {
        $origin = $pburl['scheme'].'://'.$pburl['host'];
        if (isset($pburl['port'])) {
            $origin .= ':'.$pburl['port'];
        }
    }

    return $localpath($origin.$path.$suffix, $type);
}
|
|
|
|
/**
 * Turn a site URL into a root-relative path and make sure the referenced
 * static file exists in the output directory.
 *
 * The XOOPS_URL prefix is stripped from $url. If the result is not
 * root-relative the URL is returned untouched. Root-relative URLs that end in
 * "/", "*.php" or "*.html" (optionally followed by a query string and/or
 * fragment) are returned without any file handling. Everything else is
 * treated as a file: when it is missing under MYDUMPTOOL_OUTPUTDIR/public it
 * is copied there from XOOPS_ROOT_PATH.
 *
 * @param string $url URL to check
 * @param string $tag Kind of attribute the URL came from; not used here
 *
 * @return string|false The root-relative URL, the unchanged $url when it does not
 *                      point into the site, or false when the source file is
 *                      missing or cannot be copied (a message is echoed)
 */
function checkLocalFile($url, $tag)
{
    // Only a leading XOOPS_URL is the site prefix; an occurrence further in
    // (e.g. inside a query string) must be left alone.
    $url2 = preg_replace('/^'.preg_quote(XOOPS_URL, '/').'/', '', $url);
    if (!preg_match('/^\//', $url2)) {
        return $url;
    }

    // Directory, PHP and HTML URLs are pages, not files to copy.
    if (preg_match('/\/([^\/]+\.(php|html))?(\?.+)?(#.+)?$/i', $url2)) {
        return $url2;
    }

    $decoded = urldecode($url2);
    $dstFpath = MYDUMPTOOL_OUTPUTDIR.'/public'.$decoded;
    $srcFpath = XOOPS_ROOT_PATH.$decoded;
    if (file_exists($dstFpath)) {
        return $url2;
    }
    if (!file_exists($srcFpath)) {
        echo 'Error: File not found: '.$srcFpath.PHP_EOL;

        return false;
    }

    // Create the directory of the decoded destination path so that it matches
    // $dstFpath. NOTE(review): assumes makeDirectory() takes a path relative to
    // MYDUMPTOOL_OUTPUTDIR - confirm against MyDumpTool.
    MyDumpTool::makeDirectory('/public'.dirname($decoded));
    if (!MyDumpTool::fileCopy($srcFpath, $dstFpath)) {
        echo 'Error: Failed to copy file: '.$srcFpath.PHP_EOL;

        return false;
    }

    return $url2;
}
|
|
|
|
/**
 * Rewrite the URLs and clean up the markup of an HTML fragment.
 *
 * Steps, in order:
 *  1. MyDumpTool::fixHtml() is applied to the text.
 *  2. The src/href of <img>, <a>, <embed> and <video> tags is passed through
 *     normalizeUrl() and checkLocalFile(); "#..." hrefs are kept as they are.
 *  3. lang="..." and xml:lang="..." attributes are removed.
 *  4. <span> elements lose their font-family / line-height / empty /
 *     "color:null" style declarations; spans left without attributes or
 *     without content are unwrapped.
 *  5. MyDumpTool::fixHtml() is applied again.
 *
 * If a URL cannot be resolved the script terminates via exit() with $path as
 * the message.
 *
 * @param string $text HTML fragment
 * @param string $path URL of the page the fragment belongs to; used as the base
 *                     for relative URLs and in diagnostics
 *
 * @return string The rewritten HTML
 */
function normalizeHtml($text, $path)
{
    // Resolves one attribute value and returns it re-escaped for HTML.
    // Both lookups signal failure with false; either one aborts the run.
    $localize = function ($rawUrl, $type) use ($path) {
        $url = normalizeUrl(htmlspecialchars_decode($rawUrl, ENT_QUOTES), $path, $type);
        if (false !== $url) {
            $url = checkLocalFile($url, $type);
        }
        if (false === $url) {
            exit($path.PHP_EOL);
        }

        return htmlspecialchars($url, ENT_QUOTES);
    };

    $text = MyDumpTool::fixHtml($text);

    $text = preg_replace_callback('/<img ([^>]*)src="(.*)"([^>]*)\/>/Us', function ($matches) use ($localize) {
        return '<img '.$matches[1].'src="'.$localize($matches[2], 'imgSrc').'"'.$matches[3].'/>';
    }, $text);

    $text = preg_replace_callback('/<a ([^>]*)href="([^"]+)"([^>]*)>/Us', function ($matches) use ($localize) {
        $url = htmlspecialchars_decode($matches[2], ENT_QUOTES);
        // In-page anchors are only re-escaped, never resolved.
        $href = preg_match('/^#/', $url) ? htmlspecialchars($url, ENT_QUOTES) : $localize($matches[2], 'aHref');

        return '<a '.$matches[1].'href="'.$href.'"'.$matches[3].'>';
    }, $text);

    $text = preg_replace_callback('/<embed ([^>]*)src="(.*)"([^>]*)\/>/Us', function ($matches) use ($localize) {
        return '<embed '.$matches[1].'src="'.$localize($matches[2], 'embedSrc').'"'.$matches[3].'/>';
    }, $text);

    $text = preg_replace_callback('/<video ([^>]*)src="(.*)"([^>]*)>/Us', function ($matches) use ($localize) {
        return '<video '.$matches[1].'src="'.$localize($matches[2], 'videoSrc').'"'.$matches[3].'>';
    }, $text);

    $text = preg_replace('/ lang="([^"]+)"/Us', '', $text);
    $text = preg_replace('/ xml:lang="([^"]+)"/Us', '', $text);

    // remove font-family attributes
    $text = preg_replace_callback('/<span([^>]*?)>((?!<span).*)<\/span>/Us', function ($matches) use ($path) {
        // NOTE(review): trim() never returns a single plain space, so the last
        // comparison cannot be true as written; the literal may originally have
        // been a non-breaking space or "&nbsp;" - confirm against the repository.
        if ('' === trim($matches[1]) || '' === trim($matches[2]) || ' ' === trim($matches[2])) {
            return $matches[2];
        }

        $m1 = preg_replace_callback('/style="(.*)"/Us', function ($matches2) {
            $kept = [];
            $declarations = array_map('trim', explode(';', htmlspecialchars_decode($matches2[1], ENT_QUOTES)));
            foreach ($declarations as $declaration) {
                if ('' === $declaration) {
                    continue;
                }
                // Split on the first colon only: values such as
                // "url(http://...)" contain colons themselves.
                $parts = array_map('trim', explode(':', $declaration, 2));
                $key = $parts[0];
                $value = isset($parts[1]) ? $parts[1] : '';
                $drop = 'font-family' === $key
                    || 'line-height' === $key
                    || '' === $value
                    || ('color' === $key && 'null' === $value);
                if (!$drop) {
                    $kept[] = $key.':'.$value;
                }
            }

            return 0 === count($kept) ? '' : ('style="'.htmlspecialchars(implode(';', $kept).';', ENT_QUOTES).'"');
        }, $matches[1]);

        // No attribute left: the span carries no information, unwrap it.
        if ('' === trim($m1)) {
            return $matches[2];
        }
        // Diagnostic: a font-family survived somewhere in the attributes.
        if (false !== strpos($m1, 'font-family')) {
            echo 'trimed span:'.$path.PHP_EOL;
        }

        return '<span'.$m1.'>'.$matches[2].'</span>';
    }, $text);

    $text = MyDumpTool::fixHtml($text);

    return $text;
}
|