Вход Регистрация
Файл: vendor/league/commonmark/src/Extension/Autolink/UrlAutolinkParser.php
Строк: 194
<?php

declare(strict_types=1);

/*
 * This file is part of the league/commonmark package.
 *
 * (c) Colin O'Dell <colinodell@gmail.com>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

namespace League\CommonMark\Extension\Autolink;

use League\CommonMark\Extension\CommonMark\Node\Inline\Link;
use League\CommonMark\Parser\Inline\InlineParserInterface;
use League\CommonMark\Parser\Inline\InlineParserMatch;
use League\CommonMark\Parser\InlineParserContext;

/**
 * Inline parser that converts bare URLs (e.g. "https://example.com" or
 * "www.example.com") found in text into Link nodes.
 *
 * The set of recognised schemes is configurable through the constructor;
 * URLs starting with "www." are always recognised and are given the
 * configured default protocol in the generated link destination.
 */
final class UrlAutolinkParser implements InlineParserInterface
{
    /**
     * Characters that may immediately precede an autolink. `null` is what the
     * cursor reports when there is no previous character (start of input).
     */
    private const ALLOWED_AFTER = [null, ' ', "\t", "\n", "\x0b", "\x0c", "\x0d", '*', '_', '~', '('];

    // RegEx adapted from https://github.com/symfony/symfony/blob/6.3/src/Symfony/Component/Validator/Constraints/UrlValidator.php
    //
    // This is a sprintf() template: "%s" receives the "|"-joined list of allowed
    // protocols and every literal percent sign is therefore written as "%%".
    // The pattern uses "~" as its delimiter, so literal tildes are escaped.
    private const REGEX = '~
        (
            # Must start with a supported scheme + auth, or "www"
            (?:
                (?:%s)://                                                                            # protocol
                (?:(?:(?:[\_\.\pL\pN-]|%%[0-9A-Fa-f]{2})+:)?((?:[\_\.\pL\pN-]|%%[0-9A-Fa-f]{2})+)@)? # basic auth
            |www\.)
            (?:
                (?:
                    (?:xn--[a-z0-9-]++\.)*+xn--[a-z0-9-]++            # a domain name using punycode
                        |
                    (?:[\pL\pN\pS\pM\-\_]++\.){1,127}[\pL\pN\pM]++    # a multi-level domain name; total length must be 253 bytes or less
                        |
                    [a-z0-9\-\_]++                                    # a single-level domain name
                )\.?
                    |                                                 # or
                \d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}                    # an IP address
                    |                                                 # or
                \[
                    (?:(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){6})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:::(?:(?:(?:[0-9a-f]{1,4})):){5})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:[0-9a-f]{1,4})))?::(?:(?:(?:[0-9a-f]{1,4})):){4})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,1}(?:(?:[0-9a-f]{1,4})))?::(?:(?:(?:[0-9a-f]{1,4})):){3})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,2}(?:(?:[0-9a-f]{1,4})))?::(?:(?:(?:[0-9a-f]{1,4})):){2})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,3}(?:(?:[0-9a-f]{1,4})))?::(?:(?:[0-9a-f]{1,4})):)(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,4}(?:(?:[0-9a-f]{1,4})))?::)(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,5}(?:(?:[0-9a-f]{1,4})))?::)(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,6}(?:(?:[0-9a-f]{1,4})))?::))))
                \]  # an IPv6 address
            )
            (?::[0-9]+)?                              # a port (optional)
            (?:/ (?:[\pL\pN\-._\~!$&\'()*+,;=:@]|%%[0-9A-Fa-f]{2})* )*        # a path
            (?:\? (?:[\pL\pN\-._\~!$&\'\[\]()*+,;=:@/?]|%%[0-9A-Fa-f]{2})* )? # a query (optional)
            (?:\# (?:[\pL\pN\-._\~!$&\'()*+,;=:@/?]|%%[0-9A-Fa-f]{2})* )?     # a fragment (optional)
        )~ixu';

    /**
     * Literal prefixes that trigger this parser: "www." plus "<protocol>://"
     * for every allowed protocol.
     *
     * @var string[]
     *
     * @psalm-readonly
     */
    private array $prefixes = ['www.'];

    /**
     * REGEX with the allowed protocols substituted in.
     *
     * @psalm-var non-empty-string
     *
     * @psalm-readonly
     */
    private string $finalRegex;

    /** Protocol prepended to the destination of "www." links. */
    private string $defaultProtocol;

    /**
     * @param array<int, string> $allowedProtocols Schemes (without "://") that may start an autolink
     * @param string             $defaultProtocol  Scheme (without "://") used for links that start with "www."
     */
    public function __construct(array $allowedProtocols = ['http', 'https', 'ftp'], string $defaultProtocol = 'http')
    {
        /**
         * @psalm-suppress PropertyTypeCoercion
         */
        $this->finalRegex = \sprintf(self::REGEX, \implode('|', $allowedProtocols));

        foreach ($allowedProtocols as $protocol) {
            $this->prefixes[] = $protocol . '://';
        }

        $this->defaultProtocol = $defaultProtocol;
    }

    /**
     * Declares the prefixes at which parse() should be attempted.
     */
    public function getMatchDefinition(): InlineParserMatch
    {
        return InlineParserMatch::oneOf(...$this->prefixes);
    }

    /**
     * Attempts to parse an autolink at the current cursor position.
     *
     * On success the cursor is advanced past the URL and a Link node whose
     * label is the URL text is appended to the current container.
     *
     * @return bool true when a link was created, false when the text at the
     *              cursor is not a valid autolink (the cursor is left untouched)
     */
    public function parse(InlineParserContext $inlineContext): bool
    {
        $cursor = $inlineContext->getCursor();

        // Autolinks can only come at the beginning of a line, after whitespace, or certain delimiting characters
        $previousChar = $cursor->peek(-1);
        if (! \in_array($previousChar, self::ALLOWED_AFTER, true)) {
            return false;
        }

        // Check if we have a valid URL
        if (! \preg_match($this->finalRegex, $cursor->getRemainder(), $matches)) {
            return false;
        }

        $url = $matches[0];

        // Does the URL end with punctuation that should be stripped?
        if (\preg_match('/(.+?)([?!.,:*_~]+)$/', $url, $matches)) {
            // Keep only the URL; the cursor is advanced by the trimmed length below,
            // so the trailing punctuation stays in the input
            $url = $matches[1];
        }

        // Does the URL end with something that looks like an entity reference?
        if (\preg_match('/(.+)(&[A-Za-z0-9]+;)$/', $url, $matches)) {
            $url = $matches[1];
        }

        // Does the URL need unmatched parens chopped off?
        if (\substr($url, -1) === ')' && ($diff = self::diffParens($url)) > 0) {
            $url = \substr($url, 0, -$diff);
        }

        // The length passed to the cursor is measured in UTF-8 characters, not bytes
        $cursor->advanceBy(\mb_strlen($url, 'UTF-8'));

        // Auto-prefix 'http(s)://' onto 'www' URLs
        if (\substr($url, 0, 4) === 'www.') {
            $inlineContext->getContainer()->appendChild(new Link($this->defaultProtocol . '://' . $url, $url));

            return true;
        }

        $inlineContext->getContainer()->appendChild(new Link($url, $url));

        return true;
    }

    /**
     * Returns the number of closing parentheses minus the number of opening
     * parentheses in $content (positive when there are surplus ")" characters).
     *
     * @psalm-pure
     */
    private static function diffParens(string $content): int
    {
        // Scan the entire autolink for the total number of parentheses.
        // If there is a greater number of closing parentheses than opening ones,
        // we don’t consider ANY of the last characters as part of the autolink,
        // in order to facilitate including an autolink inside a parenthesis.
        \preg_match_all('/[()]/', $content, $matches);

        $charCount = ['(' => 0, ')' => 0];
        foreach ($matches[0] as $char) {
            $charCount[$char]++;
        }

        return $charCount[')'] - $charCount['('];
    }
}
Онлайн: 0
Реклама