# misp-galaxy/galaxy/lib/python3.12/site-packages/pip/_internal/index/sources.py

import logging
import mimetypes
import os
from collections import defaultdict
from typing import Callable, Dict, Iterable, List, Optional, Tuple

from pip._vendor.packaging.utils import (
    InvalidSdistFilename,
    InvalidVersion,
    InvalidWheelFilename,
    canonicalize_name,
    parse_sdist_filename,
    parse_wheel_filename,
)

from pip._internal.models.candidate import InstallationCandidate
from pip._internal.models.link import Link
from pip._internal.utils.urls import path_to_url, url_to_path
from pip._internal.vcs import is_url

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Type aliases used in the signatures below.
# An iterable of installation candidates.
FoundCandidates = Iterable[InstallationCandidate]
# An iterable of links.
FoundLinks = Iterable[Link]
# Callable that takes a link and returns the candidates found for it.
CandidatesFromPage = Callable[[Link], Iterable[InstallationCandidate]]
# Callable that takes a link and returns a bool.
PageValidator = Callable[[Link], bool]


class LinkSource:
    """Base interface for the link sources defined in this module.

    Every member raises ``NotImplementedError`` here; the subclasses in this
    module override all three.
    """

    @property
    def link(self) -> Optional[Link]:
        """Return the link behind this source, if it has one."""
        raise NotImplementedError

    def page_candidates(self) -> FoundCandidates:
        """Return candidates parsed from an archive listing HTML file."""
        raise NotImplementedError

    def file_links(self) -> FoundLinks:
        """Return links that were given as archives directly."""
        raise NotImplementedError


def _is_html_file(file_url: str) -> bool:
    return mimetypes.guess_type(file_url, strict=False)[0] == "text/html"


class _FlatDirectoryToUrls:
    """Scans a directory and caches the results.

    The scan is lazy: it runs the first time either ``page_candidates`` or
    ``project_name_to_urls`` is read, and a single pass fills both.
    """

    def __init__(self, path: str) -> None:
        self._path = path
        self._page_candidates: List[str] = []
        self._project_name_to_urls: Dict[str, List[str]] = defaultdict(list)
        self._scanned_directory = False

    def _scan_directory(self) -> None:
        """Scan the directory once, populating both ``page_candidates``
        and ``project_name_to_urls`` at the same time.

        Results are collected in local containers and stored on the instance
        only after the whole scan has succeeded. A scan that raises part-way
        therefore leaves the instance unchanged, so a later retry cannot
        append the same entries twice.
        """
        page_candidates: List[str] = []
        # Kept as a defaultdict: callers index it by project name and rely
        # on getting an empty list for unknown projects.
        project_name_to_urls: Dict[str, List[str]] = defaultdict(list)

        # The context manager closes the scandir iterator even when the loop
        # is interrupted by an exception.
        with os.scandir(self._path) as entries:
            for entry in entries:
                url = path_to_url(entry.path)
                if _is_html_file(url):
                    page_candidates.append(url)
                    continue

                # File must have a valid wheel or sdist name,
                # otherwise not worth considering as a package
                try:
                    project_filename = parse_wheel_filename(entry.name)[0]
                except (InvalidWheelFilename, InvalidVersion):
                    try:
                        project_filename = parse_sdist_filename(entry.name)[0]
                    except (InvalidSdistFilename, InvalidVersion):
                        continue

                project_name_to_urls[project_filename].append(url)

        self._page_candidates = page_candidates
        self._project_name_to_urls = project_name_to_urls
        self._scanned_directory = True

    @property
    def page_candidates(self) -> List[str]:
        """URLs of the HTML files in the directory (scans on first use)."""
        if not self._scanned_directory:
            self._scan_directory()

        return self._page_candidates

    @property
    def project_name_to_urls(self) -> Dict[str, List[str]]:
        """Map of parsed project name to the URLs of its wheel/sdist files
        in the directory (scans on first use).
        """
        if not self._scanned_directory:
            self._scan_directory()

        return self._project_name_to_urls


class _FlatDirectorySource(LinkSource):
    """Link source specified by ``--find-links=<path-to-dir>``.

    This looks at the content of the directory, and returns:

    * ``page_candidates``: Candidates from each HTML file in the directory.
    * ``file_links``: Archives in the directory that belong to the project.
    """

    # Class-level cache shared by all instances: directory path -> scanner,
    # so an existing scanner is reused for a path that was seen before.
    _paths_to_urls: Dict[str, _FlatDirectoryToUrls] = {}

    def __init__(
        self,
        candidates_from_page: CandidatesFromPage,
        path: str,
        project_name: str,
    ) -> None:
        self._candidates_from_page = candidates_from_page
        self._project_name = canonicalize_name(project_name)

        # Reuse the scanner already registered for this path, creating and
        # registering a new one only on the first request.
        registry = self._paths_to_urls
        try:
            scanner = registry[path]
        except KeyError:
            scanner = registry[path] = _FlatDirectoryToUrls(path=path)
        self._path_to_urls = scanner

    @property
    def link(self) -> Optional[Link]:
        """Always ``None``: this source has no single underlying link."""
        return None

    def page_candidates(self) -> FoundCandidates:
        """Yield the candidates found on every HTML file in the directory."""
        html_urls = self._path_to_urls.page_candidates
        for html_url in html_urls:
            yield from self._candidates_from_page(Link(html_url))

    def file_links(self) -> FoundLinks:
        """Yield a link for each archive recorded under this project name."""
        by_project = self._path_to_urls.project_name_to_urls
        for archive_url in by_project[self._project_name]:
            yield Link(archive_url)


class _LocalFileSource(LinkSource):
    """``--find-links=<path-or-url>`` or ``--[extra-]index-url=<path-or-url>``.

    If a URL is supplied, it must be a ``file:`` URL. If a path is supplied to
    the option, it is converted to a URL first. This returns:

    * ``page_candidates``: Candidates from the file, when it is an HTML file.
    * ``file_links``: The file itself, when it is not an HTML file.
    """

    def __init__(
        self,
        candidates_from_page: CandidatesFromPage,
        link: Link,
    ) -> None:
        self._candidates_from_page = candidates_from_page
        self._link = link

    @property
    def link(self) -> Optional[Link]:
        """The link this source was created with."""
        return self._link

    def page_candidates(self) -> FoundCandidates:
        """Yield candidates from the link, but only if it is an HTML file."""
        if _is_html_file(self._link.url):
            yield from self._candidates_from_page(self._link)

    def file_links(self) -> FoundLinks:
        """Yield the link itself, but only if it is not an HTML file."""
        if not _is_html_file(self._link.url):
            yield self._link


class _RemoteFileSource(LinkSource):
    """``--find-links=<url>`` or ``--[extra-]index-url=<url>``.

    This returns:

    * ``page_candidates``: Candidates from the link, when the page validator
      accepts it.
    * ``file_links``: The link itself, unconditionally.
    """

    def __init__(
        self,
        candidates_from_page: CandidatesFromPage,
        page_validator: PageValidator,
        link: Link,
    ) -> None:
        self._candidates_from_page = candidates_from_page
        self._page_validator = page_validator
        self._link = link

    @property
    def link(self) -> Optional[Link]:
        """The link this source was created with."""
        return self._link

    def page_candidates(self) -> FoundCandidates:
        """Yield candidates from the link if the page validator accepts it."""
        if self._page_validator(self._link):
            yield from self._candidates_from_page(self._link)

    def file_links(self) -> FoundLinks:
        """Yield the link itself."""
        yield self._link


class _IndexDirectorySource(LinkSource):
    """``--[extra-]index-url=<path-to-directory>``.

    This is treated like a remote URL; ``candidates_from_page`` contains logic
    for this by appending ``index.html`` to the link.
    """

    def __init__(
        self,
        candidates_from_page: CandidatesFromPage,
        link: Link,
    ) -> None:
        self._candidates_from_page = candidates_from_page
        self._link = link

    @property
    def link(self) -> Optional[Link]:
        """The link this source was created with."""
        return self._link

    def page_candidates(self) -> FoundCandidates:
        """Yield whatever ``candidates_from_page`` produces for the link."""
        found = self._candidates_from_page(self._link)
        yield from found

    def file_links(self) -> FoundLinks:
        """Return an empty tuple: this source contributes no file links."""
        return ()


def build_source(
    location: str,
    *,
    candidates_from_page: CandidatesFromPage,
    page_validator: PageValidator,
    expand_dir: bool,
    cache_link_parsing: bool,
    project_name: str,
) -> Tuple[Optional[str], Optional[LinkSource]]:
    """Build the link source matching ``location``.

    ``location`` is classified in this order: an existing local path, a
    ``file:`` URL, or any other URL accepted by ``is_url``. Anything else is
    logged as ignored.

    Returns a ``(url, source)`` pair:

    * ``(None, None)`` when the location is not recognised.
    * ``(url, None)`` when it maps to a local path that is neither a file
      nor a directory.
    * ``(url, source)`` otherwise. A location with no local path gives a
      remote source. A directory gives a flat-directory source when
      ``expand_dir`` is true and an index-directory source when it is not.
      A file gives a local file source.
    """
    url: Optional[str]
    path: Optional[str]
    if os.path.exists(location):  # Is a local path.
        url, path = path_to_url(location), location
    elif location.startswith("file:"):  # A file: URL.
        url, path = location, url_to_path(location)
    elif is_url(location):
        url, path = location, None
    else:
        logger.warning(
            "Location '%s' is ignored: "
            "it is either a non-existing path or lacks a specific scheme.",
            location,
        )
        return (None, None)

    source: Optional[LinkSource] = None
    if path is None:
        # No local path: only a URL is available.
        source = _RemoteFileSource(
            candidates_from_page=candidates_from_page,
            page_validator=page_validator,
            link=Link(url, cache_link_parsing=cache_link_parsing),
        )
    elif os.path.isdir(path):
        if expand_dir:
            source = _FlatDirectorySource(
                candidates_from_page=candidates_from_page,
                path=path,
                project_name=project_name,
            )
        else:
            source = _IndexDirectorySource(
                candidates_from_page=candidates_from_page,
                link=Link(url, cache_link_parsing=cache_link_parsing),
            )
    elif os.path.isfile(path):
        source = _LocalFileSource(
            candidates_from_page=candidates_from_page,
            link=Link(url, cache_link_parsing=cache_link_parsing),
        )
    else:
        logger.warning(
            "Location '%s' is ignored: it is neither a file nor a directory.",
            location,
        )
    return (url, source)
Linux commit 2024-08-20 16:46:04 +00:00			`import logging`
			`import mimetypes`
			`import os`
			`from collections import defaultdict`
			`from typing import Callable, Dict, Iterable, List, Optional, Tuple`

			`from pip._vendor.packaging.utils import (`
			`InvalidSdistFilename,`
			`InvalidVersion,`
			`InvalidWheelFilename,`
			`canonicalize_name,`
			`parse_sdist_filename,`
			`parse_wheel_filename,`
			`)`

			`from pip._internal.models.candidate import InstallationCandidate`
			`from pip._internal.models.link import Link`
			`from pip._internal.utils.urls import path_to_url, url_to_path`
			`from pip._internal.vcs import is_url`

			`logger = logging.getLogger(__name__)`

			`FoundCandidates = Iterable[InstallationCandidate]`
			`FoundLinks = Iterable[Link]`
			`CandidatesFromPage = Callable[[Link], Iterable[InstallationCandidate]]`
			`PageValidator = Callable[[Link], bool]`


			`class LinkSource:`
			`@property`
			`def link(self) -> Optional[Link]:`
			`"""Returns the underlying link, if there's one."""`
			`raise NotImplementedError()`

			`def page_candidates(self) -> FoundCandidates:`
			`"""Candidates found by parsing an archive listing HTML file."""`
			`raise NotImplementedError()`

			`def file_links(self) -> FoundLinks:`
			`"""Links found by specifying archives directly."""`
			`raise NotImplementedError()`


			`def _is_html_file(file_url: str) -> bool:`
			`return mimetypes.guess_type(file_url, strict=False)[0] == "text/html"`


			`class _FlatDirectoryToUrls:`
			`"""Scans directory and caches results"""`

			`def __init__(self, path: str) -> None:`
			`self._path = path`
			`self._page_candidates: List[str] = []`
			`self._project_name_to_urls: Dict[str, List[str]] = defaultdict(list)`
			`self._scanned_directory = False`

			`def _scan_directory(self) -> None:`
			`"""Scans directory once and populates both page_candidates`
			`and project_name_to_urls at the same time`
			`"""`
			`for entry in os.scandir(self._path):`
			`url = path_to_url(entry.path)`
			`if _is_html_file(url):`
			`self._page_candidates.append(url)`
			`continue`

			`# File must have a valid wheel or sdist name,`
			`# otherwise not worth considering as a package`
			`try:`
			`project_filename = parse_wheel_filename(entry.name)[0]`
			`except (InvalidWheelFilename, InvalidVersion):`
			`try:`
			`project_filename = parse_sdist_filename(entry.name)[0]`
			`except (InvalidSdistFilename, InvalidVersion):`
			`continue`

			`self._project_name_to_urls[project_filename].append(url)`
			`self._scanned_directory = True`

			`@property`
			`def page_candidates(self) -> List[str]:`
			`if not self._scanned_directory:`
			`self._scan_directory()`

			`return self._page_candidates`

			`@property`
			`def project_name_to_urls(self) -> Dict[str, List[str]]:`
			`if not self._scanned_directory:`
			`self._scan_directory()`

			`return self._project_name_to_urls`


			`class _FlatDirectorySource(LinkSource):`
			"""Link source specified by ``--find-links=<path-to-dir>``.

			`This looks the content of the directory, and returns:`

			* ``page_candidates``: Links listed on each HTML file in the directory.
			* ``file_candidates``: Archives in the directory.
			`"""`

			`_paths_to_urls: Dict[str, _FlatDirectoryToUrls] = {}`

			`def __init__(`
			`self,`
			`candidates_from_page: CandidatesFromPage,`
			`path: str,`
			`project_name: str,`
			`) -> None:`
			`self._candidates_from_page = candidates_from_page`
			`self._project_name = canonicalize_name(project_name)`

			`# Get existing instance of _FlatDirectoryToUrls if it exists`
			`if path in self._paths_to_urls:`
			`self._path_to_urls = self._paths_to_urls[path]`
			`else:`
			`self._path_to_urls = _FlatDirectoryToUrls(path=path)`
			`self._paths_to_urls[path] = self._path_to_urls`

			`@property`
			`def link(self) -> Optional[Link]:`
			`return None`

			`def page_candidates(self) -> FoundCandidates:`
			`for url in self._path_to_urls.page_candidates:`
			`yield from self._candidates_from_page(Link(url))`

			`def file_links(self) -> FoundLinks:`
			`for url in self._path_to_urls.project_name_to_urls[self._project_name]:`
			`yield Link(url)`


			`class _LocalFileSource(LinkSource):`
			"""``--find-links=<path-or-url>`` or ``--[extra-]index-url=<path-or-url>``.

			If a URL is supplied, it must be a ``file:`` URL. If a path is supplied to
			`the option, it is converted to a URL first. This returns:`

			* ``page_candidates``: Links listed on an HTML file.
			* ``file_candidates``: The non-HTML file.
			`"""`

			`def __init__(`
			`self,`
			`candidates_from_page: CandidatesFromPage,`
			`link: Link,`
			`) -> None:`
			`self._candidates_from_page = candidates_from_page`
			`self._link = link`

			`@property`
			`def link(self) -> Optional[Link]:`
			`return self._link`

			`def page_candidates(self) -> FoundCandidates:`
			`if not _is_html_file(self._link.url):`
			`return`
			`yield from self._candidates_from_page(self._link)`

			`def file_links(self) -> FoundLinks:`
			`if _is_html_file(self._link.url):`
			`return`
			`yield self._link`


			`class _RemoteFileSource(LinkSource):`
			"""``--find-links=<url>`` or ``--[extra-]index-url=<url>``.

			`This returns:`

			* ``page_candidates``: Links listed on an HTML file.
			* ``file_candidates``: The non-HTML file.
			`"""`

			`def __init__(`
			`self,`
			`candidates_from_page: CandidatesFromPage,`
			`page_validator: PageValidator,`
			`link: Link,`
			`) -> None:`
			`self._candidates_from_page = candidates_from_page`
			`self._page_validator = page_validator`
			`self._link = link`

			`@property`
			`def link(self) -> Optional[Link]:`
			`return self._link`

			`def page_candidates(self) -> FoundCandidates:`
			`if not self._page_validator(self._link):`
			`return`
			`yield from self._candidates_from_page(self._link)`

			`def file_links(self) -> FoundLinks:`
			`yield self._link`


			`class _IndexDirectorySource(LinkSource):`
			"""``--[extra-]index-url=<path-to-directory>``.

			This is treated like a remote URL; ``candidates_from_page`` contains logic
			for this by appending ``index.html`` to the link.
			`"""`

			`def __init__(`
			`self,`
			`candidates_from_page: CandidatesFromPage,`
			`link: Link,`
			`) -> None:`
			`self._candidates_from_page = candidates_from_page`
			`self._link = link`

			`@property`
			`def link(self) -> Optional[Link]:`
			`return self._link`

			`def page_candidates(self) -> FoundCandidates:`
			`yield from self._candidates_from_page(self._link)`

			`def file_links(self) -> FoundLinks:`
			`return ()`


			`def build_source(`
			`location: str,`
			`*,`
			`candidates_from_page: CandidatesFromPage,`
			`page_validator: PageValidator,`
			`expand_dir: bool,`
			`cache_link_parsing: bool,`
			`project_name: str,`
			`) -> Tuple[Optional[str], Optional[LinkSource]]:`
			`path: Optional[str] = None`
			`url: Optional[str] = None`
			`if os.path.exists(location): # Is a local path.`
			`url = path_to_url(location)`
			`path = location`
			`elif location.startswith("file:"): # A file: URL.`
			`url = location`
			`path = url_to_path(location)`
			`elif is_url(location):`
			`url = location`

			`if url is None:`
			`msg = (`
			`"Location '%s' is ignored: "`
			`"it is either a non-existing path or lacks a specific scheme."`
			`)`
			`logger.warning(msg, location)`
			`return (None, None)`

			`if path is None:`
			`source: LinkSource = _RemoteFileSource(`
			`candidates_from_page=candidates_from_page,`
			`page_validator=page_validator,`
			`link=Link(url, cache_link_parsing=cache_link_parsing),`
			`)`
			`return (url, source)`

			`if os.path.isdir(path):`
			`if expand_dir:`
			`source = _FlatDirectorySource(`
			`candidates_from_page=candidates_from_page,`
			`path=path,`
			`project_name=project_name,`
			`)`
			`else:`
			`source = _IndexDirectorySource(`
			`candidates_from_page=candidates_from_page,`
			`link=Link(url, cache_link_parsing=cache_link_parsing),`
			`)`
			`return (url, source)`
			`elif os.path.isfile(path):`
			`source = _LocalFileSource(`
			`candidates_from_page=candidates_from_page,`
			`link=Link(url, cache_link_parsing=cache_link_parsing),`
			`)`
			`return (url, source)`
			`logger.warning(`
			`"Location '%s' is ignored: it is neither a file nor a directory.",`
			`location,`
			`)`
			`return (url, None)`