# HG changeset patch # User Vincent Hatakeyama <vincent.hatakeyama@xcg-consulting.fr> # Date 1697556992 -7200 # Tue Oct 17 17:36:32 2023 +0200 # Node ID fa9ea4bf1da3d92c4c423326b5832fc8924d7cf7 # Parent b0c048c6325e7afe4b7c23291dd0781548f9d7d7 ✨ Multiprocess for checking/compiling/downloading diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -53,7 +53,7 @@ image: $TEMP_IMAGE script: - python3 -m pip install mypy types-PyYAML types-psycopg2 types-python-dateutil - types-tomli types-requests build twine + types-requests build twine - mypy odoo_scripts tests rules: - if: $CI_COMMIT_TAG == null diff --git a/NEWS.rst b/NEWS.rst --- a/NEWS.rst +++ b/NEWS.rst @@ -7,6 +7,8 @@ Use tomli instead of toml for python < 3.11. +Download wheel for python packages if able, caching them. Use multiprocess to speed up copying/compiling/downloading. + 20.9.0 ------ diff --git a/odoo_scripts/docker_build_copy.py b/odoo_scripts/docker_build_copy.py --- a/odoo_scripts/docker_build_copy.py +++ b/odoo_scripts/docker_build_copy.py @@ -2,12 +2,15 @@ """Tools to help build an image """ import argparse +import functools import logging import os import sys +import tempfile from collections.abc import Mapping +from multiprocessing import Pool from subprocess import DEVNULL, CalledProcessError, check_call, check_output -from typing import Dict, List, Optional, Tuple +from typing import List, Optional, Tuple, Union import hglib # type: ignore[import] @@ -152,116 +155,42 @@ packages_to_compile.append(package) else: packages_to_copy.append(package) - # TODO changer pour faire en parallèle via du yield ou autre chose - # avec plusieurs compilation/copy/recopiage en parallèle (2 à 4 de chaque). 
if download: - packages_to_download: Dict[ - str, List[Tuple[str, str]] - ] = find_packages_to_download(packages_to_compile + packages_to_copy) - # try with build if present, fallback to pip wheel otherwise - if packages_to_download: - _logger.info("%d package(s) to download", len(packages_to_download)) - downloaded_packages = download_packages( - orus_api_token, - packages_to_download, - requirements, - target, + downloaded_packages = [] + _do = functools.partial( + _search_and_download_package, orus_api_token=orus_api_token, target=target ) + for package_group in (packages_to_compile, packages_to_copy): + if package_group: + with Pool() as p: + results = p.map(_do, package_group) + for result in results: + if result is not None: + package, required = result + package_group.remove(package) + downloaded_packages.append(package) + requirements.extend(required) if downloaded_packages: _logger.info("%d packages downloaded", len(downloaded_packages)) - for package in downloaded_packages: - if package in packages_to_compile: - packages_to_compile.remove(package) - elif package in packages_to_copy: - packages_to_copy.remove(package) - else: - _logger.warning(f"Who asked to get {package}?") if packages_to_compile: _logger.info("Compiling %d packages", len(packages_to_compile)) - try: - # TODO pip wheel semble plus rapide - # build is not usable directly so use a subprocess. - # Disable flake8, this is just to test that the package is installed. - import build # type: ignore[import] # noqa: F401 - - cmd = [ - sys.executable, - "-m", - "build", - "--outdir", - target, - ] + packages_to_compile - _logger.debug(" ".join(cmd)) - check_call(cmd) - except ImportError: - # pip is not usable directly, it is indicated in its doc to use subprocess - # instead. 
- cmd = [ - sys.executable, - "-m", - "pip", - "wheel", - "--no-deps", - "-w", - target, - "--ignore-requires-python", - ] + packages_to_compile - _logger.debug(" ".join(cmd)) - check_call(cmd) - - try: - # Disable flake8, this is just to test that the package is installed - import twine # type: ignore[import] # noqa: F401 - - cmd = [sys.executable, "-m", "twine", "check", f"{target}/*"] - check_call(cmd) - except ImportError: - _logger.warning("No twine, no check done") + with Pool() as p: + compiled_requirements = p.map( + functools.partial(_compile_package, target=target), packages_to_compile + ) + for reqs in compiled_requirements: + requirements.extend(reqs) + if packages_to_copy: + _logger.info("Copying %d packages", len(packages_to_copy)) + with Pool() as p: + copied_requirements = p.map( + functools.partial(_copy_packages, target=target), packages_to_copy + ) + for copy_result in copied_requirements: + requirements.extend(copy_result) - for package in packages_to_copy: - cvs_parent = package - package_path_in_target = "" - while not ( - os.path.exists(os.path.join(cvs_parent, ".git")) - or os.path.exists(os.path.join(cvs_parent, ".hg")) - or os.path.exists(os.path.join(cvs_parent, ".hg_archival.txt")) - or os.path.exists(os.path.join(cvs_parent, ".git_archival.txt")) - ): - package_path_in_target = os.path.join( - os.path.basename(cvs_parent), package_path_in_target - ) - if os.path.dirname(cvs_parent) == "": - # stop at one level in the super project - break - cvs_parent = os.path.dirname(cvs_parent) - this_package_target = os.path.join(target, _target_path(cvs_parent)) - # Use hg archive - if os.path.exists(os.path.join(cvs_parent, ".hg")): - _logger.info( - "Using mercurial archive for %s to %s", - cvs_parent, - this_package_target, - ) - client = hglib.open(cvs_parent) - client.archive(this_package_target.encode("UTF-8")) - # fix for setuptools-scm/hatchling that do not like when the file has - # latesttag: null - check_call( - [ - "sed", - "-i", - 
"-s", - "s,latesttag: null,latesttag: 0,", - os.path.join(this_package_target, ".hg_archival.txt"), - ] - ) - else: - _copy_package(cvs_parent + os.path.sep, this_package_target) - requirements.append( - os.path.join(".", this_package_target, package_path_in_target) - ) - # Write requirements - # pip needs to be run with pip install -r <target>/requirements for the path in the + # Write requirements file + # pip needs to be run with pip install -r <target>/requirements for the paths in the # file to be valid with open(os.path.join(target, "requirements"), "wt") as f: f.write("\n".join(requirements)) @@ -291,27 +220,113 @@ list_modules() -def download_packages( - orus_api_token, - packages_to_download: Dict[str, List[Tuple[str, str]]], - requirements, - target, -) -> List[str]: - downloaded_packages: List[str] = [] - # Download - for package in packages_to_download: - _dl = _download_packages( - orus_api_token, packages_to_download[package], requirements, target +def _copy_packages(package, target) -> List[str]: + cvs_parent = package + package_path_in_target = "" + while not ( + os.path.exists(os.path.join(cvs_parent, ".git")) + or os.path.exists(os.path.join(cvs_parent, ".hg")) + or os.path.exists(os.path.join(cvs_parent, ".hg_archival.txt")) + or os.path.exists(os.path.join(cvs_parent, ".git_archival.txt")) + ): + package_path_in_target = os.path.join( + os.path.basename(cvs_parent), package_path_in_target ) - if _dl: - downloaded_packages.append(package) - return downloaded_packages + if os.path.dirname(cvs_parent) == "": + # stop at one level in the super project + break + cvs_parent = os.path.dirname(cvs_parent) + this_package_target = os.path.join(target, _target_path(cvs_parent)) + # Use hg archive + if os.path.exists(os.path.join(cvs_parent, ".hg")): + _logger.info( + "Using mercurial archive for %s to %s", + cvs_parent, + this_package_target, + ) + client = hglib.open(cvs_parent) + client.archive(this_package_target.encode("UTF-8")) + # fix for 
setuptools-scm/hatchling that do not like when the file has + # latesttag: null + check_call( + [ + "sed", + "-i", + "-s", + "s,latesttag: null,latesttag: 0,", + os.path.join(this_package_target, ".hg_archival.txt"), + ] + ) + else: + _copy_package(cvs_parent + os.path.sep, this_package_target) + return [os.path.join(".", this_package_target, package_path_in_target)] + + +def _compile_package(package_to_compile, target) -> List[str]: + return _compile_packages([package_to_compile], target) -def _download_packages( +def _compile_packages(packages_to_compile, target) -> List[str]: + with tempfile.TemporaryDirectory() as tmpdirname: + try: + # TODO pip wheel semble plus rapide + # build is not usable directly so use a subprocess. + # Disable flake8, this is just to test that the package is installed. + import build # type: ignore[import] # noqa: F401 + + cmd = [ + sys.executable, + "-m", + "build", + "--outdir", + tmpdirname, + ] + packages_to_compile + _logger.debug(" ".join(cmd)) + check_call(cmd) + except ImportError: + # pip is not usable directly, it is indicated in its doc to use subprocess + # instead. 
+ cmd = [ + sys.executable, + "-m", + "pip", + "wheel", + "--no-deps", + "-w", + tmpdirname, + "--ignore-requires-python", + ] + packages_to_compile + _logger.debug(" ".join(cmd)) + check_call(cmd) + content = os.listdir(tmpdirname) + + try: + # Disable flake8, this is just to test that the package is installed + import twine # type: ignore[import] # noqa: F401 + + cmd = [sys.executable, "-m", "twine", "check", f"{target}/*"] + check_call(cmd) + except ImportError: + _logger.warning("No twine, no check done") + requirements = [] + for file in content: + # LINUX + check_call(["mv", os.path.join(tmpdirname, file), target]) + requirements.append(os.path.join(".", target, file)) + return requirements + + +def _search_and_download_package(package, orus_api_token, target) -> Union[None, Tuple]: + potential_packages: List[Tuple[str, str]] = _find_packages_to_download(package) + if potential_packages: + return _download_package(package, orus_api_token, potential_packages, target) + return None + + +def _download_package( + package, orus_api_token, - package_to_download, - requirements, + package_to_download: List[Tuple[str, str]], target, ): # Download @@ -324,6 +339,7 @@ ":all:", xcg_index_url(orus_api_token), ] + requirements = [] for package_name, version in package_to_download: try: download_target = cache_dir(package_name, version) @@ -343,21 +359,11 @@ # LINUX check_call(["cp", "-a", os.path.join(download_target, file), target]) requirements.append(os.path.join(".", target, file)) - return package_to_download + return package, requirements except CalledProcessError: # Ignore issues, mainly when the package version does not exist pass - return - - -def find_packages_to_download(packages) -> Dict[str, List[Tuple[str, str]]]: - packages_to_download: Dict[str, List[Tuple[str, str]]] = {} - _logger.info("Reading pyproject.toml to find out package names") - for package in packages: - _packages_to_download = _find_packages_to_download(package) - if _packages_to_download: - 
packages_to_download[package] = _packages_to_download - return packages_to_download + return None def _find_packages_to_download(package) -> List[Tuple[str, str]]: