Commit 71e01850 authored by Mathieu Coupé
Browse files

Merge branch 'optimize-cache' into 'main'

Optimize branches/tags/MRs cache

See merge request to-be-continuous/tools/gitlab-butler!60
parents fedb931a 57ea9c19
Loading
Loading
Loading
Loading
+103 −45
Original line number Diff line number Diff line
@@ -23,6 +23,93 @@ class PipelineSourceEnum(Enum):
    TAG = 2
    MERGE_REQUEST = 3

# Cache to avoid requesting branch/tags/mr statuses multiple times
class ProjectSourcesCache:
    """
    Lazy, per-project cache of pipeline sources (branches, tags and merge requests).

    Nothing is requested when the cache is created:
    - the branch list is requested on the first call to from_branches(),
    - the tag list is requested on the first call to from_tags(),
    - merge requests are requested one at a time, on the first call to
      from_merge_request() for a given iid.
    """

    class Branch:
        """
        Cached attributes of a branch
        """

        def __init__(
                self,
                name: str,
                protected: bool,
                default: bool
        ) -> None:
            self.name: str = name
            self.protected: bool = protected
            self.default: bool = default

        def __repr__(self) -> str:
            # cached objects are interpolated in debug traces: show their content, not their address
            return f'{type(self).__name__}(name={self.name!r}, protected={self.protected!r}, default={self.default!r})'

    class MergeRequest:
        """
        Cached attributes of a merge request
        """

        # NOTE(review): values are stored exactly as read from the API object;
        # confirm that id/iid really are str as annotated
        def __init__(
                self,
                id: str,
                iid: str,
                source_branch: str,
                target_branch: str,
                state: str
        ) -> None:
            self.id: str = id
            self.iid: str = iid
            self.source_branch: str = source_branch
            self.target_branch: str = target_branch
            self.state: str = state

        def __repr__(self) -> str:
            return (f'{type(self).__name__}(id={self.id!r}, iid={self.iid!r}, '
                    f'source_branch={self.source_branch!r}, target_branch={self.target_branch!r}, '
                    f'state={self.state!r})')

    class Tag:
        """
        Cached attributes of a tag
        """

        def __init__(
                self,
                name: str
        ) -> None:
            self.name: str = name

        def __repr__(self) -> str:
            return f'{type(self).__name__}(name={self.name!r})'

    def __init__(
            self,
            project: Project
    ) -> None:
        """
        Create an empty cache bound to `project`; no request is made here.
        """
        self.project = project
        # None means "not fetched yet", as opposed to an empty dict ("fetched, nothing found")
        self.tags: dict[str, ProjectSourcesCache.Tag] | None = None
        self.branches: dict[str, ProjectSourcesCache.Branch] | None = None
        self.merge_requests: dict[str, ProjectSourcesCache.MergeRequest] | None = None

    def from_branches(self, ref: str) -> Branch | None:
        """
        Return the cached branch named `ref`, or None if the project has no such branch.

        The list of branches (with protected and default statuses) is requested on first call only.
        """
        if self.branches is None:
            # the dict is fully built before being stored: if listing fails midway,
            # self.branches stays None and the next call retries, instead of
            # silently answering from a truncated list
            self.branches = {
                branch.name: ProjectSourcesCache.Branch(name=branch.name, protected=branch.protected, default=branch.default)
                for branch in self.project.branches.list(iterator=True)
            }

        return self.branches.get(ref)

    def from_tags(self, ref: str) -> Tag | None:
        """
        Return the cached tag named `ref`, or None if the project has no such tag.

        The list of tags is requested on first call only.
        """
        if self.tags is None:
            # same as branches: only a complete list is ever stored
            self.tags = {
                tag.name: ProjectSourcesCache.Tag(name=tag.name)
                for tag in self.project.tags.list(iterator=True)
            }

        return self.tags.get(ref)

    def from_merge_request(self, merge_request_iid: str) -> MergeRequest:
        """
        Return the merge request `merge_request_iid`, requesting it from the API on first access.

        Merge requests are not listed upfront: each one is requested on demand and then kept.
        Any error raised by the API call is propagated and nothing is cached for that iid.
        """
        if self.merge_requests is None:
            # will store MR details, but we only fetch MR details on demand
            self.merge_requests = {}

        merge_request = self.merge_requests.get(merge_request_iid)
        if merge_request is None:
            # not in the cache yet: get from API
            merge_request_from_api = self.project.mergerequests.get(merge_request_iid, lazy=False)
            merge_request = ProjectSourcesCache.MergeRequest(
                id=merge_request_from_api.id,
                iid=merge_request_from_api.iid,
                source_branch=merge_request_from_api.source_branch,
                target_branch=merge_request_from_api.target_branch,
                state=merge_request_from_api.state,
            )
            # store MR in cache
            self.merge_requests[merge_request_iid] = merge_request

        return merge_request


class PipelineSource:
    def __init__(
@@ -125,24 +212,6 @@ class Butler:
        self.handle_error(ValueError(f'Unknown reference format "{ref}"'))
        return None

    @staticmethod
    def build_caches(project: Project) -> dict:
        """
        Generates caches for a project : list of tags, list of merge-requests, list of branches
        """

        # get list of branches, with protected and default statuses
        branches = dict()
        for branch in project.branches.list(iterator=True):
            branches[branch.name] = ({'name': branch.name, 'protected': branch.protected, 'default': branch.default})

        # get list of tags
        tags = dict()
        for tag in project.tags.list(iterator=True):
            tags[tag.name] = ({'name': tag.name})

        return {'branches': branches, 'tags': tags, 'mr': dict()}

    def get_max_pipelines_for_source(self, pipeline: ProjectPipeline, cfg: ButlerCfg, source: PipelineSource) -> int:
        """
        Compute the limit for this pipeline source
@@ -247,7 +316,7 @@ class Butler:

        return True

    def find_pipeline_source_status(self, project: Project, pipeline: ProjectPipeline, caches: dict) -> PipelineSource | None:
    def find_pipeline_source_status(self, project: Project, pipeline: ProjectPipeline, caches: ProjectSourcesCache) -> PipelineSource | None:
        """
        Check if the source of the pipeline still exists
        """
@@ -259,35 +328,27 @@ class Butler:
                return None

            # check if MR is already in the cache and get it from API if needed
            if merge_request_iid not in caches['mr'].keys():
                merge_request_from_api = project.mergerequests.get(merge_request_iid, lazy=False)
                # store MR in cache
                caches['mr'][merge_request_iid] = {'id': merge_request_from_api.id, 'iid': merge_request_from_api.iid,
                                                   'source_branch': merge_request_from_api.source_branch,
                                                   'target_branch': merge_request_from_api.target_branch,
                                                   'state': merge_request_from_api.state}
            merge_request = caches.from_merge_request(merge_request_iid)

            merge_request = caches["mr"][merge_request_iid]
            # check MR state
            if self.verbose and self.debug:
                considered_as_existing = "no longer existing" if merge_request['state'] in ['closed', 'merged'] else "existing"
                print(f'Pipeline {pipeline.id} {pipeline.ref} is linked to MR {merge_request["iid"]} ({merge_request["state"]} state, considered as {considered_as_existing})')
                considered_as_existing = "no longer existing" if merge_request.state in ['closed', 'merged'] else "existing"
                print(f'Pipeline {pipeline.id} {pipeline.ref} is linked to MR {merge_request.iid} ({merge_request.state} state, considered as {considered_as_existing})')

            # MR is considered as existing if its state is neither closed nor merged
            return PipelineSource(existing=merge_request['state'] not in ['closed', 'merged'], source_type=PipelineSourceEnum.MERGE_REQUEST, state=merge_request["state"])
            return PipelineSource(existing=merge_request.state not in ['closed', 'merged'], source_type=PipelineSourceEnum.MERGE_REQUEST, state=merge_request.state)
        else:
            # pipeline from a tag ?
            if pipeline.ref in caches['tags'].keys():
                if self.verbose and self.debug:
                    print(f'Pipeline {pipeline.id} {pipeline.ref} is linked to tag {pipeline.ref} : {caches["tags"][pipeline.ref]}')
                return PipelineSource(existing=True, source_type=PipelineSourceEnum.TAG)

            # pipeline from a branch ?
            if pipeline.ref in caches['branches'].keys():
                branch = caches['branches'][pipeline.ref]
            if branch := caches.from_branches(pipeline.ref):
                if self.verbose and self.debug:
                    print(f'Pipeline {pipeline.id} {pipeline.ref} is linked to branch {pipeline.ref} : {branch}')
                return PipelineSource(existing=True, source_type=PipelineSourceEnum.BRANCH, default=branch['default'], protected=branch['protected'])
                return PipelineSource(existing=True, source_type=PipelineSourceEnum.BRANCH, default=branch.default, protected=branch.protected)

            # pipeline from a tag ?
            if tag := caches.from_tags(pipeline.ref):
                if self.verbose and self.debug:
                    print(f'Pipeline {pipeline.id} {pipeline.ref} is linked to tag {pipeline.ref} : {tag}')
                return PipelineSource(existing=True, source_type=PipelineSourceEnum.TAG)

        if self.verbose and self.debug:
            print(f'Pipeline {pipeline.id} {pipeline.ref} ({pipeline.source}) is not linked to any branch or tag')
@@ -380,11 +441,8 @@ class Butler:
        pipelines_deletions_attempts = 0
        pipelines_deletions_count = 0

        # build cache
        # list of existing elements may be long, it could be better to fetch the elements on the fly
        caches = self.build_caches(project)
        if self.verbose and self.debug:
            print(f'cached branches and tags = {caches}')
        # existing branches, tags and MRs are fetched on the fly, only when needed, and then cached
        caches = ProjectSourcesCache(project)

        # list of known pipelines to keep
        kept_pipelines: dict[str, list[ProjectPipeline]] = {}