Commit 3f2fc43b authored by Protocole's avatar Protocole Committed by Thomas Boni
Browse files

Resolve "Add an antivirus verification for every docker image in every job"

parent bfe2b0f0
Loading
Loading
Loading
Loading
+67 −0
Original line number Diff line number Diff line
stages:
  - static_tests
  - merge_tests
  - project_setup
  - build
  - dynamic_tests
@@ -250,3 +251,69 @@ release:
    done
  rules:
    - if: '$CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH && $CI_PROJECT_PATH_SLUG == "r2devops-hub"'

# Scheduled job: refreshes the ClamAV signature database with `freshclam` and
# pushes it to the `clamav-db` cache, which the `job_av` child-pipeline job
# pulls before scanning images.
refresh_job_av_database:
  image: python:3.9.1-alpine
  stage: static_tests
  variables:
    PIPENV_PIPFILE: tools/job_av/Pipfile
    PYTHONPATH: "./:${PYTHONPATH}"
  script:
    - apk add --no-cache python3 py3-pip clamav util-linux
    - pip install --ignore-installed distlib pipenv
    - pipenv install

    # Updating the ClamAV database to push it into the cache
    - freshclam

    # Runner cannot access /var/lib/clamav, so we need to copy it
    # in order to cache it properly
    - mkdir freshclam_db
    - cp /var/lib/clamav/* freshclam_db
  cache:
    key: "clamav-db"
    paths:
      - freshclam_db
    # This job only produces the cache, it never restores it
    policy: push
  rules:
    # Runs only in scheduled pipelines of the `r2devops-hub` project that
    # define the CI_CLAMAV_PIPELINE variable
    - if: '$CI_PIPELINE_SOURCE == "schedule" && $CI_CLAMAV_PIPELINE && $CI_PROJECT_PATH_SLUG == "r2devops-hub"'

# Merge-train job: runs `tools/job_av/job_av.py` to generate the child pipeline
# definition (${GENERATED_YAML}), which is published as an artifact and
# triggered by `child_job_av`.
generate_job_av:
  image: python:3.9.1-alpine
  stage: static_tests
  variables:
    PIPENV_PIPFILE: tools/job_av/Pipfile
    JOB_LOGFILE: "generate_job.log"
    GENERATED_YAML: "generated-gitlab-ci.yml"
    PYTHONPATH: "./:${PYTHONPATH}"
    SCANNED_IMAGES_FILE: "clamav_scanned_images.json"
  script:
    # Print the list of already scanned images when the cache restored one
    - if [ -f "${SCANNED_IMAGES_FILE}" ]; then cat ${SCANNED_IMAGES_FILE}; fi
    - apk add --no-cache python3 py3-pip util-linux
    - pip install --ignore-installed distlib pipenv
    - pipenv install

    # Generate the child pipeline file and print it in the job log
    - pipenv run python3 tools/job_av/job_av.py
    - cat $GENERATED_YAML
  artifacts:
    paths:
      - ${GENERATED_YAML}
    expire_in: 30 days
    when: always
  cache:
    # Keeps the list of already scanned images between pipelines; the path
    # must stay in sync with SCANNED_IMAGES_FILE above
    key: "clamav-scanned-jobs"
    paths:
      - clamav_scanned_images.json
    policy: pull-push
  rules:
    # Runs only in merge trains of the `r2devops-hub` project
    - if: '$CI_MERGE_REQUEST_EVENT_TYPE == "merge_train" && $CI_PROJECT_PATH_SLUG == "r2devops-hub"'

# Triggers the child pipeline described by the file generated in
# `generate_job_av`; `strategy: depend` makes this job reflect the status of
# that child pipeline.
child_job_av:
  stage: merge_tests
  trigger:
    include:
      # File name must match GENERATED_YAML in `generate_job_av`
      - artifact: generated-gitlab-ci.yml
        job: generate_job_av
    strategy: depend
  rules:
    # Same condition as `generate_job_av`, which provides the artifact
    - if: '$CI_MERGE_REQUEST_EVENT_TYPE == "merge_train" && $CI_PROJECT_PATH_SLUG == "r2devops-hub"'

tools/job_av/Pipfile

0 → 100644
+10 −0
Original line number Diff line number Diff line
# Pipenv environment for tools/job_av (selected through PIPENV_PIPFILE in CI)
[[source]]
name = "pypi"
url = "https://pypi.org/simple"
verify_ssl = true

[requires]
python_version = "3"

[packages]
# Pinned YAML parser used by job_av.py to read and write GitLab CI files
pyyaml = "==5.3.1"
+52 −0
Original line number Diff line number Diff line
# The generated child pipeline only has one stage
stages:
  - static_tests

# Create the pipeline for merge requests, tags and the default branch
workflow:
  rules:
    - if: $CI_MERGE_REQUEST_IID
    - if: $CI_COMMIT_TAG
    - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH

# Template job of the generated antivirus child pipeline.
# `tools/job_av/job_av.py` fills the `IMAGE` matrix below (and clones this job
# as `job_av_<n>` for extra chunks of images). Each matrix entry exports the
# filesystem of one Docker image and scans it with ClamAV.
job_av:
  image: docker:19.03
  stage: static_tests
  services:
    - name: docker:19.03-dind
      entrypoint: ["env", "-u", "DOCKER_HOST"]
      command: ["dockerd-entrypoint.sh"]
  variables:
    # NOTE(review): the script below never runs Python and this path points to
    # the job_image tool, not job_av — confirm the pipenv setup is still needed
    PIPENV_PIPFILE: tools/job_image/Pipfile
    AV_LOGFILE: "clamav.log"

    DOCKER_HOST: tcp://docker:2375
    DOCKER_DRIVER: overlay2
    DOCKER_TLS_CERTDIR: ""
  script:
    - apk add --no-cache python3 py3-pip clamav util-linux
    - pip install --ignore-installed distlib pipenv
    - pipenv install
    # Moving the cached ClamAV database into the proper directory
    # See `refresh_job_av_database` in .gitlab-ci.yml, which builds this cache
    - mkdir -p /var/lib/clamav
    - cp freshclam_db/* /var/lib/clamav/
    # Add cut of `@` for specific image tags (eg: `lighthouse@sha...`)
    - JOB=$(basename "${IMAGE}" | cut -d':' -f1 | cut -d'@' -f1)
    - docker create --name "${JOB}" "${IMAGE}"
    - docker export "${JOB}" > "${JOB}.tar"
    - mkdir "${JOB}"
    - tar -xf "${JOB}.tar" -C "${JOB}"
    # clamscan exits with a non-zero status when it finds a virus or hits an
    # error. Remember that status instead of aborting right away, so the scan
    # report is always printed in the job log before the job fails.
    - SCAN_RC=0
    - clamscan -ir "${JOB}" --max-filesize=50M > "${JOB}-scan.log" 2>&1 || SCAN_RC=$?
    - cat "${JOB}-scan.log"
    - if [ "${SCAN_RC}" -ne 0 ]; then exit "${SCAN_RC}"; fi
  artifacts:
    paths:
      - "*-scan.log"
    expire_in: 30 days
    when: always
  cache:
    # Read-only use of the database pushed by `refresh_job_av_database`
    key: "clamav-db"
    paths:
      - freshclam_db
    policy: pull
  parallel:
    matrix:
      # Filled in by tools/job_av/job_av.py with the images to scan
      - IMAGE: []
 No newline at end of file

tools/job_av/job_av.py

0 → 100644
+164 −0
Original line number Diff line number Diff line
#!/usr/bin/env python3

import os
import logging
import json
import sys
import copy
import argparse
import yaml
from tools.job_image.job_image import get_image

# Import the Config module and set the path to run the script from root project
# /!\ This instruction is only working if you run this script from the root of the project
sys.path.insert(0, "./")
from tools.utils.utils import Config

utils = Config()

# Job variables
# Path of the generated child pipeline file, taken from the GENERATED_YAML
# environment variable (None when the variable is not set)
CI_FILENAME = os.getenv("GENERATED_YAML")
# Maximum number of images put in the `parallel: matrix` of a single job
# NOTE(review): presumably mirrors a GitLab limit on parallel jobs — confirm
PARALLEL_LIMIT = 50

# Template of the child pipeline, read from `<TOOLS_DIR>/<JOB_DIR>/<BASE_FILE>`
BASE_FILE = "base-gitlab-ci.yml"
JOB_DIR = "job_av"

# JSON file listing the images already scanned, taken from the
# SCANNED_IMAGES_FILE environment variable (None when the variable is not set)
SCANNED_IMAGES_FILE = os.getenv("SCANNED_IMAGES_FILE")
# NOTE(review): not referenced anywhere in the visible part of this file
SCANNED_IMAGES = []

# Images that are never added to the list of images to scan
BLACKLIST = ["github/super-linter:v3.14.3", "shiftleft/sast-scan:v1.9.29"]


def argparse_setup():
    """Parse the command line of the script

    No option is declared, so only the argparse defaults (such as
    ``--help``) are available.

    Return
    ------
    obj
        Namespace holding the parsed arguments
    """
    return argparse.ArgumentParser().parse_args()


def get_scanned_images():
    """Fetch the images already scanned by previous pipelines

    The list is read from the JSON file designated by SCANNED_IMAGES_FILE.
    When that file is missing, cannot be parsed or does not contain a list,
    it is (re)created with an empty list so that later steps always find a
    valid file.

    :return: string[] with a list of scanned images
    :raises OSError: when the file cannot be (re)created
    """
    try:
        with open(SCANNED_IMAGES_FILE, "r") as images_file:
            scanned_image = json.load(images_file)
        if not isinstance(scanned_image, list):
            raise ValueError("expected a JSON list")
        logging.info("Scanned images list: %s", scanned_image)
        return scanned_image
    except OSError:
        logging.warning("There isn't any file containing already scanned images")
    except ValueError as error:
        # json.JSONDecodeError is a ValueError: a corrupted cache file must not
        # abort the pipeline generation, start again from an empty list
        logging.warning("Ignoring invalid scanned images file %s: %s",
                        SCANNED_IMAGES_FILE, error)

    with open(SCANNED_IMAGES_FILE, "w") as new_file:
        json.dump([], new_file)
    return []


def set_default_image(current_ci):
    """Turn the `job_av` job into a placeholder job

    Used when there is no image to scan: the job loaded from
    base-gitlab-ci.yml is reduced, in place, to a simple child pipeline job
    saying "nothing to run".

    Based on test this "default job" lasts 20 seconds.

    :param current_ci: dict of the CI file, must contain a `job_av` job with
        the `services`, `artifacts`, `cache` and `parallel` keys
    """
    placeholder = current_ci["job_av"]
    placeholder["script"] = ["echo 'There is not any image to scan, finishing...'"]

    # None of the scanning machinery is needed to print a message
    for unneeded_key in ("services", "artifacts", "cache", "parallel"):
        placeholder.pop(unneeded_key)
    placeholder["image"] = "alpine:3.13.2"

    logging.info("No image to scan, putting default useless child job")

def save_scanned_images(already_scanned):
    """Write the list of scanned images to the file designated by
    SCANNED_IMAGES_FILE, as JSON

    Exits the program with status 1 when the file cannot be written.

    :param already_scanned: array with a list of image
    :return:
    """
    try:
        with open(SCANNED_IMAGES_FILE, 'w') as output:
            json.dump(already_scanned, output)
    except OSError as error:
        logging.error("Failed to write new scanned images file %s", error)
        sys.exit(1)
    else:
        total = len(already_scanned)
        logging.info("Saved successfully scanned images in this pipeline")
        logging.info("There is %s images now scanned", total)


if __name__ == "__main__":
    # Main entry point: iterate over the jobs to get their image and write a
    # GitLab CI file that can run as a child pipeline in order to use ClamAV
    # for virus detection. The only explicit failure exit (status 1) happens
    # in save_scanned_images(); other errors propagate as exceptions.

    # No option is declared, but parsing still provides `--help` and rejects
    # unexpected arguments
    argparse_setup()

    # Setup logging
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(levelname)s] %(message)s",
        handlers=[
            logging.FileHandler(utils.LOGFILE_NAME),
            logging.StreamHandler()
        ]
    )

    logging.info("Getting all images from every job in the hub")
    jobs = os.listdir(utils.JOBS_DIR)
    scanned_images = get_scanned_images()
    images = []

    for job in jobs:
        image = get_image(job)
        logging.info("Checking %s image", image)
        # Skip jobs without image, duplicates, blacklisted images and images
        # already scanned by a previous pipeline
        if image is None or image in images:
            continue
        if image in BLACKLIST or image in scanned_images:
            continue
        logging.info("Adding %s image to list of images to scan", image)
        images.append(image)

    logging.info("There is %s images to scan in this pipeline", len(images))

    image_chunks = [images[x: x + PARALLEL_LIMIT] for x in range(0, len(images), PARALLEL_LIMIT)]
    # NOTE(review): images are recorded as scanned here, before the child
    # pipeline has actually run — confirm this is the intended behavior
    save_scanned_images(images + scanned_images)

    # The template is a plain YAML document: the safe loader is enough and
    # refuses to build arbitrary Python objects
    with open(f"{utils.TOOLS_DIR}/{JOB_DIR}/{BASE_FILE}") as file:
        ci_file = yaml.safe_load(file)

    logging.info("Creating jobs for each chunks of %i images to run in parallel", PARALLEL_LIMIT)
    for chunk_index, chunk in enumerate(image_chunks):
        if chunk_index == 0:
            # First chunk goes in the template job itself
            job_name = "job_av"
        else:
            # Following chunks get their own copy of the template job; a deep
            # copy keeps the dumped YAML free of anchors/aliases
            job_name = f"job_av_{chunk_index}"
            ci_file[job_name] = copy.deepcopy(ci_file["job_av"])
            ci_file[job_name]["parallel"]["matrix"][0]["IMAGE"] = []

        matrix_images = ci_file[job_name]["parallel"]["matrix"][0]["IMAGE"]
        for image in chunk:
            logging.info("Adding image %s to job n°%i", image, chunk_index)
            matrix_images.append(image)

    if not images:
        set_default_image(ci_file)

    logging.info("Writing %s file", CI_FILENAME)
    with open(CI_FILENAME, "w+") as file:
        yaml.dump(ci_file, file, sort_keys=False)
+29 −0
Original line number Diff line number Diff line
@@ -24,6 +24,34 @@ def argparse_setup():
    parser.add_argument("job", help="job name to get the image from")
    return parser.parse_args()

def get_image(job):
    """Get the image of a job

    The job definition is read from `<JOBS_DIR>/<job>/<job>.yml`. When the job
    does not declare an image itself, the image of the job it extends (looked
    up in the same file) is used instead.

    Parameters
    ----------
    job
        Job name

    Return
    ------
    str
        The string of the image of the job, or None when the job neither
        declares an image nor extends another job

    Raises
    ------
    KeyError
        When the file does not define the job, or when the extended job is
        not in the file or has no image
    """
    logging.info("Getting the image for job %s", job)

    # Job files only need plain YAML: the safe loader refuses to build
    # arbitrary Python objects from their content
    with open(f"{utils.JOBS_DIR}/{job}/{job}.yml", 'r') as file:
        data = yaml.safe_load(file)

    job_definition = data[job]
    if "image" in job_definition.keys():
        image = job_definition['image']
    elif "extends" in job_definition.keys():
        image = data[job_definition['extends']]['image']
    else:
        return None

    # `image` is either a plain string or a mapping with a `name` key
    if isinstance(image, dict):
        return image['name']
    return image

if __name__ == "__main__":
    """Main function, get the name of the image for a job

@@ -81,3 +109,4 @@ if __name__ == "__main__":
        except KeyError :
            logging.warning('The job %s doesn\'t declare its image and extends a job from outside of the file, we aren\'t able to check its image vulnerabilities', args.job)
            # TODO: check images from included jobs ==> https://gitlab.com/r2devops/hub/-/issues/282