Commit c8b840dd authored by Ronny Moreas's avatar Ronny Moreas Committed by Pierre Smeyers
Browse files

feat: exclude files from source based on .cnbignore file

parent 9c83037a
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
@@ -155,6 +155,12 @@ It uses the following variable:
| `app-dir` / `CNB_APP_DIR` | Absolute root directory in final image     | `/workspace`  |
| `src-app-dir` / `CNB_SRC_APP_DIR` | Relative path to the application source code base directory in your repository     | `.`           |

All files from your application source code base directory (as defined by the `CNB_SRC_APP_DIR` variable) are copied to the build workspace directory (`CNB_APP_DIR`) prior to building the image.

You may use a `.cnbignore` file in the source base directory to exclude files and directories from being copied to the build workspace. `.cnbignore` uses the same syntax as `.gitignore`, but supports only a subset of its features, depending on the tools available in the builder image. For instance, negation patterns
are only supported if the builder image includes the `rsync` command (e.g. [`heroku/builder:24`](https://github.com/heroku/builder)).

The `.git` directory is always excluded from the build context, even if not specified in `.cnbignore`.

<!--
In addition to the above variables, the CNB template also supports all configuration means supported
+192 −2
Original line number Diff line number Diff line
@@ -461,6 +461,197 @@ stages:
    configure_registries_auth
  }

  # cnbignore_to_find_patterns <INPUT_FILE> <OUTPUT_FILE>
  # Converts a .cnbignore-style file to find-compatible patterns.
  # Empty lines and comments are ignored. Leading '/' are made relative to source dir.
  # Negation patterns are not supported.
  function cnbignore_to_find_patterns() {
    # $1: .cnbignore-style input file (may be missing)
    # $2: output file, receives one `find -path` pattern per line
    local in="$1"
    local out="$2"
    # Loop variables are local so they never clobber same-named variables of the caller
    local rawline line

    printf '%s\n' "./.git" > "${out}"  # Exclude .git by default for parity with pack
    if [[ ! -f "${in}" ]]; then
      return 0
    fi

    # '|| [[ -n ... ]]' also processes a last line that has no trailing newline
    while IFS= read -r rawline || [[ -n "${rawline}" ]]; do
      # trim leading, then trailing, whitespace
      line="${rawline#"${rawline%%[![:space:]]*}"}"
      line="${line%"${line##*[![:space:]]}"}"
      # skip blanks & comments
      [[ -z "${line}" || "${line}" =~ ^# ]] && continue

      if [[ "${line:0:1}" == "!" ]]; then
        log_warn "cnbignore_to_find_patterns: Negation patterns are not supported in find"
        continue
      fi
      if [[ "${line:0:1}" == "/" ]]; then
        # Anchored pattern: find is run from the copied tree, so '/x' becomes './x'
        line=".${line}"
      else
        # General patterns: match anywhere in the directory tree
        line="**/${line}"
      fi
      if [[ "${line: -1}" == "/" ]]; then
        # Directory pattern: 'dir/' becomes 'dir/*', which matches every entry
        # below the directory. The path of the directory itself carries no
        # trailing slash, so it is not matched by this pattern.
        line="${line}*"
      fi
      printf '%s\n' "${line}" >> "${out}"
    done < "${in}"
  }

  # cnbignore_to_tar_exclude <INPUT_FILE> <OUTPUT_FILE>
  # Converts a .cnbignore-style file to a tar --exclude-vcs-ignores exclude file.
  # Empty lines and comments are ignored. Leading '/' are made relative to source dir.
  # Negation patterns are not supported.
  function cnbignore_to_tar_exclude() {
    # $1: .cnbignore-style input file (may be missing)
    # $2: output file, receives the retained patterns, one per line
    local in="$1"
    local out="$2"
    # Loop variables are local so they never clobber same-named variables of the caller
    local rawline line

    # Always (re)create the output file, even when there is nothing to exclude
    : > "${out}"
    if [[ ! -f "${in}" ]]; then
      return 0
    fi

    # '|| [[ -n ... ]]' also processes a last line that has no trailing newline
    while IFS= read -r rawline || [[ -n "${rawline}" ]]; do
      # trim leading, then trailing, whitespace
      line="${rawline#"${rawline%%[![:space:]]*}"}"
      line="${line%"${line##*[![:space:]]}"}"
      # skip blanks & comments
      [[ -z "${line}" || "${line}" =~ ^# ]] && continue
      if [[ "${line:0:1}" == "!" ]]; then
        log_warn "cnbignore_to_tar_exclude: Negation patterns are not supported in tar exclude files"
        continue
      fi
      if [[ "${line:0:1}" == "/" ]]; then
        # Make absolute paths relative to the source dir for tar
        line=".${line}"
      fi
      printf '%s\n' "${line}" >> "${out}"
    done < "${in}"
  }

  # cnbignore_to_rsync_filter <INPUT_FILE> <OUTPUT_FILE>
  # Converts a .cnbignore file (.gitignore-style syntax) to an rsync filter file.
  # Rules in the output file follow rsync filter syntax:
  #   + pattern  (include)
  #   - pattern  (exclude)
  # Empty lines and comments are ignored; '**' is supported by rsync.
  function cnbignore_to_rsync_filter() {
    # $1: .cnbignore-style input file (may be missing)
    # $2: output file, receives rsync filter rules ('+ pattern' / '- pattern')
    #
    # Rule order matters: rsync acts on the FIRST rule that matches a path,
    # whereas gitignore semantics let the LAST matching line win. The rules are
    # therefore written in reverse input order, so that a later line (typically
    # a '!negation') takes precedence over the earlier pattern it overrides.
    local in="$1"
    local out="$2"
    local rawline line pat sign i
    local -a rules=()

    # Exclude .git by default for parity with pack; written first so that no
    # later rule can re-include it
    printf '%s\n' "- .git" > "${out}"
    if [[ ! -f "${in}" ]]; then
      return 0
    fi

    # '|| [[ -n ... ]]' also processes a last line that has no trailing newline
    while IFS= read -r rawline || [[ -n "${rawline}" ]]; do
      # Trim leading/trailing whitespace
      line="${rawline#"${rawline%%[![:space:]]*}"}"
      line="${line%"${line##*[![:space:]]}"}"

      # Skip empty lines and comments. Only a leading '#' starts a comment;
      # a '#' further in the line is kept as part of the pattern.
      [[ -z "${line}" || "${line:0:1}" == "#" ]] && continue

      if [[ "${line:0:1}" == "!" ]]; then
        # Negation (un-ignore) -> include in rsync
        sign="+"
        pat="${line:1}"
        # Trim whitespace again (e.g. '!  pattern')
        pat="${pat#"${pat%%[![:space:]]*}"}"
        pat="${pat%"${pat##*[![:space:]]}"}"
        [[ -z "${pat}" ]] && continue
      else
        # Normal ignore -> exclude in rsync
        sign="-"
        pat="${line}"
      fi

      if [[ "${pat: -1}" == "/" ]]; then
        # Directory pattern (trailing /): one rule for the directory itself and
        # one for all underlying contents (rsync supports ** in filters).
        # Both rules are stored as a single entry so the pair stays together
        # when the list is reversed.
        rules+=("${sign} ${pat}"$'\n'"${sign} ${pat}**")
      else
        rules+=("${sign} ${pat}")
      fi
    done < "${in}"

    # Emit last-declared rules first (see the note on rule order above)
    for (( i = ${#rules[@]} - 1; i >= 0; i-- )); do
      printf '%s\n' "${rules[i]}" >> "${out}"
    done
  }

  # copy_to_cnb_workspace <SRC_DIR> <DST_DIR>
  # Copies source code from SRC_DIR into the CNB workspace at DST_DIR,
  # excluding patterns listed in SRC_DIR/.cnbignore.
  #
  # .cnbignore format:
  #   - One pattern per line (gitignore-style glob).
  #   - Lines starting with '#' are comments; blank lines ignored.
  #   - Patterns are relative to SRC_DIR.
  #
  # Behavior:
  #   - If .cnbignore is absent/empty, copy everything.
  #   - Exclude .git/ by default for parity with pack’s default VCS filtering.
  #   - Uses rsync if available; otherwise tar --exclude-vcs-ignores; otherwise cp then find+rm fallback.
  #
  # Notes:
  #   - `!negation` is only supported when using rsync.
  #   - Leading '/' in patterns is treated as relative to SRC_DIR.
  copy_to_cnb_workspace() {
    assert_defined "${1}" "copy_to_cnb_workspace: SRC_DIR is required"
    assert_defined "${2}" "copy_to_cnb_workspace: DST_DIR is required"

    local SRC_DIR="${1}"
    local DST_DIR="${2}"
    local IGN_FILE="${SRC_DIR}/.cnbignore"
    # Exit status of the copy command; returned to the caller so that a failed
    # copy is not silently reported as a success
    local rc=0
    mkdir -p "${DST_DIR}"

    # 1) Best path: rsync with --filter
    if command -v rsync >/dev/null 2>&1; then
      log_info "copy_to_cnb_workspace: Using rsync with .cnbignore excludes"
      local RSYNC_FILTER_FILE
      RSYNC_FILTER_FILE="$(mktemp)"
      cnbignore_to_rsync_filter "${IGN_FILE}" "${RSYNC_FILTER_FILE}"
      # --links: without it rsync skips symbolic links instead of copying them
      rsync -r --links --delete \
        --filter="merge ${RSYNC_FILTER_FILE}" \
        "${SRC_DIR}/" "${DST_DIR}/" || rc=$?
      rm -f "${RSYNC_FILTER_FILE}"
      return "${rc}"
    fi

    # 2) Fallback: tar with --exclude-vcs-ignores
    # (probe first: the option is not supported by every tar implementation)
    if tar --exclude-vcs-ignores -cf /dev/null /dev/null 2>/dev/null; then
      log_info "copy_to_cnb_workspace: Using tar with .cnbignore excludes"
      local TAR_EXCL_FILE
      local GITIGNORE_FILE="${SRC_DIR}/.gitignore"
      local GITIGNORE_BACKUP=""
      TAR_EXCL_FILE="$(mktemp)"
      cnbignore_to_tar_exclude "${IGN_FILE}" "${TAR_EXCL_FILE}"

      # tar reads its ignore patterns from a file named .gitignore, so the
      # .cnbignore patterns are temporarily installed under that name.
      # The repository's own .gitignore is saved first and put back afterwards,
      # so that the source tree is left unmodified.
      if [[ -e "${GITIGNORE_FILE}" ]]; then
        GITIGNORE_BACKUP="$(mktemp)"
        if ! cp "${GITIGNORE_FILE}" "${GITIGNORE_BACKUP}"; then
          rm -f "${GITIGNORE_BACKUP}" "${TAR_EXCL_FILE}"
          return 1
        fi
      fi
      cp "${TAR_EXCL_FILE}" "${GITIGNORE_FILE}"

      (cd "${SRC_DIR}" && tar --exclude-vcs --exclude-vcs-ignores -cf - .) \
        | (cd "${DST_DIR}" && tar xf -) || rc=$?

      if [[ -n "${GITIGNORE_BACKUP}" ]]; then
        # Restore the original content in place
        cp "${GITIGNORE_BACKUP}" "${GITIGNORE_FILE}"
        rm -f "${GITIGNORE_BACKUP}"
      else
        # There was no .gitignore before: remove the temporary one
        rm -f "${GITIGNORE_FILE}"
      fi
      rm -f "${TAR_EXCL_FILE}"
      return "${rc}"
    fi

    # 3) Last resort: copy then remove matches via find
    log_info "copy_to_cnb_workspace: Last resort, using cp + find to apply .cnbignore excludes"
    cp -r "${SRC_DIR}/." "${DST_DIR}/"
    local FIND_PATTERNS_FILE pat
    FIND_PATTERNS_FILE="$(mktemp)"
    cnbignore_to_find_patterns "${IGN_FILE}" "${FIND_PATTERNS_FILE}"
    while IFS= read -r pat; do
      # Best effort: a pattern that fails to apply must not abort the copy.
      # -prune keeps find from descending into a directory that is being removed.
      ( cd "${DST_DIR}" && \
        find . -path "${pat}" -prune -exec rm -rf {} + ) || true
    done < "${FIND_PATTERNS_FILE}"
    rm -f "${FIND_PATTERNS_FILE}"
  }

  # ENDSCRIPT

# job prototype
@@ -480,8 +671,7 @@ cnb-build:
  script:
    # required to allow Git operations while not owner
    - git config --global --add safe.directory $CI_PROJECT_DIR
    # Copy sources in $CNB_APP_DIR to avoid having the GitLab CI directory structure in the final image
    - cp -r $CI_PROJECT_DIR/$CNB_SRC_APP_DIR/. $CNB_APP_DIR
    - copy_to_cnb_workspace "$CI_PROJECT_DIR/$CNB_SRC_APP_DIR" "$CNB_APP_DIR"
    - cnb_repository=${CNB_SNAPSHOT_IMAGE%:*}
    - cnb_tag=${CNB_SNAPSHOT_IMAGE##*:}
    # if not set: build cache image from snapshot image