mirror of https://github.com/k3s-io/k3s
Speed up pre-commit boilerplate by only checking changed files
Although the boilerplate checker was very fast, it can be faster. With this change we can hand the boilerplate checker a list of files which need to be checked, or give it no files. If given no files, it will check all files in the repo. Before, you had to explicitly tell the boilerplate checker the 'extension' of the files. Now we let the checker figure it out and load the headers as needed. Doing the whole repo takes about 0.4 seconds. Doing a single go file takes < 0.04 seconds.
pull/6/head
parent
c367d3c2e5
commit
8a91d86783
|
@ -16,22 +16,44 @@
|
|||
|
||||
from __future__ import print_function
|
||||
|
||||
import argparse
|
||||
import glob
|
||||
import json
|
||||
import mmap
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
|
||||
def PrintError(*err):
    """Write the given values to standard error.

    The values are converted with ``str``, separated by single spaces and
    terminated by a newline, mirroring ``print``'s default formatting.
    """
    message = " ".join(str(item) for item in err)
    sys.stderr.write(message + "\n")
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("filenames", help="list of files to check, all files if unspecified", nargs='*')
|
||||
args = parser.parse_args()
|
||||
|
||||
def file_passes(filename, extension, ref, regexs):
|
||||
rootdir = os.path.dirname(__file__) + "/../../"
|
||||
rootdir = os.path.abspath(rootdir)
|
||||
|
||||
def get_refs():
    """Load every boilerplate reference header, keyed by file extension.

    Scans ``hack/boilerplate/boilerplate.*.txt`` under the module-level
    ``rootdir``.  The key is the second dot-separated component of the
    reference file's base name (``boilerplate.go.txt`` -> ``"go"``).

    Returns:
        dict mapping each extension to its reference header as a list of
        lines (line endings stripped).
    """
    refs = {}
    pattern = os.path.join(rootdir, "hack/boilerplate/boilerplate.*.txt")
    for path in glob.glob(pattern):
        extension = os.path.basename(path).split(".")[1]

        # The context manager closes the file even if read() raises.
        with open(path, 'r') as ref_file:
            refs[extension] = ref_file.read().splitlines()

    return refs
|
||||
|
||||
def file_passes(filename, refs, regexs):
|
||||
try:
|
||||
f = open(filename, 'r')
|
||||
except:
|
||||
return False
|
||||
|
||||
data = f.read()
|
||||
f.close()
|
||||
|
||||
extension = file_extension(filename)
|
||||
ref = refs[extension]
|
||||
|
||||
# remove build tags from the top of Go files
|
||||
if extension == "go":
|
||||
|
@ -70,25 +92,48 @@ def file_passes(filename, extension, ref, regexs):
|
|||
|
||||
return True
|
||||
|
||||
def main():
|
||||
if len(sys.argv) < 3:
|
||||
PrintError("usage: %s extension FILENAME [FILENAMES]" % sys.argv[0])
|
||||
return False
|
||||
def file_extension(filename):
    """Return the lower-cased extension of ``filename`` without its dot.

    Yields an empty string when the name has no extension.
    """
    _, suffix = os.path.splitext(filename)
    # suffix is either "" or ".ext"; keep only what follows the last dot.
    return suffix.rpartition(".")[2].lower()
|
||||
|
||||
basedir = os.path.dirname(os.path.abspath(__file__))
|
||||
skipped_dirs = ['Godeps', 'third_party', '_output', '.git']
|
||||
def normalize_files(files):
    """Drop paths inside skipped directories and make the rest absolute.

    A path is dropped when any of its components equals an entry of the
    module-level ``skipped_dirs``.  Whole components are compared rather
    than substrings, so a path such as ``.github/check.sh`` is not thrown
    away merely because it contains the text ``.git``.

    Args:
        files: iterable of path names, either absolute or relative.

    Returns:
        A new list with the kept paths in their input order; relative
        paths are joined onto the module-level ``rootdir``.
    """
    skipped = set(skipped_dirs)
    newfiles = []
    for pathname in files:
        components = os.path.normpath(pathname).split(os.sep)
        if not skipped.isdisjoint(components):
            continue
        if not os.path.isabs(pathname):
            pathname = os.path.join(rootdir, pathname)
        newfiles.append(pathname)
    return newfiles
|
||||
|
||||
extension = sys.argv[1]
|
||||
# argv[0] is the binary, argv[1] is the extension (go, sh, py, whatever)
|
||||
filenames = sys.argv[2:]
|
||||
def get_files(extensions):
|
||||
files = []
|
||||
if len(args.filenames) > 0:
|
||||
files = args.filenames
|
||||
else:
|
||||
for root, dirs, walkfiles in os.walk(rootdir):
|
||||
# don't visit certain dirs. This is just a performance improvement
|
||||
# as we would prune these later in normalize_files(). But doing it
|
||||
# cuts down the amount of filesystem walking we do and cuts down
|
||||
# the size of the file list
|
||||
for d in skipped_dirs:
|
||||
if d in dirs:
|
||||
dirs.remove(d)
|
||||
|
||||
ref_filename = basedir + "/boilerplate." + extension + ".txt"
|
||||
try:
|
||||
ref_file = open(ref_filename, 'r')
|
||||
except:
|
||||
# No boilerplate template is success
|
||||
return True
|
||||
ref = ref_file.read().splitlines()
|
||||
for name in walkfiles:
|
||||
pathname = os.path.join(root, name)
|
||||
files.append(pathname)
|
||||
|
||||
files = normalize_files(files)
|
||||
outfiles = []
|
||||
for pathname in files:
|
||||
extension = file_extension(pathname)
|
||||
if extension in extensions:
|
||||
outfiles.append(pathname)
|
||||
return outfiles
|
||||
|
||||
def get_regexs():
|
||||
regexs = {}
|
||||
# Search for "YEAR" which exists in the boilerplate, but shouldn't in the real thing
|
||||
regexs["year"] = re.compile( 'YEAR' )
|
||||
|
@ -98,9 +143,15 @@ def main():
|
|||
regexs["go_build_constraints"] = re.compile(r"^(// \+build.*\n)+\n", re.MULTILINE)
|
||||
# strip #!.* from shell scripts
|
||||
regexs["shebang"] = re.compile(r"^(#!.*\n)\n*", re.MULTILINE)
|
||||
return regexs
|
||||
|
||||
def main():
|
||||
regexs = get_regexs()
|
||||
refs = get_refs()
|
||||
filenames = get_files(refs.keys())
|
||||
|
||||
for filename in filenames:
|
||||
if not file_passes(filename, extension, ref, regexs):
|
||||
if not file_passes(filename, refs, regexs):
|
||||
print(filename, file=sys.stdout)
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
@ -21,33 +21,7 @@ set -o pipefail
|
|||
KUBE_ROOT=$(dirname "${BASH_SOURCE}")/..
|
||||
boiler="${KUBE_ROOT}/hack/boilerplate/boilerplate.py"
|
||||
|
||||
cd ${KUBE_ROOT}
|
||||
|
||||
find_files() {
|
||||
local ext=$1
|
||||
find . -not \( \
|
||||
\( \
|
||||
-wholename './output' \
|
||||
-o -wholename './_output' \
|
||||
-o -wholename './release' \
|
||||
-o -wholename './target' \
|
||||
-o -wholename './.git' \
|
||||
-o -wholename '*/third_party/*' \
|
||||
-o -wholename '*/Godeps/*' \
|
||||
\) -prune \
|
||||
\) -name "*.${ext}"
|
||||
}
|
||||
|
||||
files_need_boilerplate=()
|
||||
|
||||
files=($(find_files "go"))
|
||||
files_need_boilerplate+=($(${boiler} "go" "${files[@]}"))
|
||||
|
||||
files=($(find_files "sh"))
|
||||
files_need_boilerplate+=($(${boiler} "sh" "${files[@]}"))
|
||||
|
||||
files=($(find_files "py"))
|
||||
files_need_boilerplate+=($(${boiler} "py" "${files[@]}"))
|
||||
files_need_boilerplate=($(${boiler} "$@"))
|
||||
|
||||
if [[ ${#files_need_boilerplate[@]} -gt 0 ]]; then
|
||||
for file in "${files_need_boilerplate[@]}"; do
|
||||
|
|
|
@ -39,11 +39,12 @@ fi
|
|||
echo "${reset}"
|
||||
|
||||
echo -ne "Checking for files that need boilerplate... "
|
||||
out=($(hack/verify-boilerplate.sh))
|
||||
if [[ $? -ne 0 ]]; then
|
||||
files=($(git diff --cached --name-only --diff-filter ACM))
|
||||
out=($(hack/boilerplate/boilerplate.py "${files[@]}"))
|
||||
if [[ "${#out}" -ne 0 ]]; then
|
||||
echo "${red}ERROR!"
|
||||
echo "Some files are missing the required boilerplate header"
|
||||
echo "from hooks/boilerplate.txt:"
|
||||
echo "from hack/boilerplate/boilerplate.*.txt:"
|
||||
for f in "${out[@]}"; do
|
||||
echo " ${f}"
|
||||
done
|
||||
|
|
Loading…
Reference in New Issue