#!/usr/bin/env python

# Copyright 2015 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import json
import mmap
import os
import re
import sys
import argparse
# Command-line interface.  The arguments are parsed at import time because
# load_exceptions() and main() read the module-level ``args`` namespace.
parser = argparse.ArgumentParser()
# Optional positional list; an empty list makes main() scan the whole tree.
parser.add_argument(
    "filenames",
    help="list of files to check, all files if unspecified",
    nargs='*')
# With -e the exceptions file is not loaded and every offending line is
# printed, which is the format used to regenerate the exceptions file.
parser.add_argument(
    "-e", "--skip-exceptions",
    help="ignore hack/verify-flags/exceptions.txt and print all output",
    action="store_true")
args = parser.parse_args()
# Cargo culted from http://stackoverflow.com/questions/898669/how-can-i-detect-if-a-file-is-binary-non-text-in-python
def is_binary(pathname):
    """Return True if the file at ``pathname`` should be treated as binary.

    The file is read in text mode, 1024 characters at a time, and is reported
    as binary as soon as a chunk contains a NUL character.  A file that cannot
    be opened, read, or decoded as text is reported as binary too, so callers
    that skip binary files also skip unreadable ones instead of failing.

    @attention: found @ http://bytes.com/topic/python/answers/21222-determine-file-type-binary-text on 6/08/2010
    @author: Trent Mick <TrentM@ActiveState.com>
    @author: Jorge Orpinel <jorge@orpinel.com>
    """
    CHUNKSIZE = 1024
    try:
        with open(pathname, 'r') as f:
            while True:
                chunk = f.read(CHUNKSIZE)
                if '\0' in chunk:  # found null byte
                    return True
                if len(chunk) < CHUNKSIZE:
                    break  # short read: end of file reached
    except (EnvironmentError, ValueError):
        # EnvironmentError: missing/unreadable file.  ValueError: text decoding
        # failures (UnicodeDecodeError) and invalid path strings.  Anything
        # else (KeyboardInterrupt, SystemExit, ...) is allowed to propagate.
        return True
    return False
def get_all_files(rootdir):
    """Return the paths of all non-binary files found below ``rootdir``.

    Directories named in ``skipped_dirs`` are not descended into, and files
    named in ``skipped_files`` are ignored wherever they appear.  Every
    remaining file is kept unless is_binary() reports it as binary.

    The returned paths are ``os.path.join(root, name)`` for each directory
    visited by os.walk(), in the order os.walk() yields them.
    """
    skipped_dirs = ('vendor', 'staging', '_output', '_gopath', 'third_party',
                    '.git', '.make')
    skipped_files = ('BUILD', 'exceptions.txt', 'known-flags.txt')

    all_files = []
    for root, dirs, files in os.walk(rootdir):
        # don't visit certain dirs; the list must be modified in place because
        # os.walk() only honours changes made to the list object it yielded
        dirs[:] = [d for d in dirs if d not in skipped_dirs]
        for name in files:
            if name in skipped_files:
                continue
            pathname = os.path.join(root, name)
            if is_binary(pathname):
                continue
            all_files.append(pathname)
    return all_files
def normalize_files(rootdir, files):
    """Filter ``files`` and return the survivors as a new list of paths.

    A path is dropped when it contains any of ``excluded_fragments`` anywhere
    in the string (a plain substring test, so '.git' also drops names such as
    '.gitignore'), or when it ends with one of ``excluded_suffixes``.  Relative
    paths that survive are joined onto ``rootdir``; absolute paths are kept
    unchanged.  Input order is preserved and ``files`` is not modified.
    """
    excluded_fragments = ('Godeps', '_gopath', 'third_party', '.git',
                          'exceptions.txt', 'known-flags.txt')
    excluded_suffixes = (".svg", ".gliffy", ".md", ".yaml")

    newfiles = []
    for f in files:
        if any(fragment in f for fragment in excluded_fragments):
            continue
        if f.endswith(excluded_suffixes):
            continue
        if not os.path.isabs(f):
            f = os.path.join(rootdir, f)
        newfiles.append(f)
    return newfiles
def line_has_bad_flag(line, flagre):
    """Return True if ``line`` contains an underscore spelling of a flag.

    ``flagre`` is a compiled regex; every string returned by
    ``flagre.findall(line)`` is examined in turn.  Matches without an
    underscore are acceptable and are skipped, so a correct spelling earlier
    on the line no longer hides an underscore spelling later on the same
    line.  The first match that does contain an underscore decides the result:
    False if the line uses it in one of the known template-variable forms
    below, True otherwise.  A line with no underscore match returns False.
    """
    for result in flagre.findall(line):
        if "_" not in result:
            continue
        # this should exclude many cases where jinja2 templates use kube flags
        # as variables, except it uses _ for the variable name
        if "{% set" + result + "= \"" in line:
            return False
        if "pillar[" + result + "]" in line:
            return False
        if "grains" + result in line:
            return False
        # something common in juju variables...
        if "template_data[" + result + "]" in line:
            return False
        return True
    return False
def check_known_flags(rootdir):
    """Verify that every entry in known-flags.txt contains a '-'.

    Reads ``hack/verify-flags/known-flags.txt`` below ``rootdir`` (one flag per
    line; empty lines are ignored).  If any entry lacks a '-', the offending
    entries are printed in sorted order and the process exits with status 1.
    Returns None when every entry is valid.
    """
    pathname = os.path.join(rootdir, "hack/verify-flags/known-flags.txt")
    with open(pathname, 'r') as f:
        flags = set(f.read().splitlines())

    illegal_known_flags = sorted(
        flag for flag in flags if flag and "-" not in flag)
    if illegal_known_flags:
        print("All flags in hack/verify-flags/known-flags.txt should contain character -, found these flags without -")
        print("%s" % "\n".join(illegal_known_flags))
        sys.exit(1)
# The list of files might not be the whole repo. If someone only changed a
# couple of files we don't want to run all of the golang files looking for
# flags. Instead load the list of flags from hack/verify-flags/known-flags.txt
# If running the golang files finds a new flag not in that file, return an
# error and tell the user to add the flag to the flag list.
def get_flags(rootdir, files):
    """Return the known flags after checking ``files`` for undeclared ones.

    The known flags are the lines of ``hack/verify-flags/known-flags.txt`` and
    the excluded ones the non-empty lines of
    ``hack/verify-flags/excluded-flags.txt`` (both below ``rootdir``).  Every
    path in ``files`` ending in ".go" is scanned for flag declarations, and
    each declared name is handled as follows:

    * a name containing any excluded entry as a substring is ignored;
    * a name containing "_" is reported as an unlisted underscore flag;
    * a name containing "-" that is not a known flag is reported as new.

    If anything is reported, the sorted names are printed and the process
    exits with status 1.  Otherwise the known flags are returned as a list
    (built from a set, so the order is unspecified).
    """
    # preload the 'known' flags
    pathname = os.path.join(rootdir, "hack/verify-flags/known-flags.txt")
    with open(pathname, 'r') as f:
        flags = set(f.read().splitlines())

    # preload the 'known' flags which don't follow the - standard
    pathname = os.path.join(rootdir, "hack/verify-flags/excluded-flags.txt")
    with open(pathname, 'r') as f:
        # Blank lines are dropped: an empty string is a substring of every
        # flag name, so a single blank line would exclude every flag below.
        excluded_flags = set(entry for entry in f.read().splitlines() if entry)

    # Each pattern captures the quoted flag name of one declaration form.
    regexs = [re.compile(r'Var[P]?\([^,]*, "([^"]*)"'),
              re.compile(r'.String[P]?\("([^"]*)",[^,]+,[^)]+\)'),
              re.compile(r'.Int[P]?\("([^"]*)",[^,]+,[^)]+\)'),
              re.compile(r'.Bool[P]?\("([^"]*)",[^,]+,[^)]+\)'),
              re.compile(r'.Duration[P]?\("([^"]*)",[^,]+,[^)]+\)'),
              re.compile(r'.StringSlice[P]?\("([^"]*)",[^,]+,[^)]+\)')]

    new_flags = set()
    new_excluded_flags = set()
    # walk all the files looking for any flags being declared
    for pathname in files:
        if not pathname.endswith(".go"):
            continue
        with open(pathname, 'r') as f:
            data = f.read()
        matches = []
        for regex in regexs:
            matches.extend(regex.findall(data))
        for flag in matches:
            if any(x in flag for x in excluded_flags):
                continue
            if "_" in flag:
                new_excluded_flags.add(flag)
            if "-" not in flag:
                continue
            if flag not in flags:
                new_flags.add(flag)

    if new_excluded_flags:
        print("Found a flag declared with an _ but which is not explicitly listed as a valid flag name in hack/verify-flags/excluded-flags.txt")
        print("Are you certain this flag should not have been declared with an - instead?")
        print("%s" % "\n".join(sorted(new_excluded_flags)))
        sys.exit(1)
    if new_flags:
        print("Found flags with character - in golang files not in the list of known flags. Please add these to hack/verify-flags/known-flags.txt")
        print("%s" % "\n".join(sorted(new_flags)))
        sys.exit(1)
    return list(flags)
def flags_to_re(flags):
    """Turn the flags into one compiled regex matching - and _ spellings.

    Each '-' or '_' in a flag name matches either character; all other
    characters are matched literally.  A flag only matches when the character
    before it is not a word character, '$' or '{', and the character after it
    is not a word character; both surrounding characters are part of the
    match.  Empty flag names are skipped, since they would otherwise add an
    alternative that matches any two such characters.
    """
    dashRE = re.compile('[-_]')
    flagREs = []
    for flag in flags:
        if not flag:
            continue
        # turn all flag names into regexs which will find both types; the
        # pieces between separators are escaped so they cannot act as regex
        # syntax
        newre = '[-_]'.join(re.escape(piece) for piece in dashRE.split(flag))
        # only match if there is not a leading or trailing alphanumeric character
        flagREs.append(r"[^\w${]" + newre + r"[^\w]")
    # turn that list of regex strings into a single large RE
    return re.compile("|".join(flagREs))
def load_exceptions(rootdir):
    """Return the set of ``(filename, line)`` pairs exempt from the check.

    Returns an empty set without touching the filesystem when the
    module-level ``args.skip_exceptions`` is set.  Otherwise reads
    ``hack/verify-flags/exceptions.txt`` below ``rootdir``; each line is split
    at its first ":" into the file name and the exempted line text.  Lines
    without a ":" are reported on stdout and skipped.
    """
    exceptions = set()
    if args.skip_exceptions:
        return exceptions
    exception_filename = os.path.join(rootdir, "hack/verify-flags/exceptions.txt")
    with open(exception_filename, 'r') as exception_file:
        for exception in exception_file.read().splitlines():
            # Only the first ":" separates the fields; the exempted line text
            # may itself contain colons.
            filename, separator, line = exception.partition(":")
            if not separator:
                print("Invalid line in exceptions file: %s" % exception)
                continue
            exceptions.add((filename, line))
    return exceptions
def main():
    """Run the flag check and return the process exit status.

    The repository root is taken to be the parent of this script's directory.
    The files checked are the command-line ``filenames`` if any were given,
    otherwise every file found by get_all_files(); both are filtered through
    normalize_files().  Each line of each file is tested with
    line_has_bad_flag() and reported unless the (relative path, line) pair is
    listed in the exceptions.

    Returns 1 after printing the offending lines as ``path:line``, 0 when
    there are none.
    """
    rootdir = os.path.dirname(__file__) + "/../"
    rootdir = os.path.abspath(rootdir)

    exceptions = load_exceptions(rootdir)

    if len(args.filenames) > 0:
        files = args.filenames
    else:
        files = get_all_files(rootdir)
    files = normalize_files(rootdir, files)

    check_known_flags(rootdir)

    flags = get_flags(rootdir, files)
    flagRE = flags_to_re(flags)

    bad_lines = []
    # walk all the file looking for any flag that was declared and now has an _
    for pathname in files:
        relname = os.path.relpath(pathname, rootdir)
        with open(pathname, 'r') as f:
            for line in f.read().splitlines():
                if not line_has_bad_flag(line, flagRE):
                    continue
                if (relname, line) not in exceptions:
                    bad_lines.append((relname, line))

    if not bad_lines:
        return 0

    # The explanatory header is omitted with -e so that the output consists
    # only of "path:line" records, the format the exceptions file is read in.
    if not args.skip_exceptions:
        print("Found illegal 'flag' usage. If these are false positives you should run `hack/verify-flags-underscore.py -e > hack/verify-flags/exceptions.txt` to update the list.")
    bad_lines.sort()
    for (relname, line) in bad_lines:
        print("%s:%s" % (relname, line))
    return 1
# Run the check only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    sys.exit(main())