style: Refactor the style checker as a Python package

Refactor the style checker into a Python module that can be reused by
command line tools that integrate with git. In particular:

  * Create a style package in util
  * Move style validators from style.py to the style/validators.py.
  * Move style verifiers from style.py to the style/verifiers.py.
  * Move utility functions (sort_includes, region handling,
    file_types) into the style package
  * Move generic code from style.py to style/style.py.

Signed-off-by: Andreas Sandberg <andreas.sandberg@arm.com>
Reviewed-by: Curtis Dunham <curtis.dunham@arm.com>
Reviewed-by: Steve Reinhardt <steve.reinhardt@amd.com>

--HG--
rename : util/style.py => util/hgstyle.py
rename : util/sort_includes.py => util/style/sort_includes.py
extra : rebase_source : ad6cf9b9a18c48350dfc7b7c77bea6c5344fb53c
This commit is contained in:
Andreas Sandberg
2016-03-30 15:30:32 +01:00
parent 062b6c4c9d
commit 2580fcd9d7
10 changed files with 988 additions and 644 deletions

38
util/style/__init__.py Normal file
View File

@@ -0,0 +1,38 @@
#!/usr/bin/env python
#
# Copyright (c) 2016 ARM Limited
# All rights reserved
#
# The license below extends only to copyright in the software and shall
# not be construed as granting a license to any other intellectual
# property including but not limited to intellectual property relating
# to a hardware implementation of the functionality of the software
# licensed hereunder. You may use the software subject to the license
# terms below provided that you ensure that this notice is replicated
# unmodified and in its entirety in all distributions of the software,
# modified or unmodified, in source code or in binary form.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Andreas Sandberg

176
util/style/file_types.py Normal file
View File

@@ -0,0 +1,176 @@
import os
# lanuage type for each file extension
lang_types = {
'.c' : "C",
'.cl' : "C",
'.h' : "C",
'.cc' : "C++",
'.hh' : "C++",
'.cxx' : "C++",
'.hxx' : "C++",
'.cpp' : "C++",
'.hpp' : "C++",
'.C' : "C++",
'.H' : "C++",
'.i' : "swig",
'.py' : "python",
'.pl' : "perl",
'.pm' : "perl",
'.s' : "asm",
'.S' : "asm",
'.l' : "lex",
'.ll' : "lex",
'.y' : "yacc",
'.yy' : "yacc",
'.isa' : "isa",
'.sh' : "shell",
'.slicc' : "slicc",
'.sm' : "slicc",
'.awk' : "awk",
'.el' : "lisp",
'.txt' : "text",
'.tex' : "tex",
'.mk' : "make",
}
# languages based on file prefix
lang_prefixes = (
('SCons', 'scons'),
('Make', 'make'),
('make', 'make'),
('Doxyfile', 'doxygen'),
)
# languages based on #! line of first file
hash_bang = (
('python', 'python'),
('perl', 'perl'),
('sh', 'shell'),
)
# the list of all languages that we detect
all_languages = frozenset(lang_types.itervalues())
all_languages |= frozenset(lang for start,lang in lang_prefixes)
all_languages |= frozenset(lang for start,lang in hash_bang)
def lang_type(filename, firstline=None, openok=True):
    '''Identify the language of a file.

    Detection is attempted in this order: the file extension
    (lang_types), the base name prefix (lang_prefixes) and finally the
    #! line (hash_bang).

    filename  -- path of the file to classify
    firstline -- first line of the file, if the caller already has it
    openok    -- if firstline is None and this is true, the file is
                 opened to read its first line when the name alone is
                 not enough

    Returns the language name, or None if it could not be detected.'''

    basename = os.path.basename(filename)
    extension = os.path.splitext(basename)[1]

    # first try to detect language based on file extension
    language = lang_types.get(extension)
    if language is not None:
        return language

    # now try to detect language based on file prefix
    for start, lang in lang_prefixes:
        if basename.startswith(start):
            return lang

    # if a first line was not provided but the file is ok to open,
    # grab the first line of the file. The with statement makes sure
    # the handle is closed even if reading fails.
    if firstline is None and openok:
        with open(filename, 'r') as handle:
            firstline = handle.readline()

    # try to detect language based on #! in first line
    if firstline and firstline.startswith('#!'):
        for string, lang in hash_bang:
            if firstline.find(string) > 0:
                return lang

    # sorry, we couldn't detect the language
    return None
# names of directories and files that find_files() skips unless the
# caller supplies its own sets
default_dir_ignore = frozenset(['.hg', '.svn', 'build', 'ext'])
default_file_ignore = frozenset(['parsetab.py'])
def find_files(base, languages=all_languages,
               dir_ignore=default_dir_ignore,
               file_ignore=default_file_ignore):
    '''Walk base recursively and yield (path, language) pairs.

    Only files whose detected language is in languages are reported.
    Directories named in dir_ignore are not descended into and files
    named in file_ignore are skipped.'''

    if base[-1] != '/':
        base += '/'

    for root, dirs, files in os.walk(base):
        relative_root = root.replace(base, '', 1)

        # prune ignored directories in place; os.walk only skips a
        # directory if it is removed from the very list it handed out
        dirs[:] = [name for name in dirs if name not in dir_ignore]

        for filename in files:
            if filename in file_ignore:
                # skip ignored files
                continue

            # try to figure out the language of the specified file
            fullpath = os.path.join(base, relative_root, filename)
            language = lang_type(fullpath)

            # only report files written in one of the wanted languages
            if language in languages:
                yield fullpath, language
def update_file(dst, src, language, mutator):
    '''Rewrite a file of the specified language using a mutator.

    dst      -- destination file name or writable file-like object
    src      -- source file name or readable file object (must have a
                name attribute)
    language -- language name handed through to the mutator
    mutator  -- callable(lines, filename, language) returning an
                iterable of output lines without line endings

    If dst and src compare equal the file is updated in place, and it
    is left untouched when the mutator does not change anything.
    Otherwise the result is written to dst. Every output line is
    terminated with a single newline.

    Files opened here are closed before returning; file objects passed
    in by the caller are left open.'''

    # if the source and destination are the same, we're updating in place
    inplace = dst == src

    # handles that were opened here and hence must be closed here
    opened = []
    try:
        if isinstance(src, str):
            # if a filename was provided, open the file
            src = open(src, 'r+' if inplace else 'r')
            opened.append(src)

        # grab all of the lines of the file and strip them of their
        # line ending
        old_lines = [line.rstrip('\r\n') for line in src]
        new_lines = list(mutator(old_lines, src.name, language))

        if inplace:
            # if we're updating in place and the file hasn't changed,
            # do nothing
            if old_lines == new_lines:
                return

            # otherwise, truncate the file and seek to the beginning.
            dst = src
            dst.truncate(0)
            dst.seek(0)
        elif isinstance(dst, str):
            # if we're not updating in place and a destination file name
            # was provided, create a file object
            dst = open(dst, 'w')
            opened.append(dst)

        for line in new_lines:
            dst.write(line)
            dst.write('\n')
    finally:
        for handle in opened:
            handle.close()

281
util/style/region.py Normal file
View File

@@ -0,0 +1,281 @@
# Copyright (c) 2006 Nathan Binkert <nate@binkert.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
class _neg_inf(object):
    '''Sentinel that orders below every object of any other type and
    is equal only to objects of its own type.'''

    def __repr__(self):
        return '<neg_inf>'

    def __eq__(self, other):
        return type(self) == type(other)

    def __ne__(self, other):
        return not self.__eq__(other)

    def __lt__(self, other):
        # strictly less than everything except another neg_inf
        return self.__ne__(other)

    def __le__(self, other):
        return True

    def __gt__(self, other):
        return False

    def __ge__(self, other):
        # only another neg_inf is not greater than us
        return self.__eq__(other)

neg_inf = _neg_inf()
class _pos_inf(object):
    '''Sentinel that orders above every object of any other type and
    is equal only to objects of its own type.'''

    def __repr__(self):
        return '<pos_inf>'

    def __eq__(self, other):
        return type(self) == type(other)

    def __ne__(self, other):
        return not self.__eq__(other)

    def __lt__(self, other):
        return False

    def __le__(self, other):
        # only another pos_inf is not smaller than us
        return self.__eq__(other)

    def __gt__(self, other):
        # strictly greater than everything except another pos_inf
        return self.__ne__(other)

    def __ge__(self, other):
        return True

pos_inf = _pos_inf()
class Region(tuple):
    '''A half-open region (range) of [start, end).

    A region can be compared with another region (any tuple is
    accepted) or with a single position. The comparison operators
    describe spatial relations (left of, right of, overlapping)
    rather than a total order.'''

    def __new__(cls, *args):
        '''Create a region from two bounds, from one two-element
        iterable, or from an existing Region (returned unchanged).

        Raises AttributeError unless exactly two bounds are supplied.'''
        if len(args) == 1:
            arg = args[0]
            if isinstance(arg, Region):
                return arg
            args = tuple(arg)

        if len(args) != 2:
            raise AttributeError(
                "Only one or two arguments allowed, %d provided" %
                (len(args), ))

        return tuple.__new__(cls, args)

    # Overriding __eq__ removes the inherited hash on Python 3. Keep the
    # tuple hash so that regions stay hashable there as well.
    __hash__ = tuple.__hash__

    def __repr__(self):
        return 'Region(%s, %s)' % (self[0], self[1])

    @property
    def start(self):
        '''First position inside the region.'''
        return self[0]

    @property
    def end(self):
        '''First position past the region.'''
        return self[1]

    def __contains__(self, other):
        '''other is
        region: True if other is fully contained within self.
        pos: True if other is within the region'''
        if isinstance(other, tuple):
            return self[0] <= other[0] and self[1] >= other[1]
        return self[0] <= other and other < self[1]

    def __eq__(self, other):
        '''other is
        region: True if self and other are identical.
        pos: True if other is within the region'''
        if isinstance(other, tuple):
            return self[0] == other[0] and self[1] == other[1]
        return self[0] <= other and other < self[1]

    def __ne__(self, other):
        '''other is
        region: True if they are not identical
        pos: True if other is not in the region'''
        if isinstance(other, tuple):
            return self[0] != other[0] or self[1] != other[1]
        return other < self[0] or self[1] <= other

    def __lt__(self, other):
        "self is completely left of other (cannot overlap)"
        if isinstance(other, tuple):
            return self[1] <= other[0]
        return self[1] <= other

    def __le__(self, other):
        '''self starts at or to the left of other (can overlap).

        Only the start of self is compared.'''
        if isinstance(other, tuple):
            return self[0] <= other[0]
        return self[0] <= other

    def __gt__(self, other):
        "self is completely right of other (cannot overlap)"
        if isinstance(other, tuple):
            return self[0] >= other[1]
        return self[0] > other

    def __ge__(self, other):
        '''self extends to or beyond the end of other (can overlap).

        Only the end of self is compared.'''
        if isinstance(other, tuple):
            return self[1] >= other[1]
        return self[1] > other
class Regions(object):
    '''A collection of regions (ranges), i.e. a region with holes.

    Provides membership tests over all stored regions and an
    intersection operator (& and &=).'''

    def __init__(self, *args):
        self.regions = []
        self.extend(*args)

    def copy(self):
        '''Return a new Regions holding the same Region objects.'''
        duplicate = Regions()
        duplicate.regions.extend(self.regions)
        return duplicate

    def append(self, *args):
        '''Add one region; args are forwarded to Region().'''
        self.regions.append(Region(*args))

    def extend(self, *args):
        '''Add several regions; each argument is converted with Region().'''
        for arg in args:
            self.regions.append(Region(arg))

    def __contains__(self, position):
        return any(position in region for region in self.regions)

    def __len__(self):
        return len(self.regions)

    def __iand__(self, other):
        '''Replace our regions by their intersection with other.

        Both region lists are scanned in parallel; the scan presumably
        relies on them being sorted and non-overlapping, which is not
        enforced here.'''
        ours = self.regions
        theirs = other.regions
        common = []

        i = j = 0
        while i < len(self) and j < len(other):
            a = ours[i]
            b = theirs[j]
            a_start, a_end = a[0], a[1]
            b_start, b_end = b[0], b[1]

            if a_end <= b_start:
                # ours is completely before theirs. Skip ours.
                i += 1
                continue

            if a_start <= b_start:
                if a_end <= b_end:
                    # overlap starts where theirs starts and stops
                    # where ours stops; ours is used up
                    common.append(Region(b_start, a_end))
                    i += 1
                    if a_end == b_end:
                        # theirs is used up as well
                        j += 1
                else:
                    # theirs lies completely within ours; more of ours
                    # may still be useful
                    common.append(Region(b_start, b_end))
                    j += 1
                continue

            if b_end <= a_start:
                # theirs is completely before ours. Skip theirs.
                j += 1
                continue

            assert b_start < a_start
            if b_end <= a_end:
                # overlap starts where ours starts and stops where
                # theirs stops; theirs is used up
                common.append(Region(a_start, b_end))
                j += 1
                if a_end == b_end:
                    # ours is used up as well
                    i += 1
            else:
                # ours lies completely within theirs; more of theirs
                # may still be useful
                common.append(Region(a_start, a_end))
                i += 1

        self.regions = common
        return self

    def __and__(self, other):
        intersection = self.copy()
        intersection &= other
        return intersection

    def __repr__(self):
        bounds = [(r[0], r[1]) for r in self.regions]
        return 'Regions(%s)' % (bounds, )

# a single region that contains every position
all_regions = Regions(Region(neg_inf, pos_inf))
if __name__ == '__main__':
    # Ad-hoc self test: prints comparison results for manual
    # inspection. Written so that it runs on Python 2 and Python 3
    # (print with a single pre-formatted argument, range rather than
    # xrange).
    x = Regions(*((i, i + 1) for i in range(0, 30, 2)))
    y = Regions(*((i, i + 4) for i in range(0, 30, 5)))
    z = Region(6, 7)
    n = Region(9, 10)

    def test(left, right):
        print("%s == %s: %s" % (left, right, left == right))
        print("%s != %s: %s" % (left, right, left != right))
        print("%s <  %s: %s" % (left, right, left <  right))
        print("%s <= %s: %s" % (left, right, left <= right))
        print("%s >  %s: %s" % (left, right, left >  right))
        print("%s >= %s: %s" % (left, right, left >= right))
        print("")

    test(neg_inf, neg_inf)
    test(neg_inf, pos_inf)
    test(pos_inf, neg_inf)
    test(pos_inf, pos_inf)

    test(neg_inf, 0)
    test(neg_inf, -11111)
    test(neg_inf, 11111)

    test(0, neg_inf)
    test(-11111, neg_inf)
    test(11111, neg_inf)

    test(pos_inf, 0)
    test(pos_inf, -11111)
    test(pos_inf, 11111)

    test(0, pos_inf)
    test(-11111, pos_inf)
    test(11111, pos_inf)

    print(x)
    print(y)
    print(x & y)
    print(z)

    print(4 in x)
    print(4 in z)
    print(5 not in x)
    print(6 not in z)
    print(z in y)
    print("%s %s" % (n in y, n not in y))

317
util/style/sort_includes.py Normal file
View File

@@ -0,0 +1,317 @@
#!/usr/bin/env python
#
# Copyright (c) 2014-2015 ARM Limited
# All rights reserved
#
# The license below extends only to copyright in the software and shall
# not be construed as granting a license to any other intellectual
# property including but not limited to intellectual property relating
# to a hardware implementation of the functionality of the software
# licensed hereunder. You may use the software subject to the license
# terms below provided that you ensure that this notice is replicated
# unmodified and in its entirety in all distributions of the software,
# modified or unmodified, in source code or in binary form.
#
# Copyright (c) 2011 The Hewlett-Packard Development Company
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Nathan Binkert
# Andreas Sandberg
import os
import re
import sys
from file_types import *
# Maps a C standard library header to the name of its C++ counterpart.
cpp_c_headers = dict([
    ('assert.h', 'cassert'),
    ('ctype.h',  'cctype'),
    ('errno.h',  'cerrno'),
    ('float.h',  'cfloat'),
    ('limits.h', 'climits'),
    ('locale.h', 'clocale'),
    ('math.h',   'cmath'),
    ('setjmp.h', 'csetjmp'),
    ('signal.h', 'csignal'),
    ('stdarg.h', 'cstdarg'),
    ('stddef.h', 'cstddef'),
    ('stdio.h',  'cstdio'),
    ('stdlib.h', 'cstdlib'),
    ('string.h', 'cstring'),
    ('time.h',   'ctime'),
    ('wchar.h',  'cwchar'),
    ('wctype.h', 'cwctype'),
])
# Matches #include / #import / %include / %import lines; group 3 is the
# included path without its delimiters.
include_re = re.compile(r'([#%])(include|import).*[<"](.*)[">]')

def include_key(line):
    '''Return a sort key for an include line.

    Directory components are marked with a leading space so
    directories are sorted before files. The line must be an include
    statement (checked with an assertion).'''

    match = include_re.match(line)
    assert match, line
    include = match.group(3)

    # Everything but the file part needs to have a space prepended
    parts = include.split('/')
    if len(parts) == 2 and parts[0] == 'dnet':
        # Don't sort the dnet includes with respect to each other, but
        # make them sorted with respect to non dnet includes. Python
        # guarantees that sorting is stable, so just clear the
        # basename part of the filename.
        parts[1] = ' '
    parts[0:-1] = [ ' ' + s for s in parts[0:-1] ]

    return '/'.join(parts)
def _include_matcher(keyword="#include", delim="<>"):
    """Build a matcher for include statements.

    The returned function takes (context, line) and returns a
    (keyword, file, extra) tuple, or a tuple of three None values if
    the line is not a matching include statement. keyword and delim
    are inserted into a regular expression as they are."""

    opener, closer = delim[0], delim[1]
    pattern = re.compile(r'^(%s)\s*%s(.*)%s(.*)$' % (keyword, opener, closer))

    def matcher(context, line):
        found = pattern.match(line)
        if not found:
            return (None, ) * 3
        return found.groups()

    return matcher
def _include_matcher_fname(fname, **kwargs):
    """Build a matcher for includes of files whose name matches the
    regular expression fname.

    Any keyword arguments are forwarded to _include_matcher, which is
    used to match the actual include line."""

    name_rex = re.compile(fname)
    match_include = _include_matcher(**kwargs)

    def matcher(context, line):
        keyword, included, extra = match_include(context, line)
        if not included or not name_rex.match(included):
            return (None, ) * 3
        return (keyword, included, extra)

    return matcher
def _include_matcher_main():
    """Build a matcher for a C/C++ source file's primary header (i.e.,
    a file with the same base name, but a header extension).

    The source file name is taken from context["filename"] and has to
    start with src/."""

    match_quoted = _include_matcher(delim='""')
    source_rex = re.compile(r"^src/(.*)\.([^.]+)$")
    # source file extension -> extension of its primary header
    header_map = {
        "c" : "h",
        "cc" : "hh",
        "cpp" : "hh",
    }

    def matcher(context, line):
        source = source_rex.match(context["filename"])
        if not source:
            return (None, ) * 3

        base, ext = source.groups()
        keyword, included, extra = match_quoted(context, line)

        header_ext = header_map.get(ext)
        if header_ext is not None and \
                included == "%s.%s" % (base, header_ext):
            return (keyword, included, extra)

        return (None, ) * 3

    return matcher
class SortIncludes(object):
    """Line mutator that groups and sorts include statements.

    An instance is called with (lines, filename, language) and
    generates the rewritten lines. Consecutive include lines (possibly
    separated by blank lines) are collected, classified using
    includes_re and emitted again as sorted, duplicate-free blocks in
    block_order, with an empty line between blocks."""

    # different types of includes for different sorting of headers
    # <Python.h>         - Python header needs to be first if it exists
    # <*.h>              - system headers (directories before files)
    # <*>                - STL headers
    # <*.(hh|hxx|hpp|H)> - C++ Headers (directories before files)
    # "*"                - M5 headers (directories before files)
    #
    # Each entry is (block type, delimiters used on output, matcher).
    # The first matching entry wins.
    includes_re = (
        ('main', '""', _include_matcher_main()),
        ('python', '<>', _include_matcher_fname(r"^Python\.h$")),
        ('c', '<>', _include_matcher_fname(r"^.*\.h$")),
        ('stl', '<>', _include_matcher_fname(r"^\w+$")),
        ('cc', '<>', _include_matcher_fname(r"^.*\.(hh|hxx|hpp|H)$")),
        ('m5header', '""', _include_matcher_fname(r"^.*\.h{1,2}$",
                                                  delim='""')),
        ('swig0', '<>', _include_matcher(keyword="%import")),
        ('swig1', '<>', _include_matcher(keyword="%include")),
        ('swig2', '""', _include_matcher(keyword="%import", delim='""')),
        ('swig3', '""', _include_matcher(keyword="%include", delim='""')),
    )

    # Output order of the blocks. Block types that share a tuple are
    # merged into a single sorted block.
    block_order = (
        ('main', ),
        ('python', ),
        ('c', ),
        ('stl', ),
        ('cc', ),
        ('m5header', ),
        ('swig0', 'swig1', 'swig2', 'swig3', ),
    )

    def __init__(self):
        self.block_priority = {}
        for prio, keys in enumerate(self.block_order):
            for key in keys:
                self.block_priority[key] = prio

        # make sure self.includes exists before the first call
        self.reset()

    def reset(self):
        # clear all stored headers
        self.includes = {}

    def dump_blocks(self, block_types):
        """Merge includes from several block types into one large
        block of sorted includes. This is useful when we have multiple
        include block types (e.g., swig includes) with the same
        priority."""

        includes = []
        for block_type in block_types:
            includes += self.includes.get(block_type, [])

        return sorted(set(includes))

    def dump_includes(self):
        """Return all stored includes as a list of lines, with an empty
        line between blocks, and clear the stored includes."""

        includes = []
        for types in self.block_order:
            block = self.dump_blocks(types)
            if includes and block:
                includes.append("")
            includes += block

        self.reset()
        return includes

    def __call__(self, lines, filename, language):
        """Generate the lines of a file with its includes sorted.

        lines    -- lines of the file without line endings
        filename -- name of the file, used to find its primary header
        language -- language of the file; C headers are replaced by
                    their C++ equivalents for 'C++' files"""

        self.reset()

        context = {
            "filename" : filename,
            "language" : language,
        }

        def match_line(line):
            """Return (include type, normalized line), or (None, line)
            if the line is not an include statement."""
            if not line:
                return (None, line)

            for include_type, (ldelim, rdelim), matcher in self.includes_re:
                keyword, include, extra = matcher(context, line)
                if keyword:
                    # if we've got a match, clean up the #include line,
                    # fix up stl headers and store it in the proper category
                    if include_type == 'c' and language == 'C++':
                        stl_inc = cpp_c_headers.get(include, None)
                        if stl_inc:
                            include = stl_inc
                            include_type = 'stl'

                    return (include_type,
                            keyword + ' ' + ldelim + include + rdelim + extra)

            return (None, line)

        processing_includes = False
        for line in lines:
            include_type, line = match_line(line)
            if include_type:
                self.includes.setdefault(include_type, []).append(line)
                processing_includes = True
            elif processing_includes and not line.strip():
                # Skip empty lines while processing includes
                pass
            elif processing_includes:
                # We are now exiting an include block
                processing_includes = False

                # Output pending includes, a new line between, and the
                # current line.
                for include in self.dump_includes():
                    yield include
                yield ''
                yield line
            else:
                # We are not in an include block, so just emit the line
                yield line

        # We've reached EOF, so dump any pending includes
        if processing_includes:
            for include in self.dump_includes():
                yield include
# languages that the include sorting rules are applied to unless the
# user asks for something else
default_languages = frozenset(('C', 'C++', 'isa', 'python', 'scons', 'swig'))

def options():
    '''Create the command line option parser of the include sorter.

    The list-valued options take comma separated strings and default
    to the module level default sets.'''
    import optparse

    parser = optparse.OptionParser()
    parser.add_option('-d', '--dir_ignore', metavar="DIR[,DIR]",
                      type='string',
                      default=','.join(default_dir_ignore),
                      help="ignore directories")
    parser.add_option('-f', '--file_ignore', metavar="FILE[,FILE]",
                      type='string',
                      default=','.join(default_file_ignore),
                      help="ignore files")
    parser.add_option('-l', '--languages', metavar="LANG[,LANG]",
                      type='string',
                      default=','.join(default_languages),
                      help="languages")
    parser.add_option('-n', '--dry-run', action='store_true',
                      help="don't overwrite files")

    return parser
def parse_args(parser, argv=None):
    '''Parse the command line and split the comma separated options.

    parser -- option parser providing the dir_ignore, file_ignore and
              languages options
    argv   -- list of arguments to parse; the parser falls back to the
              process command line when this is None

    Returns (opts, args) where opts.dir_ignore, opts.file_ignore and
    opts.languages have been converted to frozensets of strings.'''

    opts, args = parser.parse_args(argv)

    opts.dir_ignore = frozenset(opts.dir_ignore.split(','))
    opts.file_ignore = frozenset(opts.file_ignore.split(','))
    opts.languages = frozenset(opts.languages.split(','))

    return opts, args
if __name__ == '__main__':
    # Sort the includes of all matching files below every directory
    # given on the command line. With --dry-run the files are only
    # listed.
    parser = options()
    opts, args = parse_args(parser)

    for base in args:
        for filename, language in find_files(base, languages=opts.languages,
                file_ignore=opts.file_ignore, dir_ignore=opts.dir_ignore):
            if opts.dry_run:
                # single pre-formatted argument: prints the same text on
                # Python 2 and Python 3
                print("%s: %s" % (filename, language))
            else:
                update_file(filename, filename, language, SortIncludes())

149
util/style/style.py Normal file
View File

@@ -0,0 +1,149 @@
#! /usr/bin/env python
# Copyright (c) 2014, 2016 ARM Limited
# All rights reserved
#
# The license below extends only to copyright in the software and shall
# not be construed as granting a license to any other intellectual
# property including but not limited to intellectual property relating
# to a hardware implementation of the functionality of the software
# licensed hereunder. You may use the software subject to the license
# terms below provided that you ensure that this notice is replicated
# unmodified and in its entirety in all distributions of the software,
# modified or unmodified, in source code or in binary form.
#
# Copyright (c) 2006 The Regents of The University of Michigan
# Copyright (c) 2007,2011 The Hewlett-Packard Development Company
# Copyright (c) 2016 Advanced Micro Devices, Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Nathan Binkert
# Steve Reinhardt
# Andreas Sandberg
from abc import ABCMeta, abstractmethod
import difflib
import re
import sys
from region import *
# Distance between tab stops; normalized_len() uses it to expand tabs.
tabsize = 8

# A run of spaces and/or tabs at the start of a line.
lead = re.compile(r'^([ \t]+)')
# A run of spaces and/or tabs at the end of a line.
trail = re.compile(r'([ \t]+)$')
# An if/while/for keyword followed by optional spaces or tabs and an
# opening parenthesis; group 2 is the whitespace in between.
any_control = re.compile(r'\b(if|while|for)([ \t]*)\(')
class UserInterface(object):
    """Abstract interface used to talk to the user.

    Subclasses provide _prompt() and write()."""

    __metaclass__ = ABCMeta

    def __init__(self, verbose=False):
        self.verbose = verbose

    def prompt(self, prompt, results, default):
        """Keep asking until the answer is one of results and return
        that answer."""
        answer = self._prompt(prompt, results, default)
        while answer not in results:
            answer = self._prompt(prompt, results, default)
        return answer

    @abstractmethod
    def _prompt(self, prompt, results, default):
        """Ask once and return the answer."""
        pass

    @abstractmethod
    def write(self, string):
        """Output string to the user."""
        pass
class StdioUI(UserInterface):
    """User interface that reads from stdin and writes to stdout."""

    def _prompt(self, prompt, results, default):
        # raw_input() only exists on Python 2; Python 3 calls the same
        # function input()
        try:
            read_line = raw_input
        except NameError:
            read_line = input

        # an empty answer selects the default
        return read_line(prompt) or default

    def write(self, string):
        sys.stdout.write(string)
class MercurialUI(UserInterface):
    """User interface that forwards prompts and output to the ui
    object handed to the constructor."""

    def __init__(self, ui, *args, **kwargs):
        super(MercurialUI, self).__init__(*args, **kwargs)
        self.hg_ui = ui

    def _prompt(self, prompt, results, default):
        answer = self.hg_ui.prompt(prompt, default=default)
        return answer

    def write(self, string):
        self.hg_ui.write(string)
def _re_ignore(expr):
    """Create an ignore rule from a regular expression.

    The returned function takes a file name and returns a match
    object if expr matches at the start of the name, None otherwise."""

    pattern = re.compile(expr)

    def match_re(fname):
        found = pattern.match(fname)
        return found

    return match_re
# Functions that decide whether a file is excluded from the style
# matching rules. Each function is called with the file name relative
# to the repository root (without a leading slash) as its argument. A
# file is excluded if any function in the list returns true.
style_ignores = [
    # Ignore external projects as they are unlikely to follow the gem5
    # coding convention.
    _re_ignore("^ext/"),
]

def check_ignores(fname):
    """Return True if any rule in style_ignores matches fname, False
    otherwise."""

    return any(rule(fname) for rule in style_ignores)
def normalized_len(line):
    """Return the length of line with every tab expanded to the next
    tab stop (tab stops are tabsize columns apart)."""

    width = 0
    for char in line:
        if char != '\t':
            width += 1
            continue

        # advance to the next multiple of tabsize
        width += tabsize - width % tabsize

    return width
def modified_regions(old, new, context=0):
    """Return the Regions of new that differ from old.

    old and new are the sequences to compare and context is the
    amount of unchanged context passed to difflib when grouping
    changes. One region is reported per group of changes, in indices
    of new."""

    changed = Regions()
    matcher = difflib.SequenceMatcher(a=old, b=new, autojunk=False)

    for opcodes in matcher.get_grouped_opcodes(context):
        # opcodes are (tag, i1, i2, j1, j2); j1 and j2 index into new
        start = opcodes[0][3]
        stop = opcodes[-1][4] + 1
        changed.extend(Region(start, stop))

    return changed

212
util/style/validators.py Normal file
View File

@@ -0,0 +1,212 @@
#!/usr/bin/env python
#
# Copyright (c) 2014, 2016 ARM Limited
# All rights reserved
#
# The license below extends only to copyright in the software and shall
# not be construed as granting a license to any other intellectual
# property including but not limited to intellectual property relating
# to a hardware implementation of the functionality of the software
# licensed hereunder. You may use the software subject to the license
# terms below provided that you ensure that this notice is replicated
# unmodified and in its entirety in all distributions of the software,
# modified or unmodified, in source code or in binary form.
#
# Copyright (c) 2006 The Regents of The University of Michigan
# Copyright (c) 2007,2011 The Hewlett-Packard Development Company
# Copyright (c) 2016 Advanced Micro Devices, Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Nathan Binkert
# Steve Reinhardt
# Andreas Sandberg
from abc import ABCMeta, abstractmethod
import inspect
import re
import sys
import style
# Number of columns per tab stop.
tabsize = 8
# Leading run of spaces/tabs at the start of a line (captured as group 1).
lead = re.compile(r'^([ \t]+)')
# Trailing run of spaces/tabs at the end of a line (captured as group 1).
trail = re.compile(r'([ \t]+)$')
# An if/while/for keyword followed by '('. Group 1 is the keyword, group 2
# is the (possibly empty) whitespace between the keyword and the paren.
any_control = re.compile(r'\b(if|while|for)([ \t]*)\(')
class Validator(object):
    """Abstract base class for style validators

    A validator inspects a source file one line at a time, reports
    every style violation it finds and keeps a count of them. Unlike
    style verifiers (see verifiers.py), validators never try to fix
    the violations they report.

    Deprecation warning: These classes are currently only used by the
    "hg m5format" command and not by any style hooks. New style
    checkers should inherit from Verifier instead of Validator.
    """

    __metaclass__ = ABCMeta

    def __init__(self, file_name, verbose=False, language=None):
        # Number of violations reported through fail_line() so far.
        self.bad = 0
        self.file_name = file_name
        self.language = language
        self.verbose = verbose

    def fail_line(self, line_no, line, message):
        """Report a violation on zero-based line 'line_no' and count it.

        The offending line itself is echoed only in verbose mode.
        """
        location = '%s:%d>' % (self.file_name, line_no + 1)
        print('%s %s' % (location, message))
        if self.verbose:
            print(line)
        self.bad += 1

    def __nonzero__(self):
        """A validator is truthy while it has not seen any violation."""
        return not self.bad

    @classmethod
    def supported_lang(cls, language):
        """Tell whether this validator applies to 'language'.

        The base implementation accepts every language.
        """
        return True

    @abstractmethod
    def validate_line(self, line_no, line):
        """Check a single line; must be provided by subclasses."""
        pass

    @abstractmethod
    def dump(self):
        """Print summary statistics; must be provided by subclasses."""
        pass
class SimpleValidator(Validator):
    """Base class for validators that reduce to a per-line pass/fail test.

    Subclasses implement simple_validate_line() and supply two messages:
    'fail_message' is reported for every offending line, 'dump_message'
    is a %-format string printed by dump() with the key "bad" bound to
    the number of violations.
    """

    # Languages the check applies to; an empty set means every language.
    supported_langs = set()

    def __init__(self, fail_message, dump_message, file_name, **kwargs):
        super(SimpleValidator, self).__init__(file_name, **kwargs)
        self.fail_message = fail_message
        self.dump_message = dump_message

    @classmethod
    def supported_lang(cls, language):
        """Return True if the check applies to 'language'."""
        return not cls.supported_langs or language in cls.supported_langs

    def validate_line(self, line_no, line):
        """Validate one line; report it and return False if it fails."""
        if self.simple_validate_line(line):
            return True

        self.fail_line(line_no, line, self.fail_message)
        return False

    @abstractmethod
    def simple_validate_line(self, line):
        """Return True if 'line' is acceptable, False otherwise."""
        pass

    def dump(self):
        """Print the summary message with the violation count filled in."""
        print(self.dump_message % {
            "bad" : self.bad
        })
class LineLength(Validator):
    """Flag lines longer than 79 characters.

    The length is whatever style.normalized_len() reports for the
    line. Lines of exactly 80 characters are additionally tallied on
    their own so that dump() can report them separately.
    """

    def __init__(self, *args, **kwargs):
        super(LineLength, self).__init__(*args, **kwargs)
        # Number of lines that are exactly 80 characters long.
        self.toolong80 = 0

    def validate_line(self, line_no, line):
        """Return True if the line fits, otherwise report it."""
        length = style.normalized_len(line)
        if length == 80:
            self.toolong80 += 1

        if length <= 79:
            return True

        self.fail_line(line_no, line, 'line too long (%d chars)' % length)
        return False

    def dump(self):
        """Print the number of overlong lines seen."""
        counts = (self.bad, self.toolong80)
        print("%d violations of lines over 79 chars. "
              "%d of which are 80 chars exactly." % counts)
class ControlSpacing(Validator):
    """Flag if/while/for keywords not followed by exactly one space."""

    # NOTE(review): Validator.supported_lang() accepts every language and
    # is not overridden here, so this set does not currently restrict
    # which files are checked -- confirm whether that is intended.
    supported_langs = set(('C', 'C++'))

    def validate_line(self, line_no, line):
        """Return True if the spacing is fine, otherwise report the line."""
        match = any_control.search(line)
        if match and match.group(2) != " ":
            # fail_line() does the bookkeeping (self.bad); there is no
            # separate statistics object to update.
            self.fail_line(line_no, line,
                           'improper spacing after %s' % match.group(1))
            return False
        else:
            return True

    def dump(self):
        """Print the number of badly spaced control statements seen."""
        print("%d bad parens after if/while/for." % (self.bad, ))
class CarriageReturn(SimpleValidator):
    """Flag lines that contain a carriage return character."""

    def __init__(self, *args, **kwargs):
        fail_message = "carriage return found"
        dump_message = "%(bad)d carriage returns found."
        super(CarriageReturn, self).__init__(
            fail_message, dump_message, *args, **kwargs)

    def simple_validate_line(self, line):
        """Return True if 'line' is free of carriage returns."""
        return '\r' not in line
class TabIndent(SimpleValidator):
    """Flag lines whose leading whitespace contains a tab."""

    # Leading run of spaces/tabs, captured as group 1.
    lead = re.compile(r'^([ \t]+)')

    def __init__(self, *args, **kwargs):
        fail_message = "using tabs to indent"
        dump_message = "%(bad)d cases of tabs to indent."
        super(TabIndent, self).__init__(
            fail_message, dump_message, *args, **kwargs)

    def simple_validate_line(self, line):
        """Return True unless the indentation of 'line' uses a tab."""
        indent = TabIndent.lead.search(line)
        if indent is None:
            # No leading whitespace at all.
            return True
        return '\t' not in indent.group(1)
class TrailingWhitespace(SimpleValidator):
    """Flag lines that end in spaces or tabs."""

    # Trailing run of spaces/tabs, captured as group 1.
    trail = re.compile(r'([ \t]+)$')

    def __init__(self, *args, **kwargs):
        fail_message = "trailing whitespace"
        dump_message = "%(bad)d cases of whitespace at the end of a line."
        super(TrailingWhitespace, self).__init__(
            fail_message, dump_message, *args, **kwargs)

    def simple_validate_line(self, line):
        """Return True if 'line' has no trailing spaces or tabs."""
        return TrailingWhitespace.trail.search(line) is None
def is_validator(cls):
    """Determine if a class is a Validator that can be instantiated

    Anything that is not a class, not derived from Validator, or still
    abstract is rejected.
    """
    if not inspect.isclass(cls):
        return False
    if not issubclass(cls, Validator):
        return False
    return not inspect.isabstract(cls)
# List of all validator classes: every member of this module's namespace
# that is_validator() accepts.
all_validators = [ v for n, v in \
inspect.getmembers(sys.modules[__name__], is_validator) ]

379
util/style/verifiers.py Normal file
View File

@@ -0,0 +1,379 @@
#!/usr/bin/env python
#
# Copyright (c) 2014, 2016 ARM Limited
# All rights reserved
#
# The license below extends only to copyright in the software and shall
# not be construed as granting a license to any other intellectual
# property including but not limited to intellectual property relating
# to a hardware implementation of the functionality of the software
# licensed hereunder. You may use the software subject to the license
# terms below provided that you ensure that this notice is replicated
# unmodified and in its entirety in all distributions of the software,
# modified or unmodified, in source code or in binary form.
#
# Copyright (c) 2006 The Regents of The University of Michigan
# Copyright (c) 2007,2011 The Hewlett-Packard Development Company
# Copyright (c) 2016 Advanced Micro Devices, Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Nathan Binkert
# Steve Reinhardt
# Andreas Sandberg
from abc import ABCMeta, abstractmethod
from difflib import SequenceMatcher
import inspect
import os
import re
import sys
import style
import sort_includes
from region import *
from file_types import lang_type
def _modified_regions(old, new):
    """Return the regions of 'old' that differ from 'new'.

    The two sequences are compared with difflib's SequenceMatcher
    (automatic junk detection disabled). Every opcode other than
    "equal" contributes a Region covering the affected index range of
    'old'; the ranges in 'new' are not used.
    """
    matcher = SequenceMatcher(a=old, b=new, autojunk=False)
    changed = Regions()

    for opcode in matcher.get_opcodes():
        tag, old_start, old_end = opcode[:3]
        if tag == "equal":
            continue
        changed.extend(Region(old_start, old_end))

    return changed
class Verifier(object):
"""Base class for style verifiers
Verifiers check for style violations and optionally fix such
violations. Implementations should either inherit from this class
(Verifier) if they need to work on entire files or LineVerifier if
they operate on a line-by-line basis.
Subclasses must define these class attributes:
languages = set of strings identifying applicable languages
test_name = long descriptive name of test, will be used in
messages such as "error in <foo>" or "invalid <foo>"
opt_name = short name used to generate command-line options to
control the test (--fix-<foo>, --ignore-<foo>, etc.)
"""
__metaclass__ = ABCMeta
def __init__(self, ui, opts, base=None):
self.ui = ui
self.base = base
# opt_name must be defined as a class attribute of derived classes.
# Check test-specific opts first as these have precedence.
self.opt_fix = opts.get('fix_' + self.opt_name, False)
self.opt_ignore = opts.get('ignore_' + self.opt_name, False)
self.opt_skip = opts.get('skip_' + self.opt_name, False)
# If no test-specific opts were set, then set based on "-all" opts.
if not (self.opt_fix or self.opt_ignore or self.opt_skip):
self.opt_fix = opts.get('fix_all', False)
self.opt_ignore = opts.get('ignore_all', False)
self.opt_skip = opts.get('skip_all', False)
def normalize_filename(self, name):
abs_name = os.path.abspath(name)
if self.base is None:
return abs_name
abs_base = os.path.abspath(self.base)
return os.path.relpath(abs_name, start=abs_base)
def open(self, filename, mode):
try:
f = file(filename, mode)
except OSError, msg:
print 'could not open file %s: %s' % (filename, msg)
return None
return f
def skip(self, filename):
# We never want to handle symlinks, so always skip them: If the location
# pointed to is a directory, skip it. If the location is a file inside
# the gem5 directory, it will be checked as a file, so symlink can be
# skipped. If the location is a file outside gem5, we don't want to
# check it anyway.
if os.path.islink(filename):
return True
return lang_type(filename) not in self.languages
def apply(self, filename, regions=all_regions):
"""Possibly apply to specified regions of file 'filename'.
Verifier is skipped if --skip-<test> option was provided or if
file is not of an applicable type. Otherwise file is checked
and error messages printed. Errors are fixed or ignored if
the corresponding --fix-<test> or --ignore-<test> options were
provided. If neither, the user is prompted for an action.
Returns True to abort, False otherwise.
"""
if not (self.opt_skip or self.skip(filename)):
errors = self.check(filename, regions)
if errors and not self.opt_ignore:
if self.opt_fix:
self.fix(filename, regions)
else:
result = self.ui.prompt("(a)bort, (i)gnore, or (f)ix?",
'aif', 'a')
if result == 'f':
self.fix(filename, regions)
elif result == 'a':
return True # abort
return False
@abstractmethod
def check(self, filename, regions=all_regions):
"""Check specified regions of file 'filename'.
Line-by-line checks can simply provide a check_line() method
that returns True if the line is OK and False if it has an
error. Verifiers that need a multi-line view (like
SortedIncludes) must override this entire function.
Returns a count of errors (0 if none), though actual non-zero
count value is not currently used anywhere.
"""
pass
@abstractmethod
def fix(self, filename, regions=all_regions):
"""Fix specified regions of file 'filename'.
Line-by-line fixes can simply provide a fix_line() method that
returns the fixed line. Verifiers that need a multi-line view
(like SortedIncludes) must override this entire function.
"""
pass
class LineVerifier(Verifier):
    """Base class for verifiers that work on one line at a time.

    Subclasses provide check_line() and fix_line(); this class takes
    care of reading the file, restricting the work to the requested
    regions and writing the result back. Both methods receive the line
    without its trailing newline, and fix_line() must return the fixed
    line without one as well, since fix() writes the newline itself.
    """

    def check(self, filename, regions=all_regions):
        """Check the lines of 'filename' that fall inside 'regions'.

        Returns the number of offending lines. A file that cannot be
        opened yields 0; open() has already reported the problem.
        """
        f = self.open(filename, 'r')
        if f is None:
            return 0

        errors = 0
        try:
            for num,line in enumerate(f):
                if num not in regions:
                    continue
                line = line.rstrip('\n')
                if not self.check_line(line):
                    self.ui.write("invalid %s in %s:%d\n" % \
                                  (self.test_name, filename, num + 1))
                    if self.ui.verbose:
                        # The newline is already gone, so show the whole
                        # line rather than chopping its last character.
                        self.ui.write(">>%s<<\n" % line)
                    errors += 1
        finally:
            f.close()
        return errors

    def fix(self, filename, regions=all_regions):
        """Rewrite 'filename', passing lines inside 'regions' to fix_line().

        Lines outside the regions are written back unchanged, except
        that every line is terminated with a newline.
        """
        f = self.open(filename, 'r+')
        if f is None:
            return

        try:
            lines = list(f)

            # Rewrite the file in place from the start.
            f.seek(0)
            f.truncate()

            for i,line in enumerate(lines):
                line = line.rstrip('\n')
                if i in regions:
                    line = self.fix_line(line)

                f.write(line)
                f.write("\n")
        finally:
            f.close()

    @abstractmethod
    def check_line(self, line):
        """Return True if 'line' is OK, False if it has an error."""
        pass

    @abstractmethod
    def fix_line(self, line):
        """Return the fixed version of 'line' (without a newline)."""
        pass
class Whitespace(LineVerifier):
    """Check whitespace.

    Specifically:
    - No tabs used for indent
    - No trailing whitespace
    """

    languages = set(('C', 'C++', 'swig', 'python', 'asm', 'isa', 'scons'))
    test_name = 'whitespace'
    opt_name = 'white'

    _lead = re.compile(r'^([ \t]+)')
    _trail = re.compile(r'([ \t]+)$')

    # Width of a tab stop used when expanding leading tabs. Taken from the
    # style module when it defines 'tabsize', otherwise 8.
    # NOTE(review): confirm that style.tabsize is the intended source.
    _tabsize = getattr(style, 'tabsize', 8)

    def check_line(self, line):
        """Return False if 'line' is tab-indented or has trailing blanks."""
        match = Whitespace._lead.search(line)
        if match and match.group(1).find('\t') != -1:
            return False

        match = Whitespace._trail.search(line)
        if match:
            return False

        return True

    def fix_line(self, line):
        """Expand tabs in the indentation and strip trailing whitespace.

        The returned line carries no newline; LineVerifier.fix() writes
        the line terminator itself.
        """
        if Whitespace._lead.search(line):
            tabsize = Whitespace._tabsize
            newline = ''
            for i,c in enumerate(line):
                if c == ' ':
                    newline += ' '
                elif c == '\t':
                    # Pad up to the next tab stop.
                    newline += ' ' * (tabsize - len(newline) % tabsize)
                else:
                    # First non-blank character: keep the rest verbatim.
                    newline += line[i:]
                    break

            line = newline

        return line.rstrip()
class SortedIncludes(Verifier):
    """Check for proper sorting of include statements"""

    languages = sort_includes.default_languages
    test_name = 'include file order'
    opt_name = 'include'

    def __init__(self, *args, **kwargs):
        super(SortedIncludes, self).__init__(*args, **kwargs)
        self.sort_includes = sort_includes.SortIncludes()

    def check(self, filename, regions=all_regions):
        """Report whether the includes of 'filename' are out of order.

        The file is run through the include sorter and compared with
        the original; only differences that intersect 'regions' count.
        Returns 1 if such differences exist, 0 otherwise (including for
        empty files and files that could not be opened).
        """
        f = self.open(filename, 'r')
        if f is None:
            return 0

        norm_fname = self.normalize_filename(filename)

        try:
            old = [ l.rstrip('\n') for l in f ]
        finally:
            f.close()

        if len(old) == 0:
            return 0

        language = lang_type(filename, old[0])
        new = list(self.sort_includes(old, norm_fname, language))

        modified = _modified_regions(old, new) & regions

        if modified:
            self.ui.write("invalid sorting of includes in %s\n" % (filename))
            if self.ui.verbose:
                for start, end in modified.regions:
                    self.ui.write("bad region [%d, %d)\n" % (start, end))
            return 1

        return 0

    def fix(self, filename, regions=all_regions):
        """Rewrite 'filename' with its includes sorted.

        The whole file is rewritten; 'regions' is not consulted. Empty
        files and files that could not be opened are left alone.
        """
        f = self.open(filename, 'r+')
        if f is None:
            return

        try:
            lines = [ l.rstrip('\n') for l in f.readlines() ]
            if len(lines) == 0:
                return

            # Use the same normalized name as check() so that both agree
            # on the sorted order.
            norm_fname = self.normalize_filename(filename)
            language = lang_type(filename, lines[0])
            sort_lines = list(self.sort_includes(lines, norm_fname, language))

            f.seek(0)
            f.truncate()

            for line in sort_lines:
                f.write(line)
                f.write('\n')
        finally:
            f.close()
class ControlSpace(LineVerifier):
    """Check for exactly one space after if/while/for"""

    languages = set(('C', 'C++'))
    test_name = 'spacing after if/while/for'
    opt_name = 'control'

    # Group 1 is the keyword, group 2 the whitespace before the paren.
    _any_control = re.compile(r'\b(if|while|for)([ \t]*)\(')

    def check_line(self, line):
        """Return True unless the first control keyword is badly spaced."""
        match = ControlSpace._any_control.search(line)
        return not (match and match.group(2) != " ")

    def fix_line(self, line):
        """Put exactly one space between each keyword and its paren."""
        # The pattern is a class attribute, not a module-level name.
        new_line = ControlSpace._any_control.sub(r'\1 (', line)
        return new_line
class LineLength(LineVerifier):
    """Check that lines do not exceed the permitted length.

    Overlong lines can only be reported; there is no automatic fix.
    """

    languages = set(('C', 'C++', 'swig', 'python', 'asm', 'isa', 'scons'))
    test_name = 'line length'
    opt_name = 'length'

    def check_line(self, line):
        """Return True if 'line' is short enough."""
        # NOTE(review): LineVerifier.check() strips the newline before
        # calling this, so lines of 79 characters are rejected -- confirm
        # that 78 is the intended limit.
        length = style.normalized_len(line)
        return length <= 78

    def fix(self, filename, regions=all_regions):
        """Only warn; the file is left untouched."""
        self.ui.write("Warning: cannot automatically fix overly long lines.\n")

    def fix_line(self, line):
        """Unused, since fix() never rewrites lines."""
        pass
class BoolCompare(LineVerifier):
    """Check for explicit comparisons with boolean literals.

    Comparisons such as 'x == true' are flagged. Comparisons with
    true/True can be fixed by dropping the comparison; comparisons
    with false/False are only reported.
    """

    languages = set(('C', 'C++', 'python'))
    test_name = 'boolean comparison'
    opt_name = 'boolcomp'

    # '== <literal>' with surrounding whitespace; group 1 is the literal.
    regex = re.compile(r'\s*==\s*([Tt]rue|[Ff]alse)\b')
    # Same as 'regex', restricted to the comparisons that are safe to drop.
    _true_regex = re.compile(r'\s*==\s*[Tt]rue\b')

    def check_line(self, line):
        """Return True if 'line' has no comparison with a boolean literal."""
        return self.regex.search(line) is None

    def fix_line(self, line):
        """Remove '== true' style comparisons from 'line'.

        Comparisons with false/False are left in place, because simply
        dropping them would invert their meaning; a warning is written
        instead.
        """
        literals = self.regex.findall(line)
        if any(literal in ('false', 'False') for literal in literals):
            self.ui.write("Warning: cannot automatically fix "
                          "comparisons with false/False.\n")

        return self._true_regex.sub('', line)
def is_verifier(cls):
    """Determine if a class is a Verifier that can be instantiated

    Anything that is not a class, not derived from Verifier, or still
    abstract is rejected.
    """
    if not inspect.isclass(cls):
        return False
    if not issubclass(cls, Verifier):
        return False
    return not inspect.isabstract(cls)
# List of all verifier classes: every member of this module's namespace
# that is_verifier() accepts.
all_verifiers = [ v for n, v in \
inspect.getmembers(sys.modules[__name__], is_verifier) ]