diff options
Diffstat (limited to 'llvm/utils/docker/scripts/llvm_checksum/llvm_checksum.py')
| -rwxr-xr-x | llvm/utils/docker/scripts/llvm_checksum/llvm_checksum.py | 198 | 
1 files changed, 198 insertions, 0 deletions
| diff --git a/llvm/utils/docker/scripts/llvm_checksum/llvm_checksum.py b/llvm/utils/docker/scripts/llvm_checksum/llvm_checksum.py new file mode 100755 index 00000000000..584efa2598b --- /dev/null +++ b/llvm/utils/docker/scripts/llvm_checksum/llvm_checksum.py @@ -0,0 +1,198 @@ +#!/usr/bin/python +""" A small program to compute checksums of LLVM checkout. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import hashlib +import logging +import re +import sys +from argparse import ArgumentParser +from project_tree import * + +SVN_DATES_REGEX = re.compile(r"\$(Date|LastChangedDate)[^\$]+\$") + + +def main(): +  parser = ArgumentParser() +  parser.add_argument( +      "-v", "--verbose", action="store_true", help="enable debug logging") +  parser.add_argument( +      "-c", +      "--check", +      metavar="reference_file", +      help="read checksums from reference_file and " + +      "check they match checksums of llvm_path.") +  parser.add_argument( +      "--partial", +      action="store_true", +      help="ignore projects from reference_file " + +      "that are not checked out in llvm_path.") +  parser.add_argument( +      "--multi_dir", +      action="store_true", +      help="indicates llvm_path contains llvm, checked out " + +      "into multiple directories, as opposed to a " + +      "typical single source tree checkout.") +  parser.add_argument("llvm_path") + +  args = parser.parse_args() +  if args.check is not None: +    with open(args.check, "r") as f: +      reference_checksums = ReadLLVMChecksums(f) +  else: +    reference_checksums = None + +  if args.verbose: +    logging.basicConfig(level=logging.DEBUG) + +  llvm_projects = CreateLLVMProjects(not args.multi_dir) +  checksums = ComputeLLVMChecksums(args.llvm_path, llvm_projects) + +  if reference_checksums is None: +    WriteLLVMChecksums(checksums, sys.stdout) +    sys.exit(0) + +  if not ValidateChecksums(reference_checksums, checksums, args.partial): +    sys.stdout.write("Checksums differ.\nNew checksums:\n") +    WriteLLVMChecksums(checksums, sys.stdout) +    sys.stdout.write("Reference checksums:\n") +    WriteLLVMChecksums(reference_checksums, sys.stdout) +    sys.exit(1) +  else: +    sys.stdout.write("Checksums match.") + + +def ComputeLLVMChecksums(root_path, projects): +  """Compute checksums for LLVM sources checked out using svn. + +  Args: +    root_path: a directory of llvm checkout. +    projects: a list of LLVMProject instances, which describe checkout paths, +      relative to root_path. + +  Returns: +    A dict mapping from project name to project checksum. +  """ +  hash_algo = hashlib.sha256 + +  def collapse_svn_substitutions(contents): +    # Replace svn substitutions for $Date$ and $LastChangedDate$. +    # Unfortunately, these are locale-specific. +    return SVN_DATES_REGEX.sub("$\1$", contents) + +  def read_and_collapse_svn_subsitutions(file_path): +    with open(file_path, "rb") as f: +      contents = f.read() +      new_contents = collapse_svn_substitutions(contents) +      if contents != new_contents: +        logging.debug("Replaced svn keyword substitutions in %s", file_path) +        logging.debug("\n\tBefore\n%s\n\tAfter\n%s", contents, new_contents) +      return new_contents + +  project_checksums = dict() +  # Hash each project. +  for proj in projects: +    project_root = os.path.join(root_path, proj.relpath) +    if not os.path.exists(project_root): +      logging.info("Folder %s doesn't exist, skipping project %s", proj.relpath, +                   proj.name) +      continue + +    files = list() + +    def add_file_hash(file_path): +      if os.path.islink(file_path) and not os.path.exists(file_path): +        content = os.readlink(file_path) +      else: +        content = read_and_collapse_svn_subsitutions(file_path) +      hasher = hash_algo() +      hasher.update(content) +      file_digest = hasher.hexdigest() +      logging.debug("Checksum %s for file %s", file_digest, file_path) +      files.append((file_path, file_digest)) + +    logging.info("Computing checksum for %s", proj.name) +    WalkProjectFiles(root_path, projects, proj, add_file_hash) + +    # Compute final checksum. +    files.sort(key=lambda x: x[0]) +    hasher = hash_algo() +    for file_path, file_digest in files: +      file_path = os.path.relpath(file_path, project_root) +      hasher.update(file_path) +      hasher.update(file_digest) +    project_checksums[proj.name] = hasher.hexdigest() +  return project_checksums + + +def WriteLLVMChecksums(checksums, f): +  """Writes checksums to a text file. + +  Args: +    checksums: a dict mapping from project name to project checksum (result of +      ComputeLLVMChecksums). +    f: a file object to write into. +  """ + +  for proj in sorted(checksums.keys()): +    f.write("{} {}\n".format(checksums[proj], proj)) + + +def ReadLLVMChecksums(f): +  """Reads checksums from a text file, produced by WriteLLVMChecksums. + +  Returns: +    A dict, mapping from project name to project checksum. +  """ +  checksums = {} +  while True: +    line = f.readline() +    if line == "": +      break +    checksum, proj = line.split() +    checksums[proj] = checksum +  return checksums + + +def ValidateChecksums(reference_checksums, +                      new_checksums, +                      allow_missing_projects=False): +  """Validates that reference_checksums and new_checksums match. + +  Args: +    reference_checksums: a dict of reference checksums, mapping from a project +      name to a project checksum. +    new_checksums: a dict of checksums to be checked, mapping from a project +      name to a project checksum. +    allow_missing_projects: +      When True, reference_checksums may contain more projects than +        new_checksums. Projects missing from new_checksums are ignored. +      When False, new_checksums and reference_checksums must contain checksums +        for the same set of projects. If there is a project in +        reference_checksums, missing from new_checksums, ValidateChecksums +        will return False. + +  Returns: +    True, if checksums match with regards to allow_missing_projects flag value. +    False, otherwise. +  """ +  if not allow_missing_projects: +    if len(new_checksums) != len(reference_checksums): +      return False + +  for proj, checksum in new_checksums.iteritems(): +    # We never computed a checksum for this project. +    if proj not in reference_checksums: +      return False +    # Checksum did not match. +    if reference_checksums[proj] != checksum: +      return False + +  return True + + +if __name__ == "__main__": +  main() | 

