Coverage for src/ansible_sign/checksum/base.py: 99%
81 statements
« prev ^ index » next coverage.py v7.6.1, created at 2024-09-05 08:12 +0000
« prev ^ index » next coverage.py v7.6.1, created at 2024-09-05 08:12 +0000
1import hashlib
2import os
class InvalidChecksumLine(Exception):
    """
    Raised by ChecksumFile.parse() when a manifest line cannot be parsed or
    when the same path appears more than once in the manifest.
    """
class NoDifferException(Exception):
    """
    Presumably signals that no usable differ is available.

    NOTE(review): nothing in this module raises it; confirm the intended use
    against the callers.
    """
class ChecksumMismatch(Exception):
    """
    Raised by ChecksumFile.verify() when the project root does not match the
    manifest: either files were added/removed (the differ's result is passed
    as the exception argument) or a checksum differs (a message string
    listing the affected paths is passed).
    """
class ChecksumFile:
    """
    Slurp a checksum file and be able to check and compare its contents to a
    given root directory. Also: be able to write out a checksum file.

    We only allow sha256 for now, though supporting 512, etc. would be easy.
    """

    def __init__(self, root, differ=None):
        """
        :param root: Project root directory. Relative paths returned by the
            differ are joined onto it when calculating checksums.
        :param differ: Optional differ *class* (not an instance). It is
            instantiated with ``root=root``. When omitted,
            DistlibManifestChecksumFileExistenceDiffer is used.
        """
        self.root = root
        if differ is not None:
            self.differ = differ(root=self.root)
        else:
            # Imported here rather than at module level, so the default
            # differ is only loaded when the caller did not supply one.
            from .differ.distlib_manifest import (
                DistlibManifestChecksumFileExistenceDiffer,
            )

            self.differ = DistlibManifestChecksumFileExistenceDiffer(root=self.root)

    @property
    def differ_warnings(self):
        """
        A differ can store a set of warnings (as strings) in the_differ.warnings
        which we can propagate up here. This allows calling code to display any
        warnings found during diffing time.
        """

        return self.differ.warnings

    @property
    def warnings(self):
        """
        Right now this is just the same as differ_warnings. In the future it
        might include warnings that we discover in methods in this class as
        well.
        """

        return self.differ_warnings

    def _parse_gnu_style(self, line):
        """
        Attempt to parse a GNU style checksum line, returning False if we are
        unable to.

        A GNU style line is the 64-character checksum, a single space, a mode
        character (a second space for text mode, or ``*`` for binary mode),
        and then the path. It looks like this:
        f712979c4c5dfe739253908d122f5c87faa8b5de6f15ba7a1548ae028ff22d13  hello_world.yml

        Or maybe like this:
        f82da8b4f98a3d3125fbc98408911f65dbc8dc38c0f38e258ebe290a8ad3d3c0 *binary

        :param line: One line of a checksum manifest, without its newline.
        :returns: A ``(path, shasum)`` tuple on success, ``False`` otherwise.
        """

        # Split on the FIRST space only; the path itself may contain spaces.
        parts = line.split(" ", 1)
        if len(parts) != 2 or len(parts[0]) != 64:
            return False

        # The remainder must be the mode character followed by a non-empty path.
        if len(parts[1]) < 2 or parts[1][0] not in (" ", "*"):
            return False

        shasum = parts[0]
        path = parts[1][1:]
        return (path, shasum)

    def parse(self, checksum_file_contents):
        """
        Given a complete checksum manifest as a string, parse it and return a
        dict with the result, keyed on each filename or path.

        Blank (whitespace-only) lines are skipped. Only GNU-style lines are
        understood.

        :param checksum_file_contents: The whole manifest as one string.
        :returns: dict mapping path -> checksum string.
        :raises InvalidChecksumLine: if a line cannot be parsed, or if a path
            is listed more than once. Line numbers in the message are 1-based.
        """
        checksums = {}
        for idx, line in enumerate(checksum_file_contents.splitlines()):
            if not line.strip():
                continue
            parsed = self._parse_gnu_style(line)
            if parsed is False:
                raise InvalidChecksumLine(f"Unparsable checksum, line {idx + 1}: {line}")
            path, shasum = parsed
            if path in checksums:
                raise InvalidChecksumLine(f"Duplicate path in checksum, line {idx + 1}: {line}")
            checksums[path] = shasum
        return checksums

    def diff(self, paths):
        """
        Given a collection of paths, use the differ to figure out which files
        (in reality) have been added/removed from the project root (or latest
        SCM tree).

        :param paths: Iterable of paths; it is converted to a set before being
            handed to the differ.
        :returns: Whatever the differ's compare_filelist() returns. verify()
            reads its "added" and "removed" keys.
        """

        paths = set(paths)
        return self.differ.compare_filelist(paths)

    def generate_gnu_style(self):
        """
        Using the root directory and 'differ' class given to the constructor,
        generate a GNU-style checksum manifest file. This is always generated
        from scratch by finding the list of relevant files in the root directory
        (by asking the differ), and calculating the checksum for each of them.

        The resulting list is always sorted by filename.

        :returns: The manifest as a single newline-terminated string, one
            "<checksum>  <path>" line per file.
        """
        calculated = self.calculate_checksums_from_root(verifying=False)
        # *two* spaces between checksum and path - it's important for compat
        # with coreutils, and _parse_gnu_style() rejects a single space.
        lines = [f"{checksum}  {path}" for path, checksum in sorted(calculated.items())]
        return "\n".join(lines) + "\n"

    def calculate_checksum(self, path):
        """
        Return the hex-encoded SHA-256 digest of the file at ``path``.

        The file is read in binary mode in 64 KiB chunks so that large files
        are never loaded into memory in one piece.
        """
        shasum = hashlib.sha256()
        with open(path, "rb") as f:
            while True:
                chunk = f.read(65536)
                if not chunk:
                    break
                shasum.update(chunk)
        return shasum.hexdigest()

    def calculate_checksums_from_root(self, verifying):
        """
        Using the root of the project and the differ class passed to the
        constructor, iterate over all files in the project and calculate their
        checksums. Return a dictionary of the result, keyed on the filename.

        Just calling this is not enough in many cases- you want to ensure that
        the files in the checksum list are the same ones present in reality.
        diff() above does just that. Use that in combination with this method,
        or use verify() which does it for you.

        :param verifying: Passed through unchanged to the differ's
            list_files(); generate_gnu_style() passes False and verify()
            passes True.
        :returns: dict mapping each path from the differ (as returned, i.e.
            not joined with the root) -> hex checksum.
        """
        out = {}
        for path in self.differ.list_files(verifying=verifying):
            out[path] = self.calculate_checksum(os.path.join(self.root, path))
        return out

    def verify(self, parsed_manifest_dct, diff=True):
        """
        Takes a parsed manifest file (e.g. using parse(), with paths as keys and
        checksums as values).

        Then calculates the current list of files in the project root. If paths
        have been added or removed, ChecksumMismatch is raised.

        Otherwise, the checksum of each file in the project root (and subdirs)
        is calculated and that result is checked to be equal to the parsed
        checksums passed in.

        :param parsed_manifest_dct: dict mapping path -> expected checksum.
        :param diff: When true (the default), first compare the set of paths
            against the differ and fail on any added/removed path.
        :returns: True when everything matches.
        :raises ChecksumMismatch: if paths were added/removed (diff=True), or
            if any manifest path has a different checksum or has no
            recalculated checksum at all.
        """

        if diff:
            # If there are any differences in existing paths, fail the check...
            differences = self.diff(parsed_manifest_dct.keys())
            if differences["added"] or differences["removed"]:
                raise ChecksumMismatch(differences)

        recalculated = self.calculate_checksums_from_root(verifying=True)
        # A manifest path with no recalculated checksum (possible when
        # diff=False) counts as a mismatch rather than escaping as KeyError.
        # Sorted so that the error message is deterministic.
        mismatches = sorted(
            parsed_path
            for parsed_path, parsed_checksum in parsed_manifest_dct.items()
            if parsed_path not in recalculated or recalculated[parsed_path] != parsed_checksum
        )
        if mismatches:
            raise ChecksumMismatch(f"Checksum mismatch: {', '.join(mismatches)}")

        return True