
Commit 93f7c95

Merge pull request github#3152 from dbartol/dbartol/sync-files
Move `sync-identical-files.py` into public repo as `sync-files.py`
2 parents e31143c + 3eef274

File tree: 2 files changed, +143 -0 lines changed


.gitignore

Lines changed: 3 additions & 0 deletions
@@ -14,6 +14,9 @@
 .vs/*
 !.vs/VSWorkspaceSettings.json
 
+# Byte-compiled python files
+*.pyc
+
 # It's useful (though not required) to be able to unpack codeql in the ql checkout itself
 /codeql/
 .vscode/settings.json

config/sync-files.py

Lines changed: 140 additions & 0 deletions
@@ -0,0 +1,140 @@
#!/usr/bin/env python3

# Due to various technical limitations, we sometimes have files that need to be
# kept identical in the repository. This script loads a database of such
# files and can perform two functions: check whether they are still identical,
# and overwrite the others with a master copy if needed.

import hashlib
import shutil
import os
import sys
import json
import re
path = os.path

file_groups = {}

def add_prefix(prefix, relative):
    result = path.join(prefix, relative)
    if path.commonprefix((path.realpath(result), path.realpath(prefix))) != \
            path.realpath(prefix):
        raise Exception("Path {} is not below {}".format(
            result, prefix))
    return result

def load_if_exists(prefix, json_file_relative):
    json_file_name = path.join(prefix, json_file_relative)
    if path.isfile(json_file_name):
        print("Loading file groups from", json_file_name)
        with open(json_file_name, 'r', encoding='utf-8') as fp:
            raw_groups = json.load(fp)
        prefixed_groups = {
            name: [
                add_prefix(prefix, relative)
                for relative in relatives
            ]
            for name, relatives in raw_groups.items()
        }
        file_groups.update(prefixed_groups)

# Generates a list of C# test files that should be in sync
def csharp_test_files():
    test_file_re = re.compile('.*(Bad|Good)[0-9]*\\.cs$')
    csharp_doc_files = {
        file: os.path.join(root, file)
        for root, dirs, files in os.walk("csharp/ql/src")
        for file in files
        if test_file_re.match(file)
    }
    return {
        "C# test '" + file + "'": [os.path.join(root, file), csharp_doc_files[file]]
        for root, dirs, files in os.walk("csharp/ql/test")
        for file in files
        if file in csharp_doc_files
    }

def file_checksum(filename):
    with open(filename, 'rb') as file_handle:
        return hashlib.sha1(file_handle.read()).hexdigest()

def check_group(group_name, files, master_file_picker, emit_error):
    checksums = {file_checksum(f) for f in files}

    if len(checksums) == 1:
        return

    master_file = master_file_picker(files)
    if master_file is None:
        emit_error(__file__, 0,
                   "Files from group '" + group_name + "' not in sync.")
        emit_error(__file__, 0,
                   "Run this script with a file-name argument among the "
                   "following to overwrite the remaining files with the contents "
                   "of that file or run with the --latest switch to update each "
                   "group of files from the most recently modified file in the group.")
        for filename in files:
            emit_error(__file__, 0, " " + filename)
    else:
        print(" Syncing others from", master_file)
        for filename in files:
            if filename == master_file:
                continue
            print(" " + filename)
            os.replace(filename, filename + '~')
            shutil.copy(master_file, filename)
        print(" Backups written with '~' appended to file names")

def chdir_repo_root():
    root_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..')
    os.chdir(root_path)

def choose_master_file(master_file, files):
    if master_file in files:
        return master_file
    else:
        return None

def choose_latest_file(files):
    latest_time = None
    latest_file = None
    for filename in files:
        file_time = os.path.getmtime(filename)
        if (latest_time is None) or (latest_time < file_time):
            latest_time = file_time
            latest_file = filename
    return latest_file

local_error_count = 0
def emit_local_error(path, line, error):
    print('ERROR: ' + path + ':' + str(line) + " - " + error)
    global local_error_count
    local_error_count += 1

# This function is invoked directly by a CI script, which passes a different
# error-handling callback.
def sync_identical_files(emit_error):
    if len(sys.argv) == 1:
        master_file_picker = lambda files: None
    elif len(sys.argv) == 2:
        if sys.argv[1] == "--latest":
            master_file_picker = choose_latest_file
        elif os.path.isfile(sys.argv[1]):
            master_file_picker = lambda files: choose_master_file(sys.argv[1], files)
        else:
            raise Exception("File not found")
    else:
        raise Exception("Bad command line or file not found")
    chdir_repo_root()
    load_if_exists('.', 'config/identical-files.json')
    file_groups.update(csharp_test_files())
    for group_name, files in file_groups.items():
        check_group(group_name, files, master_file_picker, emit_error)

def main():
    sync_identical_files(emit_local_error)
    if local_error_count > 0:
        exit(1)

if __name__ == "__main__":
    main()
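
For reference, load_if_exists expects config/identical-files.json to be a JSON object mapping a human-readable group name to the list of repository-relative paths that must stay identical. A minimal sketch of one entry (the group name and paths below are made up for illustration, not taken from the real database):

{
  "Shared utility library": [
    "java/ql/src/utils/Shared.qll",
    "cpp/ql/src/utils/Shared.qll"
  ]
}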
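Given the argument handling in sync_identical_files, the script can be run in three ways (run from the repository root so a file-name argument resolves; the path in the second line is hypothetical):

python3 config/sync-files.py                               # check only; exit 1 if any group is out of sync
python3 config/sync-files.py java/ql/src/utils/Shared.qll  # overwrite the other members of that file's group with its contents
python3 config/sync-files.py --latest                      # sync each out-of-sync group from its most recently modified member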
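The comment above sync_identical_files notes that a CI script calls it directly with its own error callback. A minimal sketch of what such a wrapper could look like is below; the wrapper itself, its callback, and the output format are assumptions, not the actual CI code:

import importlib.util

# Load config/sync-files.py as a module; the hyphen in the file name rules out
# a plain "import" statement. Assumes the wrapper runs from the repo root.
spec = importlib.util.spec_from_file_location("sync_files", "config/sync-files.py")
sync_files = importlib.util.module_from_spec(spec)
spec.loader.exec_module(sync_files)

ci_errors = []

def emit_ci_error(path, line, error):
    # Collect messages instead of printing them immediately; the wrapper
    # decides how to report them and whether to fail the build.
    ci_errors.append("{}:{}: {}".format(path, line, error))

sync_files.sync_identical_files(emit_ci_error)
if ci_errors:
    print("\n".join(ci_errors))
    raise SystemExit(1)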
