Skip to content

Commit 0387177

Browse files
authored
Merge pull request github#1851 from hvitved/csharp/early-identify-duplicate-extraction
Approved by calumgrant
2 parents fd88f7a + 61bd9f2 commit 0387177

File tree

7 files changed

+129
-79
lines changed

7 files changed

+129
-79
lines changed

csharp/extractor/Semmle.Extraction.CSharp/Analyser.cs

Lines changed: 83 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,10 @@
11
using System;
2-
using System.Collections.Immutable;
32
using Microsoft.CodeAnalysis;
43
using Microsoft.CodeAnalysis.CSharp;
5-
using Microsoft.CodeAnalysis.Diagnostics;
64
using System.IO;
75
using System.Linq;
86
using Semmle.Extraction.CSharp.Populators;
9-
using System.Runtime.InteropServices;
107
using System.Collections.Generic;
11-
using System.Text;
128
using System.Threading.Tasks;
139
using System.Diagnostics;
1410
using Semmle.Util.Logging;
@@ -40,27 +36,37 @@ public Analyser(IProgressMonitor pm, ILogger logger)
4036
CSharpCompilation compilation;
4137
Layout layout;
4238

39+
private bool init;
4340
/// <summary>
44-
/// Initialize the analyser.
41+
/// Start initialization of the analyser.
4542
/// </summary>
46-
/// <param name="commandLineArguments">Arguments passed to csc.</param>
47-
/// <param name="compilationIn">The Roslyn compilation.</param>
48-
/// <param name="options">Extractor options.</param>
4943
/// <param name="roslynArgs">The arguments passed to Roslyn.</param>
50-
public void Initialize(
51-
CSharpCommandLineArguments commandLineArguments,
52-
CSharpCompilation compilationIn,
53-
Options options,
54-
string[] roslynArgs)
44+
/// <returns>A Boolean indicating whether to proceed with extraction.</returns>
45+
public bool BeginInitialize(string[] roslynArgs)
5546
{
56-
compilation = compilationIn;
47+
return init = LogRoslynArgs(roslynArgs, Extraction.Extractor.Version);
48+
}
5749

50+
/// <summary>
51+
/// End initialization of the analyser.
52+
/// </summary>
53+
/// <param name="commandLineArguments">Arguments passed to csc.</param>
54+
/// <param name="options">Extractor options.</param>
55+
/// <param name="compilation">The Roslyn compilation.</param>
56+
/// <exception cref="InternalError">Thrown when <see cref="BeginInitialize"/> has not previously returned true.</exception>
57+
public void EndInitialize(
58+
CSharpCommandLineArguments commandLineArguments,
59+
Options options,
60+
CSharpCompilation compilation)
61+
{
62+
if (!init)
63+
throw new InternalError("EndInitialize called without BeginInitialize returning true");
5864
layout = new Layout();
5965
this.options = options;
60-
66+
this.compilation = compilation;
6167
extractor = new Extraction.Extractor(false, GetOutputName(compilation, commandLineArguments), Logger);
68+
LogDiagnostics();
6269

63-
LogDiagnostics(roslynArgs);
6470
SetReferencePaths();
6571

6672
CompilationErrors += FilteredDiagnostics.Count();
@@ -110,7 +116,7 @@ public void InitializeStandalone(CSharpCompilation compilationIn, CommonOptions
110116
layout = new Layout();
111117
extractor = new Extraction.Extractor(true, null, Logger);
112118
this.options = options;
113-
LogDiagnostics(null);
119+
LogExtractorInfo(Extraction.Extractor.Version);
114120
SetReferencePaths();
115121
}
116122

@@ -205,11 +211,6 @@ static bool FileIsUpToDate(string src, string dest)
205211
File.GetLastWriteTime(dest) >= File.GetLastWriteTime(src);
206212
}
207213

208-
bool FileIsCached(string src, string dest)
209-
{
210-
return options.Cache && FileIsUpToDate(src, dest);
211-
}
212-
213214
/// <summary>
214215
/// Extracts compilation-wide entities, such as compilations and compiler diagnostics.
215216
/// </summary>
@@ -241,7 +242,7 @@ void DoAnalyseCompilation(string cwd, string[] args)
241242
}
242243

243244
public void LogPerformance(Entities.PerformanceMetrics p) => compilationEntity.PopulatePerformance(p);
244-
245+
245246
/// <summary>
246247
/// Extract an assembly to a new trap file.
247248
/// If the trap file exists, skip extraction to avoid duplicating
@@ -259,7 +260,7 @@ void DoAnalyseAssembly(PortableExecutableReference r)
259260
var projectLayout = layout.LookupProjectOrDefault(assemblyPath);
260261
using (var trapWriter = projectLayout.CreateTrapWriter(Logger, assemblyPath, true, options.TrapCompression))
261262
{
262-
var skipExtraction = FileIsCached(assemblyPath, trapWriter.TrapFile);
263+
var skipExtraction = options.Cache && File.Exists(trapWriter.TrapFile);
263264

264265
if (!skipExtraction)
265266
{
@@ -430,29 +431,74 @@ public void Dispose()
430431
public int TotalErrors => CompilationErrors + ExtractorErrors;
431432

432433
/// <summary>
433-
/// Logs detailed information about this invocation,
434-
/// in the event that errors were detected.
434+
/// Logs information about the extractor.
435435
/// </summary>
436-
/// <param name="roslynArgs">The arguments passed to Roslyn.</param>
437-
public void LogDiagnostics(string[] roslynArgs)
436+
public void LogExtractorInfo(string extractorVersion)
438437
{
439438
Logger.Log(Severity.Info, " Extractor: {0}", Environment.GetCommandLineArgs().First());
440-
if (extractor != null)
441-
Logger.Log(Severity.Info, " Extractor version: {0}", extractor.Version);
442-
439+
Logger.Log(Severity.Info, " Extractor version: {0}", extractorVersion);
443440
Logger.Log(Severity.Info, " Current working directory: {0}", Directory.GetCurrentDirectory());
441+
}
442+
443+
/// <summary>
444+
/// Logs information about the extractor, as well as the arguments to Roslyn.
445+
/// </summary>
446+
/// <param name="roslynArgs">The arguments passed to Roslyn.</param>
447+
/// <returns>A Boolean indicating whether to proceed with extraction: false if the same arguments have been logged previously, true otherwise.</returns>
448+
public bool LogRoslynArgs(string[] roslynArgs, string extractorVersion)
449+
{
450+
LogExtractorInfo(extractorVersion);
451+
Logger.Log(Severity.Info, $" Arguments to Roslyn: {string.Join(' ', roslynArgs)}");
444452

445-
if (roslynArgs != null)
453+
var csharpLogDir = Extractor.GetCSharpLogDirectory();
454+
var tempFile = Path.Combine(csharpLogDir, $"csharp.{Path.GetRandomFileName()}.txt");
455+
456+
bool argsWritten;
457+
using (var streamWriter = new StreamWriter(new FileStream(tempFile, FileMode.Append, FileAccess.Write)))
446458
{
447-
Logger.Log(Severity.Info, $" Arguments to Roslyn: {string.Join(' ', roslynArgs)}");
459+
streamWriter.WriteLine($"# Arguments to Roslyn: {string.Join(' ', roslynArgs.Where(arg => !arg.StartsWith('@')))}");
460+
argsWritten = roslynArgs.WriteCommandLine(streamWriter);
461+
}
462+
463+
var hash = FileUtils.ComputeFileHash(tempFile);
464+
var argsFile = Path.Combine(csharpLogDir, $"csharp.{hash}.txt");
448465

449-
// Create a new file in the log folder.
450-
var argsFile = Path.Combine(Extractor.GetCSharpLogDirectory(), $"csharp.{Path.GetRandomFileName()}.txt");
466+
if (argsWritten)
467+
Logger.Log(Severity.Info, $" Arguments have been written to {argsFile}");
451468

452-
if (roslynArgs.ArchiveCommandLine(argsFile))
453-
Logger.Log(Severity.Info, $" Arguments have been written to {argsFile}");
469+
if (File.Exists(argsFile))
470+
{
471+
try
472+
{
473+
File.Delete(tempFile);
474+
}
475+
catch (IOException e)
476+
{
477+
Logger.Log(Severity.Warning, $" Failed to remove {tempFile}: {e.Message}");
478+
}
479+
return false;
454480
}
455481

482+
try
483+
{
484+
File.Move(tempFile, argsFile);
485+
}
486+
catch (IOException e)
487+
{
488+
Logger.Log(Severity.Warning, $" Failed to move {tempFile} to {argsFile}: {e.Message}");
489+
}
490+
491+
return true;
492+
}
493+
494+
495+
/// <summary>
496+
/// Logs detailed information about this invocation,
497+
/// in the event that errors were detected.
498+
/// </summary>
499+
/// <remarks>Each entry of <c>FilteredDiagnostics</c> is logged as a compilation error; no value is returned.</remarks>
500+
public void LogDiagnostics()
501+
{
456502
foreach (var error in FilteredDiagnostics)
457503
{
458504
Logger.Log(Severity.Error, " Compilation error: {0}", error);

csharp/extractor/Semmle.Extraction.CSharp/Extractor.cs

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,12 @@ public static ExitCode Run(string[] args)
109109
return ExitCode.Failed;
110110
}
111111

112+
if (!analyser.BeginInitialize(compilerVersion.ArgsWithResponse))
113+
{
114+
logger.Log(Severity.Info, "Skipping extraction since files have already been extracted");
115+
return ExitCode.Ok;
116+
}
117+
112118
var referenceTasks = ResolveReferences(compilerArguments, analyser, canonicalPathCache, references);
113119

114120
var syntaxTrees = new List<SyntaxTree>();
@@ -131,7 +137,6 @@ public static ExitCode Run(string[] args)
131137
{
132138
logger.Log(Severity.Error, " No source files");
133139
++analyser.CompilationErrors;
134-
analyser.LogDiagnostics(compilerVersion.ArgsWithResponse);
135140
return ExitCode.Failed;
136141
}
137142

@@ -149,7 +154,7 @@ public static ExitCode Run(string[] args)
149154
// already.
150155
);
151156

152-
analyser.Initialize(compilerArguments, compilation, commandLineArguments, compilerVersion.ArgsWithResponse);
157+
analyser.EndInitialize(compilerArguments, commandLineArguments, compilation);
153158
analyser.AnalyseCompilation(cwd, args);
154159
analyser.AnalyseReferences();
155160

@@ -175,7 +180,7 @@ public static ExitCode Run(string[] args)
175180
{
176181
Frontend = new Entities.Timings() { Elapsed = sw1.Elapsed, Cpu = cpuTime1, User = userTime1 },
177182
Extractor = new Entities.Timings() { Elapsed = sw2.Elapsed, Cpu = cpuTime2 - cpuTime1, User = userTime2 - userTime1 },
178-
Total = new Entities.Timings() { Elapsed = stopwatch.Elapsed, Cpu=cpuTime2, User = userTime2 },
183+
Total = new Entities.Timings() { Elapsed = stopwatch.Elapsed, Cpu = cpuTime2, User = userTime2 },
179184
PeakWorkingSet = currentProcess.PeakWorkingSet64
180185
};
181186

csharp/extractor/Semmle.Extraction.Tests/Options.cs

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -183,26 +183,25 @@ public void ShowingHelp()
183183
public void Fast()
184184
{
185185
Environment.SetEnvironmentVariable("LGTM_INDEX_EXTRACTOR", "--fast");
186-
options = CSharp.Options.CreateWithEnvironment(new string[] {});
186+
options = CSharp.Options.CreateWithEnvironment(new string[] { });
187187
Assert.True(options.Fast);
188188
}
189189

190190
[Fact]
191191
public void ArchiveArguments()
192192
{
193-
var file1 = Path.GetTempFileName();
194-
var file2 = Path.GetTempFileName();
193+
var sw = new StringWriter();
194+
var file = Path.GetTempFileName();
195195

196196
try
197197
{
198-
File.AppendAllText(file1, "Test");
199-
new string[] { "/noconfig", "@" + file1 }.ArchiveCommandLine(file2);
200-
Assert.Equal("Test", File.ReadAllText(file2));
198+
File.AppendAllText(file, "Test");
199+
new string[] { "/noconfig", "@" + file }.WriteCommandLine(sw);
200+
Assert.Equal("Test\n", sw.ToString());
201201
}
202202
finally
203203
{
204-
File.Delete(file1);
205-
File.Delete(file2);
204+
File.Delete(file);
206205
}
207206
}
208207
}

csharp/extractor/Semmle.Extraction/Extractor.cs

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -79,11 +79,6 @@ public interface IExtractor
7979
/// </summary>
8080
ILogger Logger { get; }
8181

82-
/// <summary>
83-
/// The extractor SHA, obtained from the git log.
84-
/// </summary>
85-
string Version { get; }
86-
8782
/// <summary>
8883
/// Creates a new context.
8984
/// </summary>
@@ -201,6 +196,6 @@ public string OutputPath
201196

202197
public ILogger Logger { get; private set; }
203198

204-
public string Version => $"{ThisAssembly.Git.BaseTag} ({ThisAssembly.Git.Sha})";
199+
public static string Version => $"{ThisAssembly.Git.BaseTag} ({ThisAssembly.Git.Sha})";
205200
}
206201
}

csharp/extractor/Semmle.Extraction/TrapWriter.cs

Lines changed: 2 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
using System;
44
using System.IO;
55
using System.IO.Compression;
6-
using System.Security.Cryptography;
76
using System.Text;
87

98
namespace Semmle.Extraction
@@ -182,8 +181,8 @@ public void Dispose()
182181
return;
183182
}
184183

185-
var existingHash = ComputeHash(TrapFile);
186-
var hash = ComputeHash(tmpFile);
184+
var existingHash = FileUtils.ComputeFileHash(TrapFile);
185+
var hash = FileUtils.ComputeFileHash(tmpFile);
187186
if (existingHash != hash)
188187
{
189188
var root = TrapFile.Substring(0, TrapFile.Length - 8); // Remove trailing ".trap.gz"
@@ -205,22 +204,6 @@ public void Emit(ITrapEmitter emitter)
205204
emitter.EmitTrap(Writer);
206205
}
207206

208-
/// <summary>
209-
/// Computes the hash of <paramref name="filePath"/>.
210-
/// </summary>
211-
static string ComputeHash(string filePath)
212-
{
213-
using (var fileStream = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.Read))
214-
using (var shaAlg = new SHA256Managed())
215-
{
216-
var sha = shaAlg.ComputeHash(fileStream);
217-
var hex = new StringBuilder(sha.Length * 2);
218-
foreach (var b in sha)
219-
hex.AppendFormat("{0:x2}", b);
220-
return hex.ToString();
221-
}
222-
}
223-
224207
/// <summary>
225208
/// Attempts to archive the specified input file to the normal area of the source archive.
226209
/// The file's path must be sufficiently short so as to render the path of its copy in the

csharp/extractor/Semmle.Util/CommandLineExtensions.cs

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,16 +11,20 @@ public static class CommandLineExtensions
1111
/// Subsequent "@" arguments are ignored.
1212
/// </summary>
1313
/// <param name="commandLineArguments">The raw command line arguments.</param>
14-
/// <param name="filename">The full filename to write to.</param>
14+
/// <param name="textWriter">The writer to archive to.</param>
1515
/// <returns>True iff at least one "@" response file was found and its contents written to <paramref name="textWriter"/>.</returns>
16-
public static bool ArchiveCommandLine(this IEnumerable<string> commandLineArguments, string filename)
16+
public static bool WriteCommandLine(this IEnumerable<string> commandLineArguments, TextWriter textWriter)
1717
{
18-
foreach (var arg in commandLineArguments.Where(arg => arg[0] == '@').Select(arg => arg.Substring(1)))
18+
var found = false;
19+
foreach (var arg in commandLineArguments.Where(arg => arg.StartsWith('@')).Select(arg => arg.Substring(1)))
1920
{
20-
File.Copy(arg, filename, true);
21-
return true;
21+
string line;
22+
using (StreamReader file = new StreamReader(arg))
23+
while ((line = file.ReadLine()) != null)
24+
textWriter.WriteLine(line);
25+
found = true;
2226
}
23-
return false;
27+
return found;
2428
}
2529
}
2630
}

csharp/extractor/Semmle.Util/FileUtils.cs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
using System;
22
using System.IO;
33
using System.Linq;
4+
using System.Security.Cryptography;
5+
using System.Text;
46

57
namespace Semmle.Util
68
{
@@ -78,5 +80,21 @@ public static string FindProgramOnPath(string prog)
7880
var candidates = paths?.Where(path => exes.Any(exe0 => File.Exists(Path.Combine(path, exe0))));
7981
return candidates?.FirstOrDefault();
8082
}
83+
84+
/// <summary>
85+
/// Computes the hash of <paramref name="filePath"/>.
86+
/// </summary>
87+
public static string ComputeFileHash(string filePath)
88+
{
89+
using (var fileStream = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.Read))
90+
using (var shaAlg = new SHA256Managed())
91+
{
92+
var sha = shaAlg.ComputeHash(fileStream);
93+
var hex = new StringBuilder(sha.Length * 2);
94+
foreach (var b in sha)
95+
hex.AppendFormat("{0:x2}", b);
96+
return hex.ToString();
97+
}
98+
}
8199
}
82100
}

0 commit comments

Comments
 (0)