From 19c9b9eb00ae9afa7bedb10bd6be06bb41a880f6 Mon Sep 17 00:00:00 2001 From: Jan Jones Date: Tue, 11 Feb 2025 10:35:29 +0100 Subject: [PATCH] Detect data section string literal hash collisions (#77061) * Detect data section string literal hash collisions * Move check into GetOrAdd * Remove VB error * Remove test only option from equality * Truncate the string in the error message * Update the spec * Update the spec --- docs/features/string-literals-data-section.md | 18 ++++++++++- .../CSharp/Portable/CSharpResources.resx | 3 ++ .../CSharp/Portable/Errors/ErrorCode.cs | 2 ++ .../CSharp/Portable/Errors/ErrorFacts.cs | 4 ++- .../CSharp/Portable/Errors/MessageProvider.cs | 1 + .../Portable/xlf/CSharpResources.cs.xlf | 5 ++++ .../Portable/xlf/CSharpResources.de.xlf | 5 ++++ .../Portable/xlf/CSharpResources.es.xlf | 5 ++++ .../Portable/xlf/CSharpResources.fr.xlf | 5 ++++ .../Portable/xlf/CSharpResources.it.xlf | 5 ++++ .../Portable/xlf/CSharpResources.ja.xlf | 5 ++++ .../Portable/xlf/CSharpResources.ko.xlf | 5 ++++ .../Portable/xlf/CSharpResources.pl.xlf | 5 ++++ .../Portable/xlf/CSharpResources.pt-BR.xlf | 5 ++++ .../Portable/xlf/CSharpResources.ru.xlf | 5 ++++ .../Portable/xlf/CSharpResources.tr.xlf | 5 ++++ .../Portable/xlf/CSharpResources.zh-Hans.xlf | 5 ++++ .../Portable/xlf/CSharpResources.zh-Hant.xlf | 5 ++++ .../Test/Emit/Emit/EmitMetadataTests.cs | 21 +++++++++++++ .../Test/Syntax/Diagnostics/DiagnosticTest.cs | 1 + .../CodeAnalysisTest/Emit/EmitOptionsTests.cs | 3 +- .../CodeGen/PrivateImplementationDetails.cs | 30 +++++++++++++++---- .../Diagnostic/CommonMessageProvider.cs | 1 + .../Core/Portable/Emit/EmitOptions.cs | 2 ++ .../Test/Core/Mocks/TestMessageProvider.cs | 2 ++ .../VisualBasic/Portable/Errors/ErrorFacts.vb | 1 + .../Portable/Errors/MessageProvider.vb | 6 ++++ .../CSharpLspBuildOnlyDiagnostics.cs | 3 +- 28 files changed, 154 insertions(+), 9 deletions(-) diff --git a/docs/features/string-literals-data-section.md b/docs/features/string-literals-data-section.md index 133416a2cfa36..adda3e666eaf0 100644 --- a/docs/features/string-literals-data-section.md +++ b/docs/features/string-literals-data-section.md @@ -54,7 +54,7 @@ The utf8 string literal encoding emit strategy emits `ldsfld` of a field in a ge For every unique string literal, a unique internal static class is generated which: - has name composed of `` followed by a hex-encoded XXH128 hash of the string - (collisions [should not happen][xxh128] with XXH128 and so they aren't currently detected or reported, the behavior in that case is undefined), + (collisions [should not happen][xxh128] with XXH128, but if there are string literals which would result in the same XXH128 hash, a compile-time error is reported), - is nested in the `` type to avoid polluting the global namespace and to avoid having to enforce name uniqueness across modules, - has one internal static readonly `string` field which is initialized in a static constructor of the class, @@ -62,6 +62,7 @@ For every unique string literal, a unique internal static class is generated whi There is also an internal static readonly `.data` field generated into `` containing the actual bytes, similar to [u8 string literals][u8-literals] and [constant array initializers][constant-array-init]. +This field uses hex-encoded SHA-256 hash for its name and collisions are currently not reported by the compiler. These other scenarios might also reuse the data field, e.g., the following statements could all reuse the same data field: ```cs @@ -241,6 +242,9 @@ This fixup phase already exists in the compiler in `MetadataWriter.WriteInstruct It is called from `SerializeMethodBodies` which precedes `PopulateSystemTables` call, hence synthesizing the utf8 string classes in the former should be possible and they would be emitted in the latter. +Alternatively, we could collect string literals during binding, then before emit sort them by length and content (for determinism) +to find the ones that are over the threshold and should be emitted with this new strategy. + ### Statistics The compiler could emit an info diagnostic with useful statistics for customers to determine what threshold to set. @@ -302,6 +306,18 @@ The compiler should report a diagnostic when the feature is enabled together wit `[assembly: System.Runtime.CompilerServices.CompilationRelaxations(0)]`, i.e., string interning enabled, because that is incompatible with the feature. +### Avoiding hash collisions + +Instead of XXH128 for the type names and SHA-256 for the data field names, we could use index-based names. +- The compiler could assign names lazily based on metadata tokens which are deterministic. + If building on the current approach, that might require some refactoring, + because internal data structures in the compiler might not be ready for lazy names like that. + But it would be easier if combined with the first strategy suggested for [automatic threshold](#automatic-threshold) above, + where we would not synthesize the types until very late in the emit phase (during fixup of the metadata tokens). +- We could build on the second strategy suggested for [automatic threshold](#automatic-threshold) where we would collect string literals during binding + (and perhaps also constant arrays and u8 strings if we want to extend this support to them as well), + then before emit we would sort them by length and content and assign indices to them to be then used for the synthesized names. + [u8-literals]: https://learn.microsoft.com/en-us/dotnet/csharp/language-reference/proposals/csharp-11.0/utf8-string-literals [constant-array-init]: https://github.com/dotnet/roslyn/pull/24621 diff --git a/src/Compilers/CSharp/Portable/CSharpResources.resx b/src/Compilers/CSharp/Portable/CSharpResources.resx index 13c398af16501..bdb7325e53256 100644 --- a/src/Compilers/CSharp/Portable/CSharpResources.resx +++ b/src/Compilers/CSharp/Portable/CSharpResources.resx @@ -8047,4 +8047,7 @@ To remove the warning, you can use /reference instead (set the Embed Interop Typ 'InterceptsLocationAttribute(string, int, int)' is not supported. Move to 'InterceptableLocation'-based generation of these attributes instead. (https://github.com/dotnet/roslyn/issues/72133) + + Cannot emit this string literal into the data section because it has XXHash128 collision with another string literal: {0} + diff --git a/src/Compilers/CSharp/Portable/Errors/ErrorCode.cs b/src/Compilers/CSharp/Portable/Errors/ErrorCode.cs index 83f128759bf41..53439e87d1b0f 100644 --- a/src/Compilers/CSharp/Portable/Errors/ErrorCode.cs +++ b/src/Compilers/CSharp/Portable/Errors/ErrorCode.cs @@ -2360,6 +2360,8 @@ internal enum ErrorCode ERR_ImplicitlyTypedParamsParameter = 9272, ERR_VariableDeclarationNamedField = 9273, + ERR_DataSectionStringLiteralHashCollision = 9274, + // Note: you will need to do the following after adding errors: // 1) Update ErrorFacts.IsBuildOnlyDiagnostic (src/Compilers/CSharp/Portable/Errors/ErrorFacts.cs) diff --git a/src/Compilers/CSharp/Portable/Errors/ErrorFacts.cs b/src/Compilers/CSharp/Portable/Errors/ErrorFacts.cs index f6e520514814b..c3df3adc794b8 100644 --- a/src/Compilers/CSharp/Portable/Errors/ErrorFacts.cs +++ b/src/Compilers/CSharp/Portable/Errors/ErrorFacts.cs @@ -637,7 +637,9 @@ or ErrorCode.ERR_InterceptableMethodMustBeOrdinary or ErrorCode.ERR_PossibleAsyncIteratorWithoutYield or ErrorCode.ERR_PossibleAsyncIteratorWithoutYieldOrAwait or ErrorCode.ERR_RefLocalAcrossAwait - // Update src\EditorFeatures\CSharp\LanguageServer\CSharpLspBuildOnlyDiagnostics.cs + or ErrorCode.ERR_DataSectionStringLiteralHashCollision + // Update src\Features\CSharp\Portable\Diagnostics\LanguageServer\CSharpLspBuildOnlyDiagnostics.cs + // and TestIsBuildOnlyDiagnostic in src\Compilers\CSharp\Test\Syntax\Diagnostics\DiagnosticTest.cs // whenever new values are added here. => true, diff --git a/src/Compilers/CSharp/Portable/Errors/MessageProvider.cs b/src/Compilers/CSharp/Portable/Errors/MessageProvider.cs index cdd1ca8b7f5f2..ba05a944c54bd 100644 --- a/src/Compilers/CSharp/Portable/Errors/MessageProvider.cs +++ b/src/Compilers/CSharp/Portable/Errors/MessageProvider.cs @@ -250,6 +250,7 @@ public override void ReportDuplicateMetadataReferenceWeak(DiagnosticBag diagnost public override int ERR_EncUpdateFailedMissingSymbol => (int)ErrorCode.ERR_EncUpdateFailedMissingSymbol; public override int ERR_InvalidDebugInfo => (int)ErrorCode.ERR_InvalidDebugInfo; public override int ERR_FunctionPointerTypesInAttributeNotSupported => (int)ErrorCode.ERR_FunctionPointerTypesInAttributeNotSupported; + public override int ERR_DataSectionStringLiteralHashCollision => (int)ErrorCode.ERR_DataSectionStringLiteralHashCollision; // Generators: public override int WRN_GeneratorFailedDuringInitialization => (int)ErrorCode.WRN_GeneratorFailedDuringInitialization; diff --git a/src/Compilers/CSharp/Portable/xlf/CSharpResources.cs.xlf b/src/Compilers/CSharp/Portable/xlf/CSharpResources.cs.xlf index 9b514141b8ee4..5720459cb38ce 100644 --- a/src/Compilers/CSharp/Portable/xlf/CSharpResources.cs.xlf +++ b/src/Compilers/CSharp/Portable/xlf/CSharpResources.cs.xlf @@ -537,6 +537,11 @@ Kopírovací konstruktor {0} musí být veřejný nebo chráněný, protože záznam není zapečetěný. + + Cannot emit this string literal into the data section because it has XXHash128 collision with another string literal: {0} + Cannot emit this string literal into the data section because it has XXHash128 collision with another string literal: {0} + + The name '{0}' does not match the corresponding 'Deconstruct' parameter '{1}'. Název {0} neodpovídá příslušnému parametru Deconstruct {1}. diff --git a/src/Compilers/CSharp/Portable/xlf/CSharpResources.de.xlf b/src/Compilers/CSharp/Portable/xlf/CSharpResources.de.xlf index 08914ed33405f..2ac6b42fb054d 100644 --- a/src/Compilers/CSharp/Portable/xlf/CSharpResources.de.xlf +++ b/src/Compilers/CSharp/Portable/xlf/CSharpResources.de.xlf @@ -537,6 +537,11 @@ Der Kopierkonstruktor "{0}" muss öffentlich oder geschützt sein, weil der Datensatz nicht versiegelt ist. + + Cannot emit this string literal into the data section because it has XXHash128 collision with another string literal: {0} + Cannot emit this string literal into the data section because it has XXHash128 collision with another string literal: {0} + + The name '{0}' does not match the corresponding 'Deconstruct' parameter '{1}'. Der Name "{0}" stimmt nicht mit dem entsprechenden Deconstruct-Parameter "{1}" überein. diff --git a/src/Compilers/CSharp/Portable/xlf/CSharpResources.es.xlf b/src/Compilers/CSharp/Portable/xlf/CSharpResources.es.xlf index 97fd9dabc3794..e776b15f4336c 100644 --- a/src/Compilers/CSharp/Portable/xlf/CSharpResources.es.xlf +++ b/src/Compilers/CSharp/Portable/xlf/CSharpResources.es.xlf @@ -537,6 +537,11 @@ Un constructor de copia "{0}" debe ser público o estar protegido porque el registro no está sellado. + + Cannot emit this string literal into the data section because it has XXHash128 collision with another string literal: {0} + Cannot emit this string literal into the data section because it has XXHash128 collision with another string literal: {0} + + The name '{0}' does not match the corresponding 'Deconstruct' parameter '{1}'. El nombre "{0}" no coincide con el parámetro de "Deconstruct" correspondiente, "{1}". diff --git a/src/Compilers/CSharp/Portable/xlf/CSharpResources.fr.xlf b/src/Compilers/CSharp/Portable/xlf/CSharpResources.fr.xlf index aed63cb49673e..ddceebdb38625 100644 --- a/src/Compilers/CSharp/Portable/xlf/CSharpResources.fr.xlf +++ b/src/Compilers/CSharp/Portable/xlf/CSharpResources.fr.xlf @@ -537,6 +537,11 @@ Un constructeur de copie '{0}' doit être public ou protégé, car l'enregistrement n'est pas sealed. + + Cannot emit this string literal into the data section because it has XXHash128 collision with another string literal: {0} + Cannot emit this string literal into the data section because it has XXHash128 collision with another string literal: {0} + + The name '{0}' does not match the corresponding 'Deconstruct' parameter '{1}'. Le nom '{0}' ne correspond pas au paramètre 'Deconstruct' correspondant '{1}'. diff --git a/src/Compilers/CSharp/Portable/xlf/CSharpResources.it.xlf b/src/Compilers/CSharp/Portable/xlf/CSharpResources.it.xlf index c688b9962573d..fd94a9ee799ce 100644 --- a/src/Compilers/CSharp/Portable/xlf/CSharpResources.it.xlf +++ b/src/Compilers/CSharp/Portable/xlf/CSharpResources.it.xlf @@ -537,6 +537,11 @@ Un costruttore di copia '{0}' deve essere pubblico o protetto perché il record non è sealed. + + Cannot emit this string literal into the data section because it has XXHash128 collision with another string literal: {0} + Cannot emit this string literal into the data section because it has XXHash128 collision with another string literal: {0} + + The name '{0}' does not match the corresponding 'Deconstruct' parameter '{1}'. Il nome '{0}' non corrisponde al parametro '{1}' di 'Deconstruct' corrispondente. diff --git a/src/Compilers/CSharp/Portable/xlf/CSharpResources.ja.xlf b/src/Compilers/CSharp/Portable/xlf/CSharpResources.ja.xlf index f005a99296f9a..65647430f1f04 100644 --- a/src/Compilers/CSharp/Portable/xlf/CSharpResources.ja.xlf +++ b/src/Compilers/CSharp/Portable/xlf/CSharpResources.ja.xlf @@ -537,6 +537,11 @@ コピー コンストラクター '{0}' は、レコードが sealed ではないため、public または protected にする必要があります。 + + Cannot emit this string literal into the data section because it has XXHash128 collision with another string literal: {0} + Cannot emit this string literal into the data section because it has XXHash128 collision with another string literal: {0} + + The name '{0}' does not match the corresponding 'Deconstruct' parameter '{1}'. 名前 '{0}' は対応する 'Deconstruct' パラメーター '{1}' と一致しません。 diff --git a/src/Compilers/CSharp/Portable/xlf/CSharpResources.ko.xlf b/src/Compilers/CSharp/Portable/xlf/CSharpResources.ko.xlf index 3d3a95f5953ff..b01d67981c2c0 100644 --- a/src/Compilers/CSharp/Portable/xlf/CSharpResources.ko.xlf +++ b/src/Compilers/CSharp/Portable/xlf/CSharpResources.ko.xlf @@ -537,6 +537,11 @@ 레코드가 봉인되지 않았으므로 복사 생성자 '{0}'이(가) 퍼블릭이거나 보호되어야 합니다. + + Cannot emit this string literal into the data section because it has XXHash128 collision with another string literal: {0} + Cannot emit this string literal into the data section because it has XXHash128 collision with another string literal: {0} + + The name '{0}' does not match the corresponding 'Deconstruct' parameter '{1}'. '{0}' 이름이 해당 'Deconstruct' 매개 변수 '{1}'과(와) 일치하지 않습니다. diff --git a/src/Compilers/CSharp/Portable/xlf/CSharpResources.pl.xlf b/src/Compilers/CSharp/Portable/xlf/CSharpResources.pl.xlf index 2e86434383d85..57dbceb0ec44e 100644 --- a/src/Compilers/CSharp/Portable/xlf/CSharpResources.pl.xlf +++ b/src/Compilers/CSharp/Portable/xlf/CSharpResources.pl.xlf @@ -537,6 +537,11 @@ Konstruktor kopiujący „{0}” musi być publiczny lub chroniony, ponieważ rekord nie jest zapieczętowany. + + Cannot emit this string literal into the data section because it has XXHash128 collision with another string literal: {0} + Cannot emit this string literal into the data section because it has XXHash128 collision with another string literal: {0} + + The name '{0}' does not match the corresponding 'Deconstruct' parameter '{1}'. Nazwa „{0}” nie jest zgodna z odpowiednim parametrem „Deconstruct” „{1}”. diff --git a/src/Compilers/CSharp/Portable/xlf/CSharpResources.pt-BR.xlf b/src/Compilers/CSharp/Portable/xlf/CSharpResources.pt-BR.xlf index 060041b7ff0b1..d0d94f1330ea7 100644 --- a/src/Compilers/CSharp/Portable/xlf/CSharpResources.pt-BR.xlf +++ b/src/Compilers/CSharp/Portable/xlf/CSharpResources.pt-BR.xlf @@ -537,6 +537,11 @@ Um construtor de cópia '{0}' precisa ser público ou protegido porque o registro não está selado. + + Cannot emit this string literal into the data section because it has XXHash128 collision with another string literal: {0} + Cannot emit this string literal into the data section because it has XXHash128 collision with another string literal: {0} + + The name '{0}' does not match the corresponding 'Deconstruct' parameter '{1}'. O nome '{0}' não corresponde ao parâmetro 'Deconstruct' '{1}'. diff --git a/src/Compilers/CSharp/Portable/xlf/CSharpResources.ru.xlf b/src/Compilers/CSharp/Portable/xlf/CSharpResources.ru.xlf index c041828059022..2c1f011baab3d 100644 --- a/src/Compilers/CSharp/Portable/xlf/CSharpResources.ru.xlf +++ b/src/Compilers/CSharp/Portable/xlf/CSharpResources.ru.xlf @@ -537,6 +537,11 @@ Конструктор копий "{0}" должен быть открытым или защищенным, так как запись не запечатана. + + Cannot emit this string literal into the data section because it has XXHash128 collision with another string literal: {0} + Cannot emit this string literal into the data section because it has XXHash128 collision with another string literal: {0} + + The name '{0}' does not match the corresponding 'Deconstruct' parameter '{1}'. Имя "{0}" не соответствует указанному параметру "Deconstruct" "{1}". diff --git a/src/Compilers/CSharp/Portable/xlf/CSharpResources.tr.xlf b/src/Compilers/CSharp/Portable/xlf/CSharpResources.tr.xlf index 5fb5ce580e40c..4e5bd6417ef0e 100644 --- a/src/Compilers/CSharp/Portable/xlf/CSharpResources.tr.xlf +++ b/src/Compilers/CSharp/Portable/xlf/CSharpResources.tr.xlf @@ -537,6 +537,11 @@ Kayıt mühürlü olmadığından, '{0}' kopya oluşturucusu genel veya korumalı olmalıdır. + + Cannot emit this string literal into the data section because it has XXHash128 collision with another string literal: {0} + Cannot emit this string literal into the data section because it has XXHash128 collision with another string literal: {0} + + The name '{0}' does not match the corresponding 'Deconstruct' parameter '{1}'. '{0}' adı ilgili '{1}' 'Deconstruct' parametresiyle eşleşmiyor. diff --git a/src/Compilers/CSharp/Portable/xlf/CSharpResources.zh-Hans.xlf b/src/Compilers/CSharp/Portable/xlf/CSharpResources.zh-Hans.xlf index d338387f671e6..2ba44fcc0e1bd 100644 --- a/src/Compilers/CSharp/Portable/xlf/CSharpResources.zh-Hans.xlf +++ b/src/Compilers/CSharp/Portable/xlf/CSharpResources.zh-Hans.xlf @@ -537,6 +537,11 @@ 复制构造函数“{0}”必须是公共的或受保护的,因为该记录未密封。 + + Cannot emit this string literal into the data section because it has XXHash128 collision with another string literal: {0} + Cannot emit this string literal into the data section because it has XXHash128 collision with another string literal: {0} + + The name '{0}' does not match the corresponding 'Deconstruct' parameter '{1}'. 名称“{0}”与相应 "Deconstruct" 参数“{1}”不匹配。 diff --git a/src/Compilers/CSharp/Portable/xlf/CSharpResources.zh-Hant.xlf b/src/Compilers/CSharp/Portable/xlf/CSharpResources.zh-Hant.xlf index f67ddd602bd43..4ae88a2fecd1e 100644 --- a/src/Compilers/CSharp/Portable/xlf/CSharpResources.zh-Hant.xlf +++ b/src/Compilers/CSharp/Portable/xlf/CSharpResources.zh-Hant.xlf @@ -537,6 +537,11 @@ 因為記錄不是密封的,所以複製建構函式 '{0}' 必須是公用或受保護。 + + Cannot emit this string literal into the data section because it has XXHash128 collision with another string literal: {0} + Cannot emit this string literal into the data section because it has XXHash128 collision with another string literal: {0} + + The name '{0}' does not match the corresponding 'Deconstruct' parameter '{1}'. 名稱 '{0}' 與對應的 'Deconstruct' 參數 '{1}' 不相符。 diff --git a/src/Compilers/CSharp/Test/Emit/Emit/EmitMetadataTests.cs b/src/Compilers/CSharp/Test/Emit/Emit/EmitMetadataTests.cs index 126f4d6ed3bbd..c8577b654b471 100644 --- a/src/Compilers/CSharp/Test/Emit/Emit/EmitMetadataTests.cs +++ b/src/Compilers/CSharp/Test/Emit/Emit/EmitMetadataTests.cs @@ -3175,6 +3175,27 @@ .maxstack 1 """); } + [Fact] + public void DataSectionStringLiterals_HashCollision() + { + var emitOptions = new EmitOptions + { + // Take only the first byte of each string as its hash to simulate collisions. + TestOnly_DataToHexViaXxHash128 = static (data) => data[0].ToString(), + }; + var source = """ + System.Console.Write("a"); + System.Console.Write("b"); + System.Console.Write("aa"); + """; + CreateCompilation(source, + parseOptions: TestOptions.Regular.WithFeature("experimental-data-section-string-literals", "0")) + .VerifyEmitDiagnostics(emitOptions, + // (3,22): error CS9274: Cannot emit this string literal into the data section because it has XXHash128 collision with another string literal: a + // System.Console.Write("aa"); + Diagnostic(ErrorCode.ERR_DataSectionStringLiteralHashCollision, @"""aa""").WithArguments("a").WithLocation(3, 22)); + } + [Fact] public void DataSectionStringLiterals_SynthesizedTypes() { diff --git a/src/Compilers/CSharp/Test/Syntax/Diagnostics/DiagnosticTest.cs b/src/Compilers/CSharp/Test/Syntax/Diagnostics/DiagnosticTest.cs index e2c7ddb41920c..0f68ba4c2475f 100644 --- a/src/Compilers/CSharp/Test/Syntax/Diagnostics/DiagnosticTest.cs +++ b/src/Compilers/CSharp/Test/Syntax/Diagnostics/DiagnosticTest.cs @@ -2991,6 +2991,7 @@ public void TestIsBuildOnlyDiagnostic() case ErrorCode.ERR_PossibleAsyncIteratorWithoutYield: case ErrorCode.ERR_PossibleAsyncIteratorWithoutYieldOrAwait: case ErrorCode.ERR_RefLocalAcrossAwait: + case ErrorCode.ERR_DataSectionStringLiteralHashCollision: Assert.True(isBuildOnly, $"Check failed for ErrorCode.{errorCode}"); break; diff --git a/src/Compilers/Core/CodeAnalysisTest/Emit/EmitOptionsTests.cs b/src/Compilers/Core/CodeAnalysisTest/Emit/EmitOptionsTests.cs index 17db9893fe02c..16fafd03acb16 100644 --- a/src/Compilers/Core/CodeAnalysisTest/Emit/EmitOptionsTests.cs +++ b/src/Compilers/Core/CodeAnalysisTest/Emit/EmitOptionsTests.cs @@ -92,7 +92,8 @@ public void TestFieldsForEqualsAndGetHashCode() nameof(EmitOptions.IncludePrivateMembers), nameof(EmitOptions.InstrumentationKinds), nameof(EmitOptions.DefaultSourceFileEncoding), - nameof(EmitOptions.FallbackSourceFileEncoding)); + nameof(EmitOptions.FallbackSourceFileEncoding), + nameof(EmitOptions.TestOnly_DataToHexViaXxHash128)); } [Fact] diff --git a/src/Compilers/Core/Portable/CodeGen/PrivateImplementationDetails.cs b/src/Compilers/Core/Portable/CodeGen/PrivateImplementationDetails.cs index 167f24f09be32..4866750134114 100644 --- a/src/Compilers/Core/Portable/CodeGen/PrivateImplementationDetails.cs +++ b/src/Compilers/Core/Portable/CodeGen/PrivateImplementationDetails.cs @@ -100,6 +100,9 @@ internal sealed class PrivateImplementationDetails : DefaultTypeDef, Cci.INamesp // data section string literal holders (key is the full string literal) private readonly ConcurrentDictionary _dataSectionStringLiteralTypes = new ConcurrentDictionary(); + // map of data section string literal generated type names ( + hash) to the full text + private readonly ConcurrentDictionary _dataSectionStringLiteralNames = new ConcurrentDictionary(); + private ImmutableArray _orderedNestedTypes; internal PrivateImplementationDetails( @@ -333,16 +336,28 @@ internal MappedField GetOrAddDataField(ImmutableArray data, ushort alignme } var @this = moduleBuilder.GetPrivateImplClass(syntaxNode, diagnostics); - return @this._dataSectionStringLiteralTypes.GetOrAdd(text, static (key, arg) => + return @this._dataSectionStringLiteralTypes.GetOrAdd(text, static (text, arg) => { - var (@this, data, diagnostics) = arg; + var (@this, data, syntaxNode, diagnostics) = arg; - string name = "" + DataToHexViaXxHash128(data); + string name = "" + @this.DataToHexViaXxHash128(data); MappedField dataField = @this.GetOrAddDataField(data, alignment: 1); Cci.IMethodDefinition bytesToStringHelper = @this.GetOrSynthesizeBytesToStringHelper(diagnostics); + var previousText = @this._dataSectionStringLiteralNames.GetOrAdd(name, text); + if (previousText != text) + { + // If there is a hash collision, we cannot fallback to normal string literal emit strategy + // because the selection of which literal would get which emit strategy would not be deterministic. + var messageProvider = @this.ModuleBuilder.CommonCompilation.MessageProvider; + diagnostics.Add(messageProvider.CreateDiagnostic( + messageProvider.ERR_DataSectionStringLiteralHashCollision, + syntaxNode.GetLocation(), + previousText[..Math.Min(previousText.Length, 500)])); + } + return new DataSectionStringType( name: name, containingType: @this, @@ -350,7 +365,7 @@ internal MappedField GetOrAddDataField(ImmutableArray data, ushort alignme bytesToStringHelper: bytesToStringHelper, diagnostics: diagnostics); }, - (@this, ImmutableCollectionsMarshal.AsImmutableArray(data), diagnostics)).Field; + (@this, ImmutableCollectionsMarshal.AsImmutableArray(data), syntaxNode, diagnostics)).Field; } /// @@ -548,8 +563,13 @@ private static string DataToHex(ImmutableArray data) return HashToHex(hash.AsSpan()); } - private static string DataToHexViaXxHash128(ImmutableArray data) + private string DataToHexViaXxHash128(ImmutableArray data) { + if (ModuleBuilder.EmitOptions.TestOnly_DataToHexViaXxHash128 is { } handler) + { + return handler(data); + } + Span hash = stackalloc byte[sizeof(ulong) * 2]; int bytesWritten = XxHash128.Hash(data.AsSpan(), hash); Debug.Assert(bytesWritten == hash.Length); diff --git a/src/Compilers/Core/Portable/Diagnostic/CommonMessageProvider.cs b/src/Compilers/Core/Portable/Diagnostic/CommonMessageProvider.cs index 03af6e1a38996..b662165fee496 100644 --- a/src/Compilers/Core/Portable/Diagnostic/CommonMessageProvider.cs +++ b/src/Compilers/Core/Portable/Diagnostic/CommonMessageProvider.cs @@ -244,6 +244,7 @@ public string GetIdForErrorCode(int errorCode) public abstract int ERR_EncUpdateFailedMissingSymbol { get; } public abstract int ERR_InvalidDebugInfo { get; } public abstract int ERR_FunctionPointerTypesInAttributeNotSupported { get; } + public abstract int ERR_DataSectionStringLiteralHashCollision { get; } // Generators: public abstract int WRN_GeneratorFailedDuringInitialization { get; } diff --git a/src/Compilers/Core/Portable/Emit/EmitOptions.cs b/src/Compilers/Core/Portable/Emit/EmitOptions.cs index 07e92a2f3371e..fefce27c268cc 100644 --- a/src/Compilers/Core/Portable/Emit/EmitOptions.cs +++ b/src/Compilers/Core/Portable/Emit/EmitOptions.cs @@ -120,6 +120,8 @@ public sealed class EmitOptions : IEquatable /// private bool _testOnly_AllowLocalStateTracing; + internal Func, string>? TestOnly_DataToHexViaXxHash128 { get; init; } + // 1.2 BACKCOMPAT OVERLOAD -- DO NOT TOUCH public EmitOptions( bool metadataOnly, diff --git a/src/Compilers/Test/Core/Mocks/TestMessageProvider.cs b/src/Compilers/Test/Core/Mocks/TestMessageProvider.cs index 008e2eb21e579..b3fa3ea6172e3 100644 --- a/src/Compilers/Test/Core/Mocks/TestMessageProvider.cs +++ b/src/Compilers/Test/Core/Mocks/TestMessageProvider.cs @@ -470,6 +470,8 @@ public override int ERR_InvalidDebugInfo public override int ERR_FunctionPointerTypesInAttributeNotSupported => throw new NotImplementedException(); + public override int ERR_DataSectionStringLiteralHashCollision => throw new NotImplementedException(); + public override int? WRN_ByValArraySizeConstRequired => throw new NotImplementedException(); } } diff --git a/src/Compilers/VisualBasic/Portable/Errors/ErrorFacts.vb b/src/Compilers/VisualBasic/Portable/Errors/ErrorFacts.vb index 0ceb29c88e20a..ca8fcd8f48918 100644 --- a/src/Compilers/VisualBasic/Portable/Errors/ErrorFacts.vb +++ b/src/Compilers/VisualBasic/Portable/Errors/ErrorFacts.vb @@ -21,6 +21,7 @@ Namespace Microsoft.CodeAnalysis.VisualBasic ERRID.ERR_CannotGotoNonScopeBlocksWithClosure, ERRID.ERR_SymbolDefinedInAssembly ' Update src\Features\VisualBasic\Portable\Diagnostics\LanguageServer\VisualBasicLspBuildOnlyDiagnostics.vb + ' and TestIsBuildOnlyDiagnostic in src\Compilers\VisualBasic\Test\Semantic\Diagnostics\DiagnosticTests.vb ' whenever new values are added here. Return True Case ERRID.Void, diff --git a/src/Compilers/VisualBasic/Portable/Errors/MessageProvider.vb b/src/Compilers/VisualBasic/Portable/Errors/MessageProvider.vb index c600c952fafeb..0e573eaa217bd 100644 --- a/src/Compilers/VisualBasic/Portable/Errors/MessageProvider.vb +++ b/src/Compilers/VisualBasic/Portable/Errors/MessageProvider.vb @@ -594,6 +594,12 @@ Namespace Microsoft.CodeAnalysis.VisualBasic End Get End Property + Public Overrides ReadOnly Property ERR_DataSectionStringLiteralHashCollision As Integer + Get + Throw ExceptionUtilities.Unreachable + End Get + End Property + ' Generators Public Overrides ReadOnly Property WRN_GeneratorFailedDuringInitialization As Integer Get diff --git a/src/Features/CSharp/Portable/Diagnostics/LanguageServer/CSharpLspBuildOnlyDiagnostics.cs b/src/Features/CSharp/Portable/Diagnostics/LanguageServer/CSharpLspBuildOnlyDiagnostics.cs index 6bd117bf73410..c19cb644ff215 100644 --- a/src/Features/CSharp/Portable/Diagnostics/LanguageServer/CSharpLspBuildOnlyDiagnostics.cs +++ b/src/Features/CSharp/Portable/Diagnostics/LanguageServer/CSharpLspBuildOnlyDiagnostics.cs @@ -60,7 +60,8 @@ namespace Microsoft.CodeAnalysis.CSharp.LanguageServer; "CS9207", // ErrorCode.ERR_InterceptableMethodMustBeOrdinary "CS8419", // ErrorCode.ERR_PossibleAsyncIteratorWithoutYield "CS8420", // ErrorCode.ERR_PossibleAsyncIteratorWithoutYieldOrAwait - "CS9217" // ErrorCode.ERR_RefLocalAcrossAwait + "CS9217", // ErrorCode.ERR_RefLocalAcrossAwait + "CS9274" // ErrorCode.ERR_DataSectionStringLiteralHashCollision )] [Shared] internal sealed class CSharpLspBuildOnlyDiagnostics : ILspBuildOnlyDiagnostics