From b1eed86d6c9c3e0d755ced5cb11902b6c9b7d8a8 Mon Sep 17 00:00:00 2001 From: Xpl0itR Date: Tue, 31 Oct 2023 02:30:03 +0000 Subject: [PATCH] Various improvements including but not limited to: - Move parsing logic to library project - Expanding coverage of the proto3 spec - Option to parse types from only a specified assembly - More accurate field name translation - Accept custom lookup functions for types and names - Some resilience to obfuscation - Fix infinite recursion bug causing stack overflow --- AssemblyInspector.cs | 62 ---- IWriteable.cs | 12 - LibProtodec/AssemblyInspector.cs | 43 +++ ProtobufEnum.cs => LibProtodec/Enum.cs | 19 +- .../Extensions.cs | 55 +++- LibProtodec/LibProtodec.csproj | 25 ++ ProtobufMessage.cs => LibProtodec/Message.cs | 49 ++-- LibProtodec/Protobuf.cs | 41 +++ LibProtodec/Protodec.cs | 272 ++++++++++++++++++ Program.cs | 54 ---- Protodec.cs | 183 ------------ README.md | 17 +- protodec.csproj | 17 -- protodec.sln | 8 +- protodec/Program.cs | 60 ++++ protodec/protodec.csproj | 15 + 16 files changed, 552 insertions(+), 380 deletions(-) delete mode 100644 AssemblyInspector.cs delete mode 100644 IWriteable.cs create mode 100644 LibProtodec/AssemblyInspector.cs rename ProtobufEnum.cs => LibProtodec/Enum.cs (50%) rename StringExtensions.cs => LibProtodec/Extensions.cs (62%) create mode 100644 LibProtodec/LibProtodec.csproj rename ProtobufMessage.cs => LibProtodec/Message.cs (69%) create mode 100644 LibProtodec/Protobuf.cs create mode 100644 LibProtodec/Protodec.cs delete mode 100644 Program.cs delete mode 100644 Protodec.cs delete mode 100644 protodec.csproj create mode 100644 protodec/Program.cs create mode 100644 protodec/protodec.csproj diff --git a/AssemblyInspector.cs b/AssemblyInspector.cs deleted file mode 100644 index b4e150b..0000000 --- a/AssemblyInspector.cs +++ /dev/null @@ -1,62 +0,0 @@ -using System; -using System.Collections.Generic; -using System.IO; -using System.Linq; -using System.Reflection; -using System.Runtime.InteropServices; -using CommunityToolkit.Diagnostics; - -namespace protodec; - -public sealed class AssemblyInspector : IDisposable -{ - private const string DllPattern = "*.dll"; - - private readonly MetadataLoadContext _assemblyContext; - private readonly string[] _assemblyPaths; - private readonly Type _googleProtobufIMessage; - - public AssemblyInspector(string assemblyPath, bool includeRuntimeAssemblies) - { - if (File.Exists(assemblyPath)) - { - _assemblyPaths = new[] { assemblyPath }; - } - else if (Directory.Exists(assemblyPath)) - { - _assemblyPaths = Directory.EnumerateFiles(assemblyPath, DllPattern).ToArray(); - } - else - { - ThrowHelper.ThrowArgumentOutOfRangeException(assemblyPath); - } - - PathAssemblyResolver resolver = new(includeRuntimeAssemblies ? ConcatRuntimeAssemblyPaths(_assemblyPaths) : _assemblyPaths); - - _assemblyContext = new MetadataLoadContext(resolver); - _googleProtobufIMessage = _assemblyContext.LoadFromAssemblyName("Google.Protobuf") - .GetType("Google.Protobuf.IMessage")!; - } - - public IEnumerable GetProtobufMessageTypes() => - from assemblyPath - in _assemblyPaths - from type - in _assemblyContext.LoadFromAssemblyPath(assemblyPath).GetTypes() - where type.IsSealed - && type.Namespace != "Google.Protobuf.Reflection" - && type.Namespace != "Google.Protobuf.WellKnownTypes" - && type.IsAssignableTo(_googleProtobufIMessage) - select type; - - public void Dispose() => - _assemblyContext.Dispose(); - - private static IEnumerable ConcatRuntimeAssemblyPaths(IEnumerable paths) - { - string path = RuntimeEnvironment.GetRuntimeDirectory(); - - return Directory.EnumerateFiles(path, DllPattern) - .Concat(paths); - } -} \ No newline at end of file diff --git a/IWriteable.cs b/IWriteable.cs deleted file mode 100644 index 67dfc56..0000000 --- a/IWriteable.cs +++ /dev/null @@ -1,12 +0,0 @@ -using System.CodeDom.Compiler; - -namespace protodec; - -public interface IWritable -{ - string Name { get; } - - void WriteFileTo(IndentedTextWriter writer); - - void WriteTo(IndentedTextWriter writer); -} \ No newline at end of file diff --git a/LibProtodec/AssemblyInspector.cs b/LibProtodec/AssemblyInspector.cs new file mode 100644 index 0000000..56c4530 --- /dev/null +++ b/LibProtodec/AssemblyInspector.cs @@ -0,0 +1,43 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Reflection; + +namespace LibProtodec; + +public sealed class AssemblyInspector : IDisposable +{ + public readonly MetadataLoadContext AssemblyContext; + public readonly IReadOnlyList LoadedTypes; + + public AssemblyInspector(string assemblyDir, string? assemblyName = null) + { + string[] assemblyPaths = Directory.EnumerateFiles(assemblyDir, searchPattern: "*.dll") + .ToArray(); + + AssemblyContext = new MetadataLoadContext( + new PathAssemblyResolver(assemblyPaths)); + + LoadedTypes = assemblyName is null + ? assemblyPaths.SelectMany(path => AssemblyContext.LoadFromAssemblyPath(path).GetTypes()).ToList() + : AssemblyContext.LoadFromAssemblyName(assemblyName).GetTypes(); + } + + public IEnumerable GetProtobufMessageTypes() + { + Type googleProtobufIMessage = AssemblyContext.LoadFromAssemblyName("Google.Protobuf") + .GetType("Google.Protobuf.IMessage")!; + return from type + in LoadedTypes + where !type.IsNested + && type.IsSealed + && type.Namespace != "Google.Protobuf.Reflection" + && type.Namespace != "Google.Protobuf.WellKnownTypes" + && type.IsAssignableTo(googleProtobufIMessage) + select type; + } + + public void Dispose() => + AssemblyContext.Dispose(); +} \ No newline at end of file diff --git a/ProtobufEnum.cs b/LibProtodec/Enum.cs similarity index 50% rename from ProtobufEnum.cs rename to LibProtodec/Enum.cs index 6778f76..9f7d068 100644 --- a/ProtobufEnum.cs +++ b/LibProtodec/Enum.cs @@ -1,25 +1,30 @@ using System.CodeDom.Compiler; using System.Collections.Generic; -namespace protodec; +namespace LibProtodec; -public sealed record ProtobufEnum(string Name) : IWritable +public sealed class Enum : Protobuf { - public readonly Dictionary Fields = new(); + public readonly List> Fields = new(); - public void WriteFileTo(IndentedTextWriter writer) + public override void WriteFileTo(IndentedTextWriter writer) { - Protodec.WritePreambleTo(writer); + this.WritePreambleTo(writer); WriteTo(writer); } - public void WriteTo(IndentedTextWriter writer) + public override void WriteTo(IndentedTextWriter writer) { writer.Write("enum "); - writer.Write(Name); + writer.Write(this.Name); writer.WriteLine(" {"); writer.Indent++; + if (Fields.ContainsDuplicateKey()) + { + writer.WriteLine("""option allow_alias = true;"""); + } + foreach ((int id, string name) in Fields) { writer.Write(name); diff --git a/StringExtensions.cs b/LibProtodec/Extensions.cs similarity index 62% rename from StringExtensions.cs rename to LibProtodec/Extensions.cs index c99f48f..0c89b44 100644 --- a/StringExtensions.cs +++ b/LibProtodec/Extensions.cs @@ -1,25 +1,35 @@ -using System.Runtime.CompilerServices; +using System; +using System.Collections.Generic; +using System.Runtime.CompilerServices; -namespace protodec; +namespace LibProtodec; -public static class StringExtensions +public static class Extensions { - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static int CountUpper(this string str, int i = 0) + public static void Add(this ICollection> keyValuePairs, TKey key, TValue value) => + keyValuePairs.Add(new KeyValuePair(key, value)); + + public static bool ContainsDuplicateKey( + this IEnumerable> keyValuePairs, + IEqualityComparer? comparer = null) { - int upper = 0; + HashSet set = new(5, comparer); - for (; i < str.Length; i++) - if (char.IsAsciiLetterUpper(str[i])) - upper++; + foreach (KeyValuePair kvp in keyValuePairs) + { + if (!set.Add(kvp.Key)) + { + return true; + } + } - return upper; + return false; } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - // ReSharper disable once IdentifierTypo - public static bool IsBeebyted(this string name) => - name.Length == 11 && CountUpper(name) == 11; + public static string TrimEnd(this string @string, string trimStr) => + @string.EndsWith(trimStr, StringComparison.Ordinal) + ? @string[..^trimStr.Length] + : @string; public static string ToSnakeCaseLower(this string str) => string.Create(str.Length + CountUpper(str, 1), str, (newString, oldString) => @@ -64,4 +74,21 @@ public static class StringExtensions } } }); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + // ReSharper disable once IdentifierTypo + public static bool IsBeebyted(this string name) => + name.Length == 11 && CountUpper(name) == 11; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static int CountUpper(this string str, int i = 0) + { + int upper = 0; + + for (; i < str.Length; i++) + if (char.IsAsciiLetterUpper(str[i])) + upper++; + + return upper; + } } \ No newline at end of file diff --git a/LibProtodec/LibProtodec.csproj b/LibProtodec/LibProtodec.csproj new file mode 100644 index 0000000..4e5a51a --- /dev/null +++ b/LibProtodec/LibProtodec.csproj @@ -0,0 +1,25 @@ + + + + Xpl0itR + Copyright © 2023 Xpl0itR + A library to decompile protobuf parser/serializer classes compiled by protoc, from dotnet assemblies back into .proto definitions + true + true + 11 + enable + Library + MPL-2.0 + true + true + snupkg + net7.0 + + + + + + + + + \ No newline at end of file diff --git a/ProtobufMessage.cs b/LibProtodec/Message.cs similarity index 69% rename from ProtobufMessage.cs rename to LibProtodec/Message.cs index 533d938..16bab7a 100644 --- a/ProtobufMessage.cs +++ b/LibProtodec/Message.cs @@ -3,18 +3,18 @@ using System.Collections.Generic; using System.IO; using System.Linq; -namespace protodec; +namespace LibProtodec; -public sealed record ProtobufMessage(string Name) : IWritable +public sealed class Message : Protobuf { - public readonly HashSet Imports = new(); - public readonly Dictionary OneOfs = new(); - public readonly Dictionary Fields = new(); - public readonly Dictionary Nested = new(); + public readonly HashSet Imports = new(); + public readonly Dictionary OneOfs = new(); + public readonly Dictionary Fields = new(); + public readonly Dictionary Nested = new(); - public void WriteFileTo(IndentedTextWriter writer) + public override void WriteFileTo(IndentedTextWriter writer) { - Protodec.WritePreambleTo(writer); + this.WritePreambleTo(writer); if (Imports.Count > 0) { @@ -31,13 +31,23 @@ public sealed record ProtobufMessage(string Name) : IWritable WriteTo(writer); } - public void WriteTo(IndentedTextWriter writer) + public override void WriteTo(IndentedTextWriter writer) { writer.Write("message "); - writer.Write(Name); + writer.Write(this.Name); writer.WriteLine(" {"); writer.Indent++; + int[] oneOfs = OneOfs.SelectMany(oneOf => oneOf.Value).ToArray(); + + foreach ((int fieldId, (bool, string, string) field) in Fields) + { + if (oneOfs.Contains(fieldId)) + continue; + + WriteField(writer, fieldId, field); + } + foreach ((string name, int[] fieldIds) in OneOfs) { // ReSharper disable once StringLiteralTypo @@ -55,17 +65,7 @@ public sealed record ProtobufMessage(string Name) : IWritable writer.WriteLine('}'); } - int[] oneOfs = OneOfs.SelectMany(oneOf => oneOf.Value).ToArray(); - - foreach ((int fieldId, (string, string) field) in Fields) - { - if (oneOfs.Contains(fieldId)) - continue; - - WriteField(writer, fieldId, field); - } - - foreach (IWritable nested in Nested.Values) + foreach (Protobuf nested in Nested.Values) { nested.WriteTo(writer); writer.WriteLine(); @@ -75,8 +75,13 @@ public sealed record ProtobufMessage(string Name) : IWritable writer.Write('}'); } - private static void WriteField(TextWriter writer, int fieldId, (string Type, string Name) field) + private static void WriteField(TextWriter writer, int fieldId, (bool IsOptional, string Type, string Name) field) { + if (field.IsOptional) + { + writer.Write("optional "); + } + writer.Write(field.Type); writer.Write(' '); writer.Write(field.Name); diff --git a/LibProtodec/Protobuf.cs b/LibProtodec/Protobuf.cs new file mode 100644 index 0000000..364fe14 --- /dev/null +++ b/LibProtodec/Protobuf.cs @@ -0,0 +1,41 @@ +using System.CodeDom.Compiler; +using System.IO; + +namespace LibProtodec; + +public abstract class Protobuf +{ + public required string Name { get; init; } + + public string? AssemblyName { get; init; } + public string? Namespace { get; init; } + + public abstract void WriteFileTo(IndentedTextWriter writer); + + public abstract void WriteTo(IndentedTextWriter writer); + + protected void WritePreambleTo(TextWriter writer) => + WritePreambleTo(writer, AssemblyName, Namespace); + + // ReSharper disable once MethodOverloadWithOptionalParameter + public static void WritePreambleTo(TextWriter writer, string? assemblyName = null, string? @namespace = null) + { + writer.WriteLine("// Decompiled with protodec"); + + if (assemblyName is not null) + { + writer.Write("// Assembly: "); + writer.WriteLine(assemblyName); + } + + writer.WriteLine(); + writer.WriteLine("""syntax = "proto3";"""); + writer.WriteLine(); + + if (@namespace is not null) + { + writer.WriteLine($"""option csharp_namespace = "{@namespace}";"""); + writer.WriteLine(); + } + } +} \ No newline at end of file diff --git a/LibProtodec/Protodec.cs b/LibProtodec/Protodec.cs new file mode 100644 index 0000000..d1a95cf --- /dev/null +++ b/LibProtodec/Protodec.cs @@ -0,0 +1,272 @@ +using System; +using System.CodeDom.Compiler; +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using System.Linq; +using System.Reflection; +using CommunityToolkit.Diagnostics; + +namespace LibProtodec; + +public sealed class Protodec +{ + public delegate bool LookupFunc(string key, [MaybeNullWhen(false)] out string value); + + private readonly Dictionary _protobufs; + private readonly HashSet _currentDescent; + + public Protodec() + { + _protobufs = new Dictionary(); + _currentDescent = new HashSet(); + } + + public LookupFunc? CustomTypeLookup { get; init; } + + public LookupFunc? CustomNameLookup { get; init; } + + public IReadOnlyDictionary Protobufs => + _protobufs; + + public void WriteAllTo(IndentedTextWriter writer) + { + Protobuf.WritePreambleTo(writer); + + foreach (Protobuf proto in _protobufs.Values) + { + proto.WriteTo(writer); + writer.WriteLine(); + writer.WriteLine(); + } + } + + public void ParseMessage(Type type, bool skipEnums = false) + { + Guard.IsTrue(type.IsClass); + + ParseMessageInternal(type, skipEnums, null); + _currentDescent.Clear(); + } + + public void ParseEnum(Type type) + { + Guard.IsTrue(type.IsEnum); + + ParseEnumInternal(type, null); + _currentDescent.Clear(); + } + + private bool IsParsed(Type type, Message? parentMessage, out Dictionary protobufs) + { + protobufs = parentMessage is not null && type.IsNested + ? parentMessage.Nested + : _protobufs; + + return protobufs.ContainsKey(type.Name) + || !_currentDescent.Add(type.Name); + } + + private void ParseMessageInternal(Type messageClass, bool skipEnums, Message? parentMessage) + { + if (IsParsed(messageClass, parentMessage, out Dictionary protobufs)) + { + return; + } + + Message message = new() + { + Name = TranslateProtobufName(messageClass.Name), + AssemblyName = messageClass.Assembly.FullName, + Namespace = messageClass.Namespace + }; + + FieldInfo[] idFields = messageClass.GetFields(BindingFlags.Public | BindingFlags.Static); + PropertyInfo[] properties = messageClass.GetProperties(BindingFlags.Public | BindingFlags.Instance | BindingFlags.DeclaredOnly); + + for (int pi = 0, fi = 0; pi < properties.Length; pi++, fi++) + { + PropertyInfo property = properties[pi]; + + if (property.GetMethod is null || property.GetMethod.IsVirtual) + { + fi--; + continue; + } + + Type propertyType = property.PropertyType; + + // only OneOf enums are defined nested directly in the message class + if (propertyType.IsEnum && propertyType.DeclaringType?.Name == message.Name) + { + string oneOfName = TranslateOneOfName(property.Name); + int[] oneOfProtoFieldIds = propertyType.GetFields(BindingFlags.Public | BindingFlags.Static) + .Select(field => (int)field.GetRawConstantValue()!) + .Where(id => id > 0) + .ToArray(); + + message.OneOfs.Add(oneOfName, oneOfProtoFieldIds); + + fi--; + continue; + } + + FieldInfo idField = idFields[fi]; + Guard.IsTrue(idField.IsLiteral); + Guard.IsEqualTo(idField.FieldType.Name, nameof(Int32)); + + int msgFieldId = (int)idField.GetRawConstantValue()!; + bool msgFieldIsOptional = false; + string msgFieldType = ParseFieldType(propertyType, skipEnums, message); + string msgFieldName = TranslateMessageFieldName(property.Name); + + // optional protobuf fields will generate an additional "Has" get-only boolean property immediately after the real property + if (properties.Length > pi + 1 && properties[pi + 1].PropertyType.Name == nameof(Boolean) && !properties[pi + 1].CanWrite) + { + msgFieldIsOptional = true; + pi++; + } + + message.Fields.Add(msgFieldId, (msgFieldIsOptional, msgFieldType, msgFieldName)); + } + + protobufs.Add(message.Name, message); + } + + private void ParseEnumInternal(Type enumEnum, Message? parentMessage) + { + if (IsParsed(enumEnum, parentMessage, out Dictionary protobufs)) + { + return; + } + + Enum @enum = new() + { + Name = TranslateProtobufName(enumEnum.Name), + AssemblyName = enumEnum.Assembly.FullName, + Namespace = enumEnum.Namespace + }; + + foreach (FieldInfo field in enumEnum.GetFields(BindingFlags.Public | BindingFlags.Static)) + { + int enumFieldId = (int)field.GetRawConstantValue()!; + string enumFieldName = TranslateEnumFieldName(field, @enum.Name); + + @enum.Fields.Add(enumFieldId, enumFieldName); + } + + protobufs.Add(@enum.Name, @enum); + } + + private string ParseFieldType(Type type, bool skipEnums, Message message) + { + switch (type.Name) + { + case "ByteString": + return "bytes"; + case nameof(String): + return "string"; + case nameof(Boolean): + return "bool"; + case nameof(Double): + return "double"; + case nameof(UInt32): + return "uint32"; + case nameof(UInt64): + return "uint64"; + case nameof(Int32): + return "int32"; + case nameof(Int64): + return "int64"; + case nameof(Single): + return "float"; + } + + switch (type.GenericTypeArguments.Length) + { + case 1: + string t = ParseFieldType(type.GenericTypeArguments[0], skipEnums, message); + return "repeated " + t; + case 2: + string t1 = ParseFieldType(type.GenericTypeArguments[0], skipEnums, message); + string t2 = ParseFieldType(type.GenericTypeArguments[1], skipEnums, message); + return $"map<{t1}, {t2}>"; + } + + if (CustomTypeLookup?.Invoke(type.Name, out string? fieldType) == true) + { + return fieldType; + } + + if (type.IsEnum) + { + if (skipEnums) + { + return "int32"; + } + + ParseEnumInternal(type, message); + } + else + { + ParseMessageInternal(type, skipEnums, message); + } + + if (!type.IsNested) + { + message.Imports.Add(type.Name); + } + + return type.Name; + } + + private string TranslateProtobufName(string name) => + CustomNameLookup?.Invoke(name, out string? translatedName) == true + ? translatedName + : name; + + private string TranslateOneOfName(string oneOfEnumName) => + TranslateName(oneOfEnumName, out string translatedName) + ? translatedName.TrimEnd("Case") + : oneOfEnumName.TrimEnd("Case") + .ToSnakeCaseLower(); + + private string TranslateMessageFieldName(string fieldName) => + TranslateName(fieldName, out string translatedName) + ? translatedName + : fieldName.ToSnakeCaseLower(); + + private string TranslateEnumFieldName(FieldInfo field, string enumName) + { + if (field.GetCustomAttributesData() + .SingleOrDefault(attr => attr.AttributeType.Name == "OriginalNameAttribute") + ?.ConstructorArguments[0] + .Value + is string originalName) + { + return originalName; + } + + if (TranslateName(field.Name, out string translatedName)) + { + return translatedName; + } + + if (!enumName.IsBeebyted()) + { + enumName = enumName.ToSnakeCaseUpper(); + } + + return enumName + '_' + field.Name.ToSnakeCaseUpper(); + } + + private bool TranslateName(string name, out string translatedName) + { + if (CustomNameLookup?.Invoke(name, out translatedName!) == true) + { + return true; + } + + translatedName = name; + return name.IsBeebyted(); + } +} \ No newline at end of file diff --git a/Program.cs b/Program.cs deleted file mode 100644 index c713806..0000000 --- a/Program.cs +++ /dev/null @@ -1,54 +0,0 @@ -using System; -using System.CodeDom.Compiler; -using System.IO; -using System.Linq; -using protodec; - -const string indent = " "; -const string help = """ - Usage: protodec(.exe) [options] - Arguments: - target_assembly_path Either a single assembly or a directory of assemblies to be parsed. - out_path An existing directory to output into individual files, otherwise output to a single file. - Options: - --skip_enums Skip parsing enums and replace references to then with int32. - --include_runtime_assemblies Add the assemblies of the current runtime to the search path. - """; - -if (args.Length < 2) -{ - Console.WriteLine(help); - return; -} - -string assembly = args[0]; -string outPath = args[1]; -bool runtime = args.Contains("--include_runtime_assemblies"); -bool skipEnums = args.Contains("--skip_enums"); - -using AssemblyInspector inspector = new(assembly, runtime); -Protodec protodec = new(); - -foreach (Type message in inspector.GetProtobufMessageTypes()) -{ - protodec.ParseMessage(message, skipEnums); -} - -outPath = Path.GetFullPath(outPath); -if (Directory.Exists(outPath)) -{ - foreach (IWritable proto in protodec.Messages.Values.Concat(protodec.Enums.Values)) - { - using StreamWriter streamWriter = new(Path.Join(outPath, proto.Name + ".proto")); - using IndentedTextWriter indentWriter = new(streamWriter, indent); - - proto.WriteFileTo(indentWriter); - } -} -else -{ - using StreamWriter streamWriter = new(outPath); - using IndentedTextWriter indentWriter = new(streamWriter, indent); - - protodec.WriteAllTo(indentWriter); -} \ No newline at end of file diff --git a/Protodec.cs b/Protodec.cs deleted file mode 100644 index 8843a03..0000000 --- a/Protodec.cs +++ /dev/null @@ -1,183 +0,0 @@ -using System; -using System.CodeDom.Compiler; -using System.Collections.Generic; -using System.IO; -using System.Linq; -using System.Reflection; -using System.Runtime.CompilerServices; -using CommunityToolkit.Diagnostics; - -namespace protodec; - -public sealed class Protodec -{ - public readonly Dictionary Messages = new(); - public readonly Dictionary Enums = new(); - - private const BindingFlags PublicStatic = BindingFlags.Public | BindingFlags.Static; - - public void ParseMessage(Type messageClass, bool skipEnums = false) - { - Guard.IsTrue(messageClass.IsClass); - if (Messages.ContainsKey(messageClass.Name)) - return; - - ProtobufMessage message = new(messageClass.Name); - FieldInfo[] idFields = messageClass.GetFields(PublicStatic); - PropertyInfo[] properties = messageClass.GetProperties(BindingFlags.Public | BindingFlags.Instance); - - for (int i = 0; i < properties.Length; i++) - { - Type propertyType = properties[i].PropertyType; - - // only OneOf enums are defined nested directly in the message class - if (propertyType.IsEnum && propertyType.DeclaringType?.Name == messageClass.Name) - { - string oneOfName = TranslateMessageFieldName(properties[i].Name); - int[] oneOfProtoFieldIds = propertyType.GetFields(PublicStatic) - .Select(field => (int)field.GetRawConstantValue()!) - .Where(id => id > 0) - .ToArray(); - - message.OneOfs.Add(oneOfName, oneOfProtoFieldIds); - continue; - } - - FieldInfo idField = idFields[i]; - Guard.IsTrue(idField.IsLiteral); - Guard.IsEqualTo(idField.FieldType.Name, nameof(Int32)); - - int msgFieldId = (int)idField.GetRawConstantValue()!; - string msgFieldType = ParseType(propertyType, skipEnums, message); - string msgFieldName = TranslateMessageFieldName(properties[i].Name); - - message.Fields.Add(msgFieldId, (msgFieldType, msgFieldName)); - } - - Messages.Add(message.Name, message); - } - - private string ParseType(Type type, bool skipEnums, ProtobufMessage message) - { - switch (type.Name) - { - case "ByteString": - return "bytes"; - case nameof(String): - return "string"; - case nameof(Boolean): - return "bool"; - case nameof(Double): - return "double"; - case nameof(UInt32): - return "uint32"; - case nameof(UInt64): - return "uint64"; - case nameof(Int32): - return "int32"; - case nameof(Int64): - return "int64"; - case nameof(Single): - return "float"; - case "RepeatedField`1": - string typeName = ParseType(type.GenericTypeArguments[0], skipEnums, message); - return "repeated " + typeName; - case "MapField`2": - string t1 = ParseType(type.GenericTypeArguments[0], skipEnums, message); - string t2 = ParseType(type.GenericTypeArguments[1], skipEnums, message); - return $"map<{t1}, {t2}>"; - default: - { - if (type.IsEnum) - { - if (skipEnums) - return "int32"; - ParseEnum(type, message); - } - else - { - ParseMessage(type, skipEnums); - } - - message.Imports.Add(type.Name); - return type.Name; - } - } - } - - private void ParseEnum(Type enumEnum, ProtobufMessage message) - { - if ((enumEnum.IsNested && message.Nested.ContainsKey(enumEnum.Name)) - || Enums.ContainsKey(enumEnum.Name)) - return; - - ProtobufEnum protoEnum = new(enumEnum.Name); - foreach (FieldInfo field in enumEnum.GetFields(PublicStatic)) - { - int enumFieldId = (int)field.GetRawConstantValue()!; - string enumFieldName = field.GetCustomAttributesData() - .SingleOrDefault(attr => attr.AttributeType.Name == "OriginalNameAttribute") - ?.ConstructorArguments[0] - .Value - as string - ?? TranslateEnumFieldName(enumEnum.Name, field.Name); - - protoEnum.Fields.Add(enumFieldId, enumFieldName); - } - - if (enumEnum.IsNested) - { - message.Nested.Add(protoEnum.Name, protoEnum); - } - else - { - Enums.Add(protoEnum.Name, protoEnum); - } - } - - public void WriteAllTo(IndentedTextWriter writer) - { - WritePreambleTo(writer); - - foreach (IWritable proto in Messages.Values.Concat(Enums.Values)) - { - proto.WriteTo(writer); - writer.WriteLine(); - writer.WriteLine(); - } - } - - internal static void WritePreambleTo(TextWriter writer) - { - writer.WriteLine("// Decompiled with protodec"); - writer.WriteLine(); - writer.WriteLine("""syntax = "proto3";"""); - writer.WriteLine(); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static string TranslateMessageFieldName(string name) => - name.IsBeebyted() ? name : name.ToSnakeCaseLower(); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static string TranslateEnumFieldName(string enumName, string fieldName) => - enumName.IsBeebyted() - ? enumName + '_' + fieldName.ToSnakeCaseUpper() - : (enumName + fieldName).ToSnakeCaseUpper(); - - private bool TryParseWriteToMethod(Type targetClass) - { - //MethodInfo method = targetClass.GetInterface("Google.Protobuf.IBufferMessage")?.GetMethod("InternalWriteTo", BindingFlags.Public | BindingFlags.Instance)!; - - byte[] cil = targetClass.GetMethod("WriteTo", BindingFlags.Public | BindingFlags.Instance)! - .GetMethodBody()! - .GetILAsByteArray()!; - - if (cil[0] == 0x2A) // ret - { - return false; - } - - throw new NotImplementedException(); - } -} \ No newline at end of file diff --git a/README.md b/README.md index ada96fc..8971219 100644 --- a/README.md +++ b/README.md @@ -5,23 +5,24 @@ A tool to decompile protobuf parser/serializer classes compiled by [protoc](http Usage ----- ``` -Usage: protodec(.exe) [options] +Usage: protodec(.exe) [target_assembly_name] [options] Arguments: - target_assembly_path Either a single assembly or a directory of assemblies to be parsed. + target_assembly_dir A directory of assemblies to be loaded. out_path An existing directory to output into individual files, otherwise output to a single file. + target_assembly_name The name of an assembly to parse. If omitted, all assemblies in the target_assembly_dir will be parsed. Options: - --skip_enums Skip parsing enums and replace references to then with int32. - --include_runtime_assemblies Add the assemblies of the current runtime to the search path. + --skip_enums Skip parsing enums and replace references to them with int32. ``` Limitations ----------- - Integers are assumed to be (u)int32/64 as C# doesn't differentiate between them and sint32/64 and (s)fixed32/64. - This could be solved by parsing the writer methods, however this wouldn't work on hollow assemblies such as DummyDlls produced by Il2CppDumper -### Il2CppDumper -- The Name parameter of OriginalNameAttribute is not dumped. In this case the C# names are used after conforming them to protobuf conventions +### Decompiling from [Il2CppDumper](https://github.com/Perfare/Il2CppDumper) DummyDLLs +- The `Name` parameter of `OriginalNameAttribute` is not dumped. In this case the C# names are used after conforming them to protobuf conventions - Dumped assemblies depend on strong-named core libs, however the ones dumped are not strong-named. - This interferes with loading and can be bypassed by loading the strong-named libs from your runtime by passing the `--include_runtime_assemblies` flag + This interferes with loading and can be mitigated by copying the assemblies from your runtime into the target assembly directory. + +I recommend using [Cpp2IL](https://github.com/SamboyCoding/Cpp2IL) instead of Il2CppDumper. License ------- diff --git a/protodec.csproj b/protodec.csproj deleted file mode 100644 index 3c08cff..0000000 --- a/protodec.csproj +++ /dev/null @@ -1,17 +0,0 @@ - - - - 11 - enable - Exe - win-x64;linux-x64 - net7.0 - - - - - - - - - \ No newline at end of file diff --git a/protodec.sln b/protodec.sln index c6ac7b9..f7ffb1b 100644 --- a/protodec.sln +++ b/protodec.sln @@ -3,7 +3,9 @@ Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio Version 17 VisualStudioVersion = 17.6.33513.286 MinimumVisualStudioVersion = 10.0.40219.1 -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "protodec", "protodec.csproj", "{A5493BF4-F78C-4DCF-B449-D9A9A52FB5F0}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "protodec", "protodec\protodec.csproj", "{A5493BF4-F78C-4DCF-B449-D9A9A52FB5F0}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LibProtodec", "LibProtodec\LibProtodec.csproj", "{5F6DAD82-D9AD-4CE5-86E6-D20C9F059A4D}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution @@ -15,6 +17,10 @@ Global {A5493BF4-F78C-4DCF-B449-D9A9A52FB5F0}.Debug|Any CPU.Build.0 = Debug|Any CPU {A5493BF4-F78C-4DCF-B449-D9A9A52FB5F0}.Release|Any CPU.ActiveCfg = Release|Any CPU {A5493BF4-F78C-4DCF-B449-D9A9A52FB5F0}.Release|Any CPU.Build.0 = Release|Any CPU + {5F6DAD82-D9AD-4CE5-86E6-D20C9F059A4D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {5F6DAD82-D9AD-4CE5-86E6-D20C9F059A4D}.Debug|Any CPU.Build.0 = Debug|Any CPU + {5F6DAD82-D9AD-4CE5-86E6-D20C9F059A4D}.Release|Any CPU.ActiveCfg = Release|Any CPU + {5F6DAD82-D9AD-4CE5-86E6-D20C9F059A4D}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/protodec/Program.cs b/protodec/Program.cs new file mode 100644 index 0000000..595c809 --- /dev/null +++ b/protodec/Program.cs @@ -0,0 +1,60 @@ +using System; +using System.CodeDom.Compiler; +using System.IO; +using System.Linq; +using LibProtodec; + +const string indent = " "; +const string help = """ + Usage: protodec(.exe) [target_assembly_name] [options] + Arguments: + target_assembly_dir A directory of assemblies to be loaded. + out_path An existing directory to output into individual files, otherwise output to a single file. + target_assembly_name The name of an assembly to parse. If omitted, all assemblies in the target_assembly_dir will be parsed. + Options: + --skip_enums Skip parsing enums and replace references to them with int32. + """; + +if (args.Length < 2) +{ + Console.WriteLine(help); + return; +} + +string? assemblyName = null; +if (args.Length > 2 && !args[2].StartsWith('-')) +{ + assemblyName = args[2]; +} + +string assemblyDir = args[0]; +string outPath = Path.GetFullPath(args[1]); +bool skipEnums = args.Contains("--skip_enums"); + +using AssemblyInspector inspector = new(assemblyDir, assemblyName); +Protodec protodec = new(); + +foreach (Type message in inspector.GetProtobufMessageTypes()) +{ + protodec.ParseMessage(message, skipEnums); +} + +if (Directory.Exists(outPath)) +{ + foreach (Protobuf proto in protodec.Protobufs.Values) + { + string protoPath = Path.Join(outPath, proto.Name + ".proto"); + + using StreamWriter streamWriter = new(protoPath); + using IndentedTextWriter indentWriter = new(streamWriter, indent); + + proto.WriteFileTo(indentWriter); + } +} +else +{ + using StreamWriter streamWriter = new(outPath); + using IndentedTextWriter indentWriter = new(streamWriter, indent); + + protodec.WriteAllTo(indentWriter); +} \ No newline at end of file diff --git a/protodec/protodec.csproj b/protodec/protodec.csproj new file mode 100644 index 0000000..8ae4569 --- /dev/null +++ b/protodec/protodec.csproj @@ -0,0 +1,15 @@ + + + + 11 + enable + Exe + win-x64;linux-x64 + net7.0 + + + + + + + \ No newline at end of file