diff --git a/AssemblyInspector.cs b/AssemblyInspector.cs
new file mode 100644
index 0000000..b4e150b
--- /dev/null
+++ b/AssemblyInspector.cs
@@ -0,0 +1,94 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Reflection;
+using System.Runtime.InteropServices;
+using CommunityToolkit.Diagnostics;
+
+namespace protodec;
+
+/// <summary>
+/// Loads a single assembly, or every <c>*.dll</c> of a directory, into a <see cref="MetadataLoadContext"/>
+/// and enumerates the protobuf message classes declared in them.
+/// </summary>
+public sealed class AssemblyInspector : IDisposable
+{
+    private const string DllPattern = "*.dll";
+
+    private readonly MetadataLoadContext _assemblyContext;
+    private readonly string[] _assemblyPaths;
+    private readonly Type _googleProtobufIMessage;
+
+    /// <summary>Creates the load context and resolves <c>Google.Protobuf.IMessage</c> inside it.</summary>
+    /// <param name="assemblyPath">Path of an assembly file, or of a directory whose <c>*.dll</c> files are inspected.</param>
+    /// <param name="includeRuntimeAssemblies">Whether the assemblies of the running runtime are added to the resolver's search paths.</param>
+    /// <exception cref="ArgumentOutOfRangeException"><paramref name="assemblyPath"/> is neither an existing file nor an existing directory.</exception>
+    public AssemblyInspector(string assemblyPath, bool includeRuntimeAssemblies)
+    {
+        if (File.Exists(assemblyPath))
+        {
+            _assemblyPaths = new[] { assemblyPath };
+        }
+        else if (Directory.Exists(assemblyPath))
+        {
+            _assemblyPaths = Directory.EnumerateFiles(assemblyPath, DllPattern).ToArray();
+        }
+        else
+        {
+            // The first argument of the helper is the parameter *name*, not its value.
+            ThrowHelper.ThrowArgumentOutOfRangeException(
+                nameof(assemblyPath),
+                assemblyPath,
+                "Path is neither an existing file nor an existing directory.");
+        }
+
+        PathAssemblyResolver resolver = new(includeRuntimeAssemblies
+            ? ConcatRuntimeAssemblyPaths(_assemblyPaths)
+            : _assemblyPaths);
+
+        _assemblyContext = new MetadataLoadContext(resolver);
+
+        try
+        {
+            // throwOnError surfaces a missing IMessage type here instead of silently matching nothing later.
+            _googleProtobufIMessage = _assemblyContext.LoadFromAssemblyName("Google.Protobuf")
+                                                      .GetType("Google.Protobuf.IMessage", throwOnError: true)!;
+        }
+        catch
+        {
+            // Callers never get an instance to dispose when the constructor throws.
+            _assemblyContext.Dispose();
+            throw;
+        }
+    }
+
+    /// <summary>
+    /// Lazily enumerates every sealed type assignable to <c>Google.Protobuf.IMessage</c>, skipping the
+    /// <c>Google.Protobuf.Reflection</c> and <c>Google.Protobuf.WellKnownTypes</c> namespaces.
+    /// The query is deferred, so it must be enumerated before this instance is disposed.
+    /// </summary>
+    public IEnumerable<Type> GetProtobufMessageTypes() =>
+        from assemblyPath
+            in _assemblyPaths
+        from type
+            in _assemblyContext.LoadFromAssemblyPath(assemblyPath).GetTypes()
+        where type.IsSealed
+           && type.Namespace != "Google.Protobuf.Reflection"
+           && type.Namespace != "Google.Protobuf.WellKnownTypes"
+           && type.IsAssignableTo(_googleProtobufIMessage)
+        select type;
+
+    /// <summary>Disposes the underlying <see cref="MetadataLoadContext"/>.</summary>
+    public void Dispose() =>
+        _assemblyContext.Dispose();
+
+    /// <summary>Prepends the <c>*.dll</c> files of the current runtime directory to <paramref name="paths"/>.</summary>
+    private static IEnumerable<string> ConcatRuntimeAssemblyPaths(IEnumerable<string> paths)
+    {
+        string path = RuntimeEnvironment.GetRuntimeDirectory();
+
+        return Directory.EnumerateFiles(path, DllPattern)
+                        .Concat(paths);
+    }
+}
\ No newline at end of file
diff --git a/IWriteable.cs b/IWriteable.cs
new file mode 100644
index 0000000..67dfc56
--- /dev/null
+++ b/IWriteable.cs
@@ -0,0 +1,16 @@
+using System.CodeDom.Compiler;
+
+namespace protodec;
+
+/// <summary>A named protobuf definition that can be rendered as <c>.proto</c> source.</summary>
+public interface IWritable
+{
+    /// <summary>Identifier of the definition as written to the output.</summary>
+    string Name { get; }
+
+    /// <summary>Writes the definition as a standalone file, including the file preamble.</summary>
+    void WriteFileTo(IndentedTextWriter writer);
+
+    /// <summary>Writes only the definition itself at the writer's current indentation.</summary>
+    void WriteTo(IndentedTextWriter writer);
+}
\ No newline at end of file
diff --git a/Program.cs b/Program.cs
index 1cb7716..85ab0bf 100644
--- a/Program.cs
+++ b/Program.cs
@@ -1 +1,58 @@
-System.Console.WriteLine("protodec!");
\ No newline at end of file
+using System;
+using System.CodeDom.Compiler;
+using System.IO;
+using System.Linq;
+using protodec;
+
+// NOTE(review): whitespace runs in this patch were collapsed; confirm the intended width of the indent string.
+const string indent = " ";
+const string help = """
+    Usage: protodec(.exe) <target_assembly_path> <out_path> [options]
+    Options:
+      --skip_enums                  Skip parsing enums and replace references to them with int32.
+      --include_runtime_assemblies  Add the assemblies of the current runtime to the search path.
+    Arguments:
+      target_assembly_path          Either a single assembly or a directory of assemblies to be parsed.
+      out_path                      An existing directory to output into individual files, otherwise output to a single file.
+    """;
+
+// Both positional arguments are required; anything less prints the usage text.
+if (args.Length < 2)
+{
+    Console.WriteLine(help);
+    return;
+}
+
+string assembly = args[0];
+string outPath = args[1];
+bool runtime = args.Contains("--include_runtime_assemblies");
+bool skipEnums = args.Contains("--skip_enums");
+
+using AssemblyInspector inspector = new(assembly, runtime);
+Protodec protodec = new();
+
+foreach (Type message in inspector.GetProtobufMessageTypes())
+{
+    protodec.ParseMessage(message, skipEnums);
+}
+
+outPath = Path.GetFullPath(outPath);
+if (Directory.Exists(outPath))
+{
+    // An existing directory receives one .proto file per message and enum.
+    foreach (IWritable proto in protodec.Messages.Values.Concat(protodec.Enums.Values))
+    {
+        using StreamWriter streamWriter = new(Path.Join(outPath, proto.Name + ".proto"));
+        using IndentedTextWriter indentWriter = new(streamWriter, indent);
+
+        proto.WriteFileTo(indentWriter);
+    }
+}
+else
+{
+    // Any other path is treated as a single output file holding every definition.
+    using StreamWriter streamWriter = new(outPath);
+    using IndentedTextWriter indentWriter = new(streamWriter, indent);
+
+    protodec.WriteAllTo(indentWriter);
+}
\ No newline at end of file
diff --git a/ProtobufEnum.cs b/ProtobufEnum.cs
new file mode 100644
index 0000000..6778f76
--- /dev/null
+++ b/ProtobufEnum.cs
@@ -0,0 +1,38 @@
+using System.CodeDom.Compiler;
+using System.Collections.Generic;
+
+namespace protodec;
+
+/// <summary>A protobuf <c>enum</c> definition: its name and its values keyed by numeric id.</summary>
+public sealed record ProtobufEnum(string Name) : IWritable
+{
+    /// <summary>Enum value names keyed by their numeric id.</summary>
+    public readonly Dictionary<int, string> Fields = new();
+
+    /// <summary>Writes the file preamble followed by the enum definition.</summary>
+    public void WriteFileTo(IndentedTextWriter writer)
+    {
+        Protodec.WritePreambleTo(writer);
+        WriteTo(writer);
+    }
+
+    /// <summary>Writes <c>enum Name { VALUE = id; ... }</c> with no newline after the closing brace.</summary>
+    public void WriteTo(IndentedTextWriter writer)
+    {
+        writer.Write("enum ");
+        writer.Write(Name);
+        writer.WriteLine(" {");
+        writer.Indent++;
+
+        foreach ((int id, string name) in Fields)
+        {
+            writer.Write(name);
+            writer.Write(" = ");
+            writer.Write(id);
+            writer.WriteLine(';');
+        }
+
+        writer.Indent--;
+        writer.Write('}');
+    }
+}
\ No newline at end of file
diff --git a/ProtobufMessage.cs b/ProtobufMessage.cs new
file mode 100644
index 0000000..533d938
--- /dev/null
+++ b/ProtobufMessage.cs
@@ -0,0 +1,103 @@
+using System.CodeDom.Compiler;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+
+namespace protodec;
+
+/// <summary>A protobuf <c>message</c> definition with its imports, oneofs, fields and nested definitions.</summary>
+public sealed record ProtobufMessage(string Name) : IWritable
+{
+    /// <summary>Names of referenced definitions; each is written as <c>import "name.proto";</c> by <see cref="WriteFileTo"/>.</summary>
+    public readonly HashSet<string> Imports = new();
+
+    /// <summary>Field ids of each oneof, keyed by the oneof's name.</summary>
+    public readonly Dictionary<string, int[]> OneOfs = new();
+
+    /// <summary>Type and name of every field, keyed by field id.</summary>
+    public readonly Dictionary<int, (string Type, string Name)> Fields = new();
+
+    /// <summary>Definitions written inside this message's body, keyed by name.</summary>
+    public readonly Dictionary<string, IWritable> Nested = new();
+
+    /// <summary>Writes the file preamble, one import line per entry of <see cref="Imports"/>, then the message.</summary>
+    public void WriteFileTo(IndentedTextWriter writer)
+    {
+        Protodec.WritePreambleTo(writer);
+
+        if (Imports.Count > 0)
+        {
+            foreach (string import in Imports)
+            {
+                writer.Write("import \"");
+                writer.Write(import);
+                writer.WriteLine(".proto\";");
+            }
+
+            writer.WriteLine();
+        }
+
+        WriteTo(writer);
+    }
+
+    /// <summary>
+    /// Writes the message body: oneof groups first, then the fields that belong to no oneof,
+    /// then the nested definitions. No newline is written after the closing brace.
+    /// </summary>
+    public void WriteTo(IndentedTextWriter writer)
+    {
+        writer.Write("message ");
+        writer.Write(Name);
+        writer.WriteLine(" {");
+        writer.Indent++;
+
+        // Ids already emitted inside a oneof; a set keeps the membership test below O(1).
+        HashSet<int> oneOfFieldIds = new();
+
+        foreach ((string name, int[] fieldIds) in OneOfs)
+        {
+            // ReSharper disable once StringLiteralTypo
+            writer.Write("oneof ");
+            writer.Write(name);
+            writer.WriteLine(" {");
+            writer.Indent++;
+
+            foreach (int fieldId in fieldIds)
+            {
+                WriteField(writer, fieldId, Fields[fieldId]);
+                oneOfFieldIds.Add(fieldId);
+            }
+
+            writer.Indent--;
+            writer.WriteLine('}');
+        }
+
+        foreach ((int fieldId, (string, string) field) in Fields)
+        {
+            if (oneOfFieldIds.Contains(fieldId))
+                continue;
+
+            WriteField(writer, fieldId, field);
+        }
+
+        foreach (IWritable nested in Nested.Values)
+        {
+            nested.WriteTo(writer);
+            writer.WriteLine();
+        }
+
+        writer.Indent--;
+        writer.Write('}');
+    }
+
+    /// <summary>Writes a single <c>type name = id;</c> line.</summary>
+    private static void WriteField(TextWriter writer, int fieldId, (string Type, string Name) field)
+    {
+        writer.Write(field.Type);
+        writer.Write(' ');
+        writer.Write(field.Name);
+        writer.Write(" = ");
+        writer.Write(fieldId);
+        writer.WriteLine(';');
+    }
+}
\ No newline at end of file
diff --git a/Protodec.cs b/Protodec.cs
new file mode 100644
index 0000000..4271526
--- /dev/null
+++ b/Protodec.cs
@@ -0,0 +1,229 @@
+using System;
+using System.CodeDom.Compiler;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using CommunityToolkit.Diagnostics;
+
+namespace protodec;
+
+/// <summary>
+/// Rebuilds protobuf definitions from message classes via reflection and writes them as <c>.proto</c> source.
+/// </summary>
+public sealed class Protodec
+{
+    /// <summary>Parsed messages keyed by name.</summary>
+    public readonly Dictionary<string, IWritable> Messages = new();
+
+    /// <summary>Parsed non-nested enums keyed by name.</summary>
+    public readonly Dictionary<string, IWritable> Enums = new();
+
+    private const BindingFlags PublicStatic = BindingFlags.Public | BindingFlags.Static;
+
+    /// <summary>
+    /// Parses <paramref name="messageClass"/> and, recursively, every message or enum type its properties refer to.
+    /// A class whose name is already present in <see cref="Messages"/> is skipped.
+    /// </summary>
+    /// <param name="messageClass">The message class to parse; must be a class.</param>
+    /// <param name="skipEnums">When set, enum-typed fields are emitted as <c>int32</c> and the enums are not parsed.</param>
+    public void ParseMessage(Type messageClass, bool skipEnums = false)
+    {
+        Guard.IsTrue(messageClass.IsClass);
+        if (Messages.ContainsKey(messageClass.Name))
+            return;
+
+        ProtobufMessage message = new(messageClass.Name);
+
+        // Register up front: a self- or mutually-referencing message re-enters ParseMessage through
+        // ParseType, and must hit the ContainsKey check above instead of recursing without end.
+        Messages.Add(message.Name, message);
+
+        FieldInfo[] idFields = messageClass.GetFields(PublicStatic);
+        PropertyInfo[] properties = messageClass.GetProperties(BindingFlags.Public | BindingFlags.Instance);
+
+        // Field-number constants are matched to properties by position. Oneof case properties consume
+        // no constant, so the constants get their own cursor rather than sharing the property index.
+        int idFieldIndex = 0;
+
+        foreach (PropertyInfo property in properties)
+        {
+            Type propertyType = property.PropertyType;
+
+            // only OneOf enums are defined nested directly in the message class
+            if (propertyType.IsEnum && propertyType.DeclaringType?.Name == messageClass.Name)
+            {
+                string oneOfName = TranslateMessageFieldName(property.Name);
+                int[] oneOfProtoFieldIds = propertyType.GetFields(PublicStatic)
+                                                       .Select(field => (int)field.GetRawConstantValue()!)
+                                                       .Where(id => id > 0)
+                                                       .ToArray();
+
+                message.OneOfs.Add(oneOfName, oneOfProtoFieldIds);
+                continue;
+            }
+
+            FieldInfo idField = idFields[idFieldIndex++];
+            Guard.IsTrue(idField.IsLiteral);
+            Guard.IsEqualTo(idField.FieldType.Name, nameof(Int32));
+
+            int msgFieldId = (int)idField.GetRawConstantValue()!;
+            string msgFieldType = ParseType(propertyType, skipEnums, message);
+            string msgFieldName = TranslateMessageFieldName(property.Name);
+
+            message.Fields.Add(msgFieldId, (msgFieldType, msgFieldName));
+        }
+    }
+
+    /// <summary>
+    /// Maps a property type to its protobuf type name, parsing referenced messages and enums on the way.
+    /// </summary>
+    /// <param name="type">The property type, or a generic argument of a repeated/map field.</param>
+    /// <param name="skipEnums">When set, enum types map to <c>int32</c> without being parsed.</param>
+    /// <param name="message">The message that owns the field; receives imports and nested enums.</param>
+    private string ParseType(Type type, bool skipEnums, ProtobufMessage message)
+    {
+        switch (type.Name)
+        {
+            case "ByteString":
+                return "bytes";
+            case nameof(String):
+                return "string";
+            case nameof(Boolean):
+                return "bool";
+            case nameof(Double):
+                return "double";
+            case nameof(UInt32):
+                return "uint32";
+            case nameof(UInt64):
+                return "uint64";
+            case nameof(Int32):
+                return "int32";
+            case nameof(Int64):
+                return "int64";
+            case nameof(Single):
+                return "float";
+            case "RepeatedField`1":
+                string typeName = ParseType(type.GenericTypeArguments[0], skipEnums, message);
+                return "repeated " + typeName;
+            case "MapField`2":
+                string t1 = ParseType(type.GenericTypeArguments[0], skipEnums, message);
+                string t2 = ParseType(type.GenericTypeArguments[1], skipEnums, message);
+                return $"map<{t1}, {t2}>";
+            default:
+            {
+                if (type.IsEnum)
+                {
+                    if (skipEnums)
+                        return "int32";
+                    ParseEnum(type, message);
+                }
+                else
+                {
+                    ParseMessage(type, skipEnums);
+
+                    // A message referring to itself must not import its own file.
+                    if (type.Name != message.Name)
+                        message.Imports.Add(type.Name);
+                }
+
+                return type.Name;
+            }
+        }
+    }
+
+    /// <summary>
+    /// Parses an enum type into a <see cref="ProtobufEnum"/>. Nested enums are stored in
+    /// <paramref name="message"/>; all others go to <see cref="Enums"/> and are imported by the message.
+    /// </summary>
+    private void ParseEnum(Type enumEnum, ProtobufMessage message)
+    {
+        // A non-nested enum lives in its own file, so every message using it needs the import,
+        // including messages that meet the enum after it has already been parsed.
+        if (!enumEnum.IsNested)
+            message.Imports.Add(enumEnum.Name);
+
+        if ((enumEnum.IsNested && message.Nested.ContainsKey(enumEnum.Name))
+            || Enums.ContainsKey(enumEnum.Name))
+            return;
+
+        ProtobufEnum protoEnum = new(enumEnum.Name);
+        foreach (FieldInfo field in enumEnum.GetFields(PublicStatic))
+        {
+            int enumFieldId = (int)field.GetRawConstantValue()!;
+
+            // Prefer the name recorded by OriginalNameAttribute; fall back to converting the C# name.
+            string enumFieldName = field.GetCustomAttributesData()
+                                        .SingleOrDefault(attr => attr.AttributeType.Name == "OriginalNameAttribute")
+                                        ?.ConstructorArguments[0]
+                                        .Value
+                                        as string
+                                        ?? TranslateEnumFieldName(field.Name);
+
+            // NOTE(review): Add throws on a repeated id, so enums with aliased values are rejected - confirm that is intended.
+            protoEnum.Fields.Add(enumFieldId, enumFieldName);
+        }
+
+        if (enumEnum.IsNested)
+        {
+            message.Nested.Add(protoEnum.Name, protoEnum);
+        }
+        else
+        {
+            Enums.Add(protoEnum.Name, protoEnum);
+        }
+    }
+
+    /// <summary>Writes the preamble followed by every message and enum, each separated by a blank line.</summary>
+    public void WriteAllTo(IndentedTextWriter writer)
+    {
+        WritePreambleTo(writer);
+
+        foreach (IWritable proto in Messages.Values.Concat(Enums.Values))
+        {
+            proto.WriteTo(writer);
+            writer.WriteLine();
+            writer.WriteLine();
+        }
+    }
+
+    /// <summary>Writes the banner comment and the <c>syntax = "proto3";</c> declaration, each followed by a blank line.</summary>
+    internal static void WritePreambleTo(TextWriter writer)
+    {
+        writer.WriteLine("// Decompiled with protodec");
+        writer.WriteLine();
+        writer.WriteLine("""syntax = "proto3";""");
+        writer.WriteLine();
+    }
+
+    /// <summary>Returns <paramref name="name"/> unchanged when <c>IsBeebyted</c> is true for it, otherwise its lower snake case form.</summary>
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    private static string TranslateMessageFieldName(string name) =>
+        name.IsBeebyted() ? name : name.ToSnakeCaseLower();
+
+    /// <summary>Returns <paramref name="name"/> unchanged when <c>IsBeebyted</c> is true for it, otherwise its upper snake case form.</summary>
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    private static string TranslateEnumFieldName(string name) =>
+        name.IsBeebyted() ? name : name.ToSnakeCaseUpper();
+
+    /// <summary>
+    /// Unfinished: reads the IL of the class's public instance <c>WriteTo</c> method, returns <c>false</c>
+    /// when the body starts with <c>ret</c>, and throws <see cref="NotImplementedException"/> otherwise.
+    /// </summary>
+    private bool TryParseWriteToMethod(Type targetClass)
+    {
+        //MethodInfo method = targetClass.GetInterface("Google.Protobuf.IBufferMessage")?.GetMethod("InternalWriteTo", BindingFlags.Public | BindingFlags.Instance)!;
+
+        byte[] cil = targetClass.GetMethod("WriteTo", BindingFlags.Public | BindingFlags.Instance)!
+                                .GetMethodBody()!
+                                .GetILAsByteArray()!;
+
+        if (cil[0] == 0x2A) // ret
+        {
+            return false;
+        }
+
+        throw new NotImplementedException();
+    }
+}
\ No newline at end of file
diff --git a/README.md b/README.md
index f82fa7c..caea989 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,28 @@
 protodec
-========
\ No newline at end of file
+========
+A tool to decompile protobuf parser/serializer classes compiled by [protoc](https://github.com/protocolbuffers/protobuf), from dotnet assemblies back into .proto definitions.
+
+Usage
+-----
+```
+Usage: protodec(.exe) <target_assembly_path> <out_path> [options]
+Options:
+  --skip_enums                  Skip parsing enums and replace references to them with int32.
+  --include_runtime_assemblies  Add the assemblies of the current runtime to the search path.
+Arguments:
+  target_assembly_path          Either a single assembly or a directory of assemblies to be parsed.
+  out_path                      An existing directory to output into individual files, otherwise output to a single file.
+```
+
+Limitations
+-----------
+- Integers are assumed to be (u)int32/64 as C# doesn't differentiate between them and sint32/64 and (s)fixed32/64.
+  This could be solved by parsing the writer methods, however this wouldn't work on hollow assemblies such as DummyDlls produced by Il2CppDumper.
+### Il2CppDumper
+- The Name parameter of OriginalNameAttribute is not dumped. In this case the C# names are used after conforming them to protobuf conventions.
+- Dumped assemblies depend on strong-named core libs, however the ones dumped are not strong-named.
+  This interferes with loading and can be bypassed by loading the strong-named libs from your runtime by passing the `--include_runtime_assemblies` flag.
+
+License
+-------
+This project is subject to the terms of the [Mozilla Public License, v. 2.0](./LICENSE).
\ No newline at end of file
diff --git a/StringExtensions.cs b/StringExtensions.cs
new file mode 100644
index 0000000..c99f48f
--- /dev/null
+++ b/StringExtensions.cs
@@ -0,0 +1,70 @@
+using System.Runtime.CompilerServices;
+
+namespace protodec;
+
+/// <summary>String helpers used when translating C# identifiers into snake case names.</summary>
+public static class StringExtensions
+{
+    /// <summary>Counts the ASCII upper-case letters of <paramref name="str"/> from index <paramref name="i"/> onwards.</summary>
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    public static int CountUpper(this string str, int i = 0)
+    {
+        int count = 0;
+
+        while (i < str.Length)
+        {
+            if (char.IsAsciiLetterUpper(str[i++]))
+                count++;
+        }
+
+        return count;
+    }
+
+    /// <summary>True when <paramref name="name"/> is exactly 11 characters long and all of them are ASCII upper-case letters.</summary>
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    // ReSharper disable once IdentifierTypo
+    public static bool IsBeebyted(this string name) =>
+        name.Length == 11 && name.CountUpper() == 11;
+
+    /// <summary>
+    /// Lower-cases the first character, and replaces every later ASCII upper-case letter with an
+    /// underscore followed by its lower-case form. All other characters are copied unchanged.
+    /// </summary>
+    public static string ToSnakeCaseLower(this string str) =>
+        string.Create(str.Length + str.CountUpper(1), str, static (target, source) =>
+        {
+            target[0] = char.ToLowerInvariant(source[0]);
+
+            for (int read = 1, write = 1; read < source.Length; read++)
+            {
+                char current = source[read];
+                bool isUpper = char.IsAsciiLetterUpper(current);
+
+                if (isUpper)
+                    target[write++] = '_';
+
+                target[write++] = isUpper ? char.ToLowerInvariant(current) : current;
+            }
+        });
+
+    /// <summary>
+    /// Upper-cases the first character, prefixes every later ASCII upper-case letter with an
+    /// underscore, and upper-cases all other characters.
+    /// </summary>
+    public static string ToSnakeCaseUpper(this string str) =>
+        string.Create(str.Length + str.CountUpper(1), str, static (target, source) =>
+        {
+            target[0] = char.ToUpperInvariant(source[0]);
+
+            for (int read = 1, write = 1; read < source.Length; read++)
+            {
+                char current = source[read];
+                bool isUpper = char.IsAsciiLetterUpper(current);
+
+                if (isUpper)
+                    target[write++] = '_';
+
+                target[write++] = isUpper ? current : char.ToUpperInvariant(current);
+            }
+        });
+}
\ No newline at end of file
diff --git a/protodec.csproj b/protodec.csproj
index 21dfc21..3c08cff 100644
--- a/protodec.csproj
+++ b/protodec.csproj
@@ -4,14 +4,14 @@ 11 enable Exe - true win-x64;linux-x64 net7.0 - + + \ No newline at end of file