Optimize RoboCaller::AddReceiver() for code size
Essentially, instead of having the inlined UntypedFunction::Create(f)
return an UntypedFunction which is then passed as an argument to
non-inlined RoboCallerReceivers::AddReceiverImpl(), we let
UntypedFunction::PrepareArgs(f) return a few different kinds of
trivial structs (depending on what sort of type f has) which are
passed as arguments to non-inlined RoboCallerReceivers::AddReceiver()
(which then converts them to UntypedFunction by calling
UntypedFunction::Create()). These structs are smaller than
UntypedFunction and optimized for argument passing, so many fewer
instructions are needed.
Example code:
struct Foo {
void Receive(int, float, int, float);
void TestAddLambdaReceiver();
webrtc::RoboCaller<int, float, int, float> rc;
};
void Foo::TestAddLambdaReceiver() {
rc.AddReceiver([this](int a, float b, int c, float d){
Receive(a, b, c, d);});
}
On arm32, we get before this CL:
Foo::TestAddLambdaReceiver():
push {r11, lr}
mov r11, sp
sub sp, sp, #24
ldr r1, .LCPI0_0
mov r2, #0
stm sp, {r0, r2}
add r1, pc, r1
str r2, [sp, #20]
str r1, [sp, #16]
mov r1, sp
bl RoboCallerReceivers::AddReceiverImpl
mov sp, r11
pop {r11, pc}
.LCPI0_0:
.long CallInlineStorage<Foo::TestAddLambdaReceiver()::$_0>
CallInlineStorage<Foo::TestAddLambdaReceiver()::$_0>:
ldr r0, [r0]
b Foo::Receive(int, float, int, float)
After this CL:
Foo::TestAddLambdaReceiver():
ldr r3, .LCPI0_0
mov r2, r0
add r3, pc, r3
b RoboCallerReceivers::AddReceiver<1u>
.LCPI0_0:
.long CallInlineStorage<Foo::TestAddLambdaReceiver()::$_0>
CallInlineStorage<Foo::TestAddLambdaReceiver()::$_0>:
ldr r0, [r0]
b Foo::Receive(int, float, int, float)
(Symbol names abbreviated so that they'll fit on one line.)
So a reduction from 64 to 28 bytes. The improvements on arm64 and
x86_64 are similar.
Bug: webrtc:11943
Change-Id: I93fbba083be0235051c3279d3e3f6852a4a9fdad
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/185960
Commit-Queue: Karl Wiberg <kwiberg@webrtc.org>
Reviewed-by: Mirko Bonadei <mbonadei@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#32244}
This commit is contained in:
@ -57,6 +57,7 @@ rtc_source_set("robo_caller") {
|
||||
":untyped_function",
|
||||
"../api:function_view",
|
||||
"system:assume",
|
||||
"system:inline",
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
@ -16,10 +16,6 @@ namespace robo_caller_impl {
|
||||
RoboCallerReceivers::RoboCallerReceivers() = default;
|
||||
RoboCallerReceivers::~RoboCallerReceivers() = default;
|
||||
|
||||
void RoboCallerReceivers::AddReceiverImpl(UntypedFunction* f) {
|
||||
receivers_.push_back(std::move(*f));
|
||||
}
|
||||
|
||||
void RoboCallerReceivers::Foreach(
|
||||
rtc::FunctionView<void(UntypedFunction&)> fv) {
|
||||
for (auto& r : receivers_) {
|
||||
@ -27,5 +23,18 @@ void RoboCallerReceivers::Foreach(
|
||||
}
|
||||
}
|
||||
|
||||
template void RoboCallerReceivers::AddReceiver(
|
||||
UntypedFunction::TrivialUntypedFunctionArgs<1>);
|
||||
template void RoboCallerReceivers::AddReceiver(
|
||||
UntypedFunction::TrivialUntypedFunctionArgs<2>);
|
||||
template void RoboCallerReceivers::AddReceiver(
|
||||
UntypedFunction::TrivialUntypedFunctionArgs<3>);
|
||||
template void RoboCallerReceivers::AddReceiver(
|
||||
UntypedFunction::TrivialUntypedFunctionArgs<4>);
|
||||
template void RoboCallerReceivers::AddReceiver(
|
||||
UntypedFunction::NontrivialUntypedFunctionArgs);
|
||||
template void RoboCallerReceivers::AddReceiver(
|
||||
UntypedFunction::FunctionPointerUntypedFunctionArgs);
|
||||
|
||||
} // namespace robo_caller_impl
|
||||
} // namespace webrtc
|
||||
|
||||
@ -16,6 +16,7 @@
|
||||
|
||||
#include "api/function_view.h"
|
||||
#include "rtc_base/system/assume.h"
|
||||
#include "rtc_base/system/inline.h"
|
||||
#include "rtc_base/untyped_function.h"
|
||||
|
||||
namespace webrtc {
|
||||
@ -30,20 +31,30 @@ class RoboCallerReceivers {
|
||||
RoboCallerReceivers& operator=(RoboCallerReceivers&&) = delete;
|
||||
~RoboCallerReceivers();
|
||||
|
||||
void AddReceiver(UntypedFunction&& f) {
|
||||
AddReceiverImpl(&f);
|
||||
// Assume that f was moved from and is now trivially destructible.
|
||||
// This helps the compiler optimize away the destructor call.
|
||||
RTC_ASSUME(f.IsTriviallyDestructible());
|
||||
template <typename UntypedFunctionArgsT>
|
||||
RTC_NO_INLINE void AddReceiver(UntypedFunctionArgsT args) {
|
||||
receivers_.push_back(UntypedFunction::Create(args));
|
||||
}
|
||||
|
||||
void Foreach(rtc::FunctionView<void(UntypedFunction&)> fv);
|
||||
|
||||
private:
|
||||
void AddReceiverImpl(UntypedFunction* f);
|
||||
std::vector<UntypedFunction> receivers_;
|
||||
};
|
||||
|
||||
extern template void RoboCallerReceivers::AddReceiver(
|
||||
UntypedFunction::TrivialUntypedFunctionArgs<1>);
|
||||
extern template void RoboCallerReceivers::AddReceiver(
|
||||
UntypedFunction::TrivialUntypedFunctionArgs<2>);
|
||||
extern template void RoboCallerReceivers::AddReceiver(
|
||||
UntypedFunction::TrivialUntypedFunctionArgs<3>);
|
||||
extern template void RoboCallerReceivers::AddReceiver(
|
||||
UntypedFunction::TrivialUntypedFunctionArgs<4>);
|
||||
extern template void RoboCallerReceivers::AddReceiver(
|
||||
UntypedFunction::NontrivialUntypedFunctionArgs);
|
||||
extern template void RoboCallerReceivers::AddReceiver(
|
||||
UntypedFunction::FunctionPointerUntypedFunctionArgs);
|
||||
|
||||
} // namespace robo_caller_impl
|
||||
|
||||
// A collection of receivers (callable objects) that can be called all at once.
|
||||
@ -71,7 +82,7 @@ class RoboCaller {
|
||||
template <typename F>
|
||||
void AddReceiver(F&& f) {
|
||||
receivers_.AddReceiver(
|
||||
UntypedFunction::Create<void(ArgT...)>(std::forward<F>(f)));
|
||||
UntypedFunction::PrepareArgs<void(ArgT...)>(std::forward<F>(f)));
|
||||
}
|
||||
|
||||
// Calls all receivers with the given arguments.
|
||||
|
||||
@ -11,6 +11,8 @@
|
||||
#ifndef RTC_BASE_UNTYPED_FUNCTION_H_
|
||||
#define RTC_BASE_UNTYPED_FUNCTION_H_
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
@ -25,9 +27,17 @@ using FunVoid = void();
|
||||
union VoidUnion {
|
||||
void* void_ptr;
|
||||
FunVoid* fun_ptr;
|
||||
typename std::aligned_storage<16>::type inline_storage;
|
||||
typename std::aligned_storage<4 * sizeof(uintptr_t)>::type inline_storage;
|
||||
};
|
||||
|
||||
// Returns the number of elements of the `inline_storage` array required to
|
||||
// store an object of type T.
|
||||
template <typename T>
|
||||
constexpr size_t InlineStorageSize() {
|
||||
// sizeof(T) / sizeof(uintptr_t), but rounded up.
|
||||
return (sizeof(T) + sizeof(uintptr_t) - 1) / sizeof(uintptr_t);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
struct CallHelpers;
|
||||
template <typename RetT, typename... ArgT>
|
||||
@ -64,76 +74,155 @@ struct CallHelpers<RetT(ArgT...)> {
|
||||
// size.
|
||||
class UntypedFunction final {
|
||||
public:
|
||||
// Create function for lambdas and other callables; it accepts every type of
|
||||
// argument except those noted in its enable_if call.
|
||||
// The *UntypedFunctionArgs structs are used to transfer arguments from
|
||||
// PrepareArgs() to Create(). They are trivial, but may own heap allocations,
|
||||
// so make sure to pass them to Create() exactly once!
|
||||
//
|
||||
// The point of doing Create(PrepareArgs(foo)) instead of just Create(foo) is
|
||||
// to separate the code that has to be inlined (PrepareArgs) from the code
|
||||
// that can be noninlined (Create); the *UntypedFunctionArgs types are
|
||||
// designed to efficiently carry the required information from one to the
|
||||
// other.
|
||||
template <size_t N>
|
||||
struct TrivialUntypedFunctionArgs {
|
||||
// We use an uintptr_t array here instead of std::aligned_storage, because
|
||||
// the former can be efficiently passed in registers when using
|
||||
// TrivialUntypedFunctionArgs as a function argument. (We can't do the same
|
||||
// in VoidUnion, because std::aligned_storage but not uintptr_t can be
|
||||
// legally reinterpret_casted to arbitrary types.
|
||||
// TrivialUntypedFunctionArgs, on the other hand, only needs to handle
|
||||
// placement new and memcpy.)
|
||||
alignas(std::max_align_t) uintptr_t inline_storage[N];
|
||||
webrtc_function_impl::FunVoid* call;
|
||||
};
|
||||
struct NontrivialUntypedFunctionArgs {
|
||||
void* void_ptr;
|
||||
webrtc_function_impl::FunVoid* call;
|
||||
void (*del)(webrtc_function_impl::VoidUnion*);
|
||||
};
|
||||
struct FunctionPointerUntypedFunctionArgs {
|
||||
webrtc_function_impl::FunVoid* fun_ptr;
|
||||
webrtc_function_impl::FunVoid* call;
|
||||
};
|
||||
|
||||
// Create function for lambdas and other callables that are trivial and small;
|
||||
// it accepts every type of argument except those noted in its enable_if call.
|
||||
template <
|
||||
typename Signature,
|
||||
typename F,
|
||||
typename F_deref = typename std::remove_reference<F>::type,
|
||||
typename std::enable_if<
|
||||
// Not for function pointers; we have another overload for that below.
|
||||
!std::is_function<typename std::remove_pointer<
|
||||
typename std::remove_reference<F>::type>::type>::value &&
|
||||
!std::is_function<
|
||||
typename std::remove_pointer<F_deref>::type>::value &&
|
||||
|
||||
// Not for nullptr; we have another overload for that below.
|
||||
// Not for nullptr; we have a constructor for that below.
|
||||
!std::is_same<std::nullptr_t,
|
||||
typename std::remove_cv<F>::type>::value &&
|
||||
|
||||
// Not for UntypedFunction objects; we have another overload for that.
|
||||
// Not for UntypedFunction objects; use move construction or
|
||||
// assignment.
|
||||
!std::is_same<UntypedFunction,
|
||||
typename std::remove_cv<typename std::remove_reference<
|
||||
F>::type>::type>::value>::type* = nullptr>
|
||||
static UntypedFunction Create(F&& f) {
|
||||
using F_deref = typename std::remove_reference<F>::type;
|
||||
// TODO(C++17): Use `constexpr if` here. The compiler appears to do the
|
||||
// right thing anyway w.r.t. resolving the branch statically and
|
||||
// eliminating dead code, but it would be good for readability.
|
||||
if (std::is_trivially_move_constructible<F_deref>::value &&
|
||||
std::is_trivially_destructible<F_deref>::value &&
|
||||
sizeof(F_deref) <=
|
||||
sizeof(webrtc_function_impl::VoidUnion::inline_storage)) {
|
||||
// The callable is trivial and small enough, so we just store its bytes
|
||||
// in the inline storage.
|
||||
webrtc_function_impl::VoidUnion vu;
|
||||
new (&vu.inline_storage) F_deref(std::forward<F>(f));
|
||||
return UntypedFunction(
|
||||
vu,
|
||||
reinterpret_cast<webrtc_function_impl::FunVoid*>(
|
||||
webrtc_function_impl::CallHelpers<
|
||||
Signature>::template CallInlineStorage<F_deref>),
|
||||
nullptr);
|
||||
} else {
|
||||
// The callable is either nontrivial or too large, so we can't keep it
|
||||
// in the inline storage; use the heap instead.
|
||||
webrtc_function_impl::VoidUnion vu;
|
||||
vu.void_ptr = new F_deref(std::forward<F>(f));
|
||||
return UntypedFunction(
|
||||
vu,
|
||||
reinterpret_cast<webrtc_function_impl::FunVoid*>(
|
||||
webrtc_function_impl::CallHelpers<
|
||||
Signature>::template CallVoidPtr<F_deref>),
|
||||
static_cast<void (*)(webrtc_function_impl::VoidUnion*)>(
|
||||
[](webrtc_function_impl::VoidUnion* vu) {
|
||||
// Assuming that this pointer isn't null allows the
|
||||
// compiler to eliminate a null check in the (inlined)
|
||||
// delete operation.
|
||||
RTC_ASSUME(vu->void_ptr != nullptr);
|
||||
delete reinterpret_cast<F_deref*>(vu->void_ptr);
|
||||
}));
|
||||
}
|
||||
typename std::remove_cv<F_deref>::type>::value &&
|
||||
|
||||
// Only for trivial callables that will fit in
|
||||
// VoidUnion::inline_storage.
|
||||
std::is_trivially_move_constructible<F_deref>::value &&
|
||||
std::is_trivially_destructible<F_deref>::value &&
|
||||
sizeof(F_deref) <=
|
||||
sizeof(webrtc_function_impl::VoidUnion::inline_storage)>::type* =
|
||||
nullptr,
|
||||
size_t InlineSize = webrtc_function_impl::InlineStorageSize<F_deref>()>
|
||||
static TrivialUntypedFunctionArgs<InlineSize> PrepareArgs(F&& f) {
|
||||
// The callable is trivial and small enough, so we just store its bytes
|
||||
// in the inline storage.
|
||||
TrivialUntypedFunctionArgs<InlineSize> args;
|
||||
new (&args.inline_storage) F_deref(std::forward<F>(f));
|
||||
args.call = reinterpret_cast<webrtc_function_impl::FunVoid*>(
|
||||
webrtc_function_impl::CallHelpers<
|
||||
Signature>::template CallInlineStorage<F_deref>);
|
||||
return args;
|
||||
}
|
||||
template <size_t InlineSize>
|
||||
static UntypedFunction Create(TrivialUntypedFunctionArgs<InlineSize> args) {
|
||||
webrtc_function_impl::VoidUnion vu;
|
||||
std::memcpy(&vu.inline_storage, args.inline_storage,
|
||||
sizeof(args.inline_storage));
|
||||
return UntypedFunction(vu, args.call, nullptr);
|
||||
}
|
||||
|
||||
// Create function for lambdas and other callables that are nontrivial or
|
||||
// large; it accepts every type of argument except those noted in its
|
||||
// enable_if call.
|
||||
template <
|
||||
typename Signature,
|
||||
typename F,
|
||||
typename F_deref = typename std::remove_reference<F>::type,
|
||||
typename std::enable_if<
|
||||
// Not for function pointers; we have another overload for that below.
|
||||
!std::is_function<
|
||||
typename std::remove_pointer<F_deref>::type>::value &&
|
||||
|
||||
// Not for nullptr; we have a constructor for that below.
|
||||
!std::is_same<std::nullptr_t,
|
||||
typename std::remove_cv<F>::type>::value &&
|
||||
|
||||
// Not for UntypedFunction objects; use move construction or
|
||||
// assignment.
|
||||
!std::is_same<UntypedFunction,
|
||||
typename std::remove_cv<F_deref>::type>::value &&
|
||||
|
||||
// Only for nontrivial callables, or callables that won't fit in
|
||||
// VoidUnion::inline_storage.
|
||||
!(std::is_trivially_move_constructible<F_deref>::value &&
|
||||
std::is_trivially_destructible<F_deref>::value &&
|
||||
sizeof(F_deref) <= sizeof(webrtc_function_impl::VoidUnion::
|
||||
inline_storage))>::type* = nullptr>
|
||||
static NontrivialUntypedFunctionArgs PrepareArgs(F&& f) {
|
||||
// The callable is either nontrivial or too large, so we can't keep it
|
||||
// in the inline storage; use the heap instead.
|
||||
NontrivialUntypedFunctionArgs args;
|
||||
args.void_ptr = new F_deref(std::forward<F>(f));
|
||||
args.call = reinterpret_cast<webrtc_function_impl::FunVoid*>(
|
||||
webrtc_function_impl::CallHelpers<Signature>::template CallVoidPtr<
|
||||
F_deref>);
|
||||
args.del = static_cast<void (*)(webrtc_function_impl::VoidUnion*)>(
|
||||
[](webrtc_function_impl::VoidUnion* vu) {
|
||||
// Assuming that this pointer isn't null allows the
|
||||
// compiler to eliminate a null check in the (inlined)
|
||||
// delete operation.
|
||||
RTC_ASSUME(vu->void_ptr != nullptr);
|
||||
delete reinterpret_cast<F_deref*>(vu->void_ptr);
|
||||
});
|
||||
return args;
|
||||
}
|
||||
static UntypedFunction Create(NontrivialUntypedFunctionArgs args) {
|
||||
webrtc_function_impl::VoidUnion vu;
|
||||
vu.void_ptr = args.void_ptr;
|
||||
return UntypedFunction(vu, args.call, args.del);
|
||||
}
|
||||
|
||||
// Create function that accepts function pointers. If the argument is null,
|
||||
// the result is an empty UntypedFunction.
|
||||
template <typename Signature>
|
||||
static UntypedFunction Create(Signature* f) {
|
||||
static FunctionPointerUntypedFunctionArgs PrepareArgs(Signature* f) {
|
||||
FunctionPointerUntypedFunctionArgs args;
|
||||
args.fun_ptr = reinterpret_cast<webrtc_function_impl::FunVoid*>(f);
|
||||
args.call = reinterpret_cast<webrtc_function_impl::FunVoid*>(
|
||||
webrtc_function_impl::CallHelpers<Signature>::CallFunPtr);
|
||||
return args;
|
||||
}
|
||||
static UntypedFunction Create(FunctionPointerUntypedFunctionArgs args) {
|
||||
webrtc_function_impl::VoidUnion vu;
|
||||
vu.fun_ptr = reinterpret_cast<webrtc_function_impl::FunVoid*>(f);
|
||||
return UntypedFunction(
|
||||
vu,
|
||||
f ? reinterpret_cast<webrtc_function_impl::FunVoid*>(
|
||||
webrtc_function_impl::CallHelpers<Signature>::CallFunPtr)
|
||||
: nullptr,
|
||||
nullptr);
|
||||
vu.fun_ptr = args.fun_ptr;
|
||||
return UntypedFunction(vu, args.fun_ptr == nullptr ? nullptr : args.call,
|
||||
nullptr);
|
||||
}
|
||||
|
||||
// Prepares arguments and creates an UntypedFunction in one go.
|
||||
template <typename Signature, typename F>
|
||||
static UntypedFunction Create(F&& f) {
|
||||
return Create(PrepareArgs<Signature>(std::forward<F>(f)));
|
||||
}
|
||||
|
||||
// Default constructor. Creates an empty UntypedFunction.
|
||||
|
||||
Reference in New Issue
Block a user