Skip to content

Commit

Permalink
[EraVM] Add C-API for checking ELF files.
Browse files Browse the repository at this point in the history
  • Loading branch information
PavelKopyl committed Sep 11, 2024
1 parent 2ed625d commit 8c28956
Show file tree
Hide file tree
Showing 6 changed files with 164 additions and 13 deletions.
19 changes: 19 additions & 0 deletions lld/include/lld-c/LLDAsLibraryC.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,25 @@ LLVMBool LLVMLinkEraVM(LLVMMemoryBufferRef inBuffer,
const char *const *linkerSymbols,
const char linkerSymbolValues[][LINKER_SYMBOL_SIZE],
uint64_t numLinkerSymbols, char **errorMessage);

/** Returns true if \p inBuffer contains an ELF object file. Returns false
 *  for any other kind of binary, and also when the contents of \p inBuffer
 *  cannot be parsed as a binary at all. */
LLVMBool LLVMIsELF(LLVMMemoryBufferRef inBuffer);

/** Returns the names of the undefined linker symbols of the ELF object file
 *  passed in \p inBuffer. The names are returned via \p linkerSymbols as an
 *  array of null-terminated strings, and \p numLinkerSymbols receives the
 *  number of names. For example, if the file has undefined symbols of
 *  the form "library_path_[0-4]", then this returns just "library_path".
 *  If \p inBuffer is not an ELF file, or it has no such undefined symbols,
 *  \p linkerSymbols is set to null and \p numLinkerSymbols to zero.
 *  The caller should dispose the memory allocated for \p linkerSymbols
 *  using LLVMDisposeUndefinedSymbolsEraVM(). */
void LLVMGetUndefinedSymbolsEraVM(LLVMMemoryBufferRef inBuffer,
char **linkerSymbols[],
uint64_t *numLinkerSymbols);

/** Disposes an array with linker symbols returned by
 *  LLVMGetUndefinedSymbolsEraVM(): frees each of the \p numLinkerSymbols
 *  strings in \p linkerSymbols and then the array itself. */
void LLVMDisposeUndefinedSymbolsEraVM(char *linkerSymbols[],
uint64_t numLinkerSymbols);
LLVM_C_EXTERN_C_END

#endif // LLD_C_LLDASLIBRARYC_H
76 changes: 76 additions & 0 deletions lld/lld-c/LLDAsLibraryC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,12 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Regex.h"

#include <array>
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <memory>
#include <string.h>
#include <string>
Expand Down Expand Up @@ -244,3 +246,77 @@ LLVMBool LLVMLinkEraVM(LLVMMemoryBufferRef inBuffer,

return false;
}

/// Reports whether \p inBuffer holds an ELF file. Contents that cannot be
/// parsed as a binary are treated as "not ELF" rather than as a failure.
LLVMBool LLVMIsELF(LLVMMemoryBufferRef inBuffer) {
  Expected<std::unique_ptr<Binary>> parsedOrErr =
      createBinary(unwrap(inBuffer)->getMemBufferRef());
  if (parsedOrErr)
    return (*parsedOrErr)->isELF();

  // Parsing failed: the handler is intentionally empty, so the reason for
  // the failure is discarded and the buffer is simply reported as non-ELF.
  handleAllErrors(parsedOrErr.takeError(), [](const ErrorInfoBase &EI) {});
  return false;
}

/// Collects the names of the undefined library (linker) symbols of the ELF
/// object file held in \p inBuffer.
///
/// Every library symbol is represented in the object file by five undefined
/// sub-symbols named "<name>_0" ... "<name>_4" that are marked with
/// ELF::STO_ERAVM_LIBRARY_ADDRESS. This function strips the two-character
/// suffix and reports each "<name>" once.
///
/// \param inBuffer          buffer to inspect.
/// \param linkerSymbols     receives a malloc'ed array of strdup'ed,
///                          null-terminated names, or nullptr if \p inBuffer
///                          is not ELF or has no such symbols. Release it
///                          with LLVMDisposeUndefinedSymbolsEraVM().
/// \param numLinkerSymbols  receives the number of names in the array.
///
/// The names are emitted in the iteration order of the underlying StringSet.
/// A malformed sub-symbol name or an incomplete group of sub-symbols is
/// reported as a fatal error.
void LLVMGetUndefinedSymbolsEraVM(LLVMMemoryBufferRef inBuffer,
                                  char **linkerSymbols[],
                                  uint64_t *numLinkerSymbols) {
  if (!LLVMIsELF(inBuffer)) {
    *linkerSymbols = nullptr;
    *numLinkerSymbols = 0;
    return;
  }

  Regex subSymRegex(R"(.*_[0-4]$)");
  StringSet<> undefSymbols;
  StringSet<> undefSubSymbols;
  // LLVMIsELF() above has already parsed this buffer successfully, so
  // parsing it again is not expected to fail.
  std::unique_ptr<Binary> inBinary =
      cantFail(createBinary(unwrap(inBuffer)->getMemBufferRef()));
  const auto *oFile = static_cast<ObjectFile *>(inBinary.get());
  for (const SymbolRef &sym : oFile->symbols()) {
    uint32_t symFlags = cantFail(sym.getFlags());
    uint8_t other = ELFSymbolRef(sym).getOther();
    if ((other == ELF::STO_ERAVM_LIBRARY_ADDRESS) &&
        (symFlags & object::SymbolRef::SF_Undefined)) {
      StringRef subName = cantFail(sym.getName());
      undefSubSymbols.insert(subName);
      // The condition depends on the contents of the input file, so it must
      // be diagnosed in every build mode; llvm_unreachable would be
      // undefined behaviour in builds without assertions.
      if (!subSymRegex.match(subName))
        report_fatal_error("Unexpected suffix of library sub-symbol name");

      // Drop the trailing "_<digit>" to obtain the library symbol name.
      StringRef symName = subName.take_front(subName.size() - 2);
      undefSymbols.insert(symName);
    }
  }

  *numLinkerSymbols = undefSymbols.size();
  if (undefSymbols.empty()) {
    *linkerSymbols = nullptr;
    return;
  }

  *linkerSymbols = reinterpret_cast<char **>(
      std::malloc(undefSymbols.size() * sizeof(char *)));
  size_t outIdx = 0;
  for (const StringSet<>::value_type &entry : undefSymbols) {
    StringRef sym = entry.first();
    // Check that 'undefSubSymbols' form a set of groups each consisting of
    // five sub-symbols.
    for (unsigned subIdx = 0; subIdx < 5; ++subIdx) {
      // Materialize the name right away: a Twine only references its
      // operands, so it must not outlive the expression that builds it.
      std::string subSym = (sym + "_" + Twine(subIdx)).str();
      if (!undefSubSymbols.contains(subSym))
        report_fatal_error("missing a library sub-symbol");
    }
    (*linkerSymbols)[outIdx++] = strdup(sym.str().c_str());
  }
}

/// Disposes an array with linker symbols returned by the
/// LLVMGetUndefinedSymbolsEraVM().
///
/// Frees each of the \p numLinkerSymbols strings stored in \p linkerSymbols
/// and then the array itself. A null \p linkerSymbols (which is what
/// LLVMGetUndefinedSymbolsEraVM() produces when there are no symbols) is
/// accepted and results in no action.
void LLVMDisposeUndefinedSymbolsEraVM(char *linkerSymbols[],
                                      uint64_t numLinkerSymbols) {
  if (!linkerSymbols)
    return;

  // The index has the same width as the count, so it cannot wrap around
  // before reaching the bound.
  for (uint64_t idx = 0; idx < numLinkerSymbols; ++idx)
    std::free(linkerSymbols[idx]);
  std::free(linkerSymbols);
}
62 changes: 53 additions & 9 deletions lld/unittests/EraVM/LLDTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -194,14 +194,20 @@ target triple = \"eravm\" \n\
declare i256 @llvm.eravm.linkersymbol(metadata) \n\
\n\
define i256 @test() { \n\
%res = call i256 @llvm.eravm.linkersymbol(metadata !1) \n\
%res2 = call i256 @llvm.eravm.linkersymbol(metadata !2) \n\
%res3 = add i256 %res, %res2 \n\
%sym = call i256 @llvm.eravm.linkersymbol(metadata !1) \n\
%sym2 = call i256 @llvm.eravm.linkersymbol(metadata !2) \n\
%sym3 = call i256 @llvm.eravm.linkersymbol(metadata !3) \n\
%sym4 = call i256 @llvm.eravm.linkersymbol(metadata !4) \n\
%res = add i256 %sym, %sym2 \n\
%res2 = add i256 %res, %sym3 \n\
%res3 = add i256 %res2, %sym4 \n\
ret i256 %res3 \n\
} \n\
\n\
!1 = !{!\"library_id\"} \n\
!2 = !{!\"library_id2\"}";
!1 = !{!\"/file/path()`~!@#$%^&*-+=/library:id\"} \n\
!2 = !{!\"C:\\file\\path()`~!@#$%^&*-+=\\library:id2\"} \n\
!3 = !{!\"~/file/path()`~!@#$%^&*-+=/library:id3\"} \n\
!4 = !{!\"/()`~!@#$%^&*-+=|\\{}[ ]:;'<>,?/_library:id4\"}";

// Wrap Source in a MemoryBuffer
LLVMMemoryBufferRef IrMemBuffer = LLVMCreateMemoryBufferWithMemoryRange(
Expand All @@ -226,22 +232,35 @@ define i256 @test() { \n\
LLVMDisposeModule(M);

LLVMMemoryBufferRef BinMemBuffer;
const char *LinkerSymbol[2] = {"library_id", "library_id2"};
const char LinkerSymbolVal[2][20] = {
const char *LinkerSymbol[4] = {
"/file/path()`~!@#$%^&*-+=/library:id",
"C:\\file\\path()`~!@#$%^&*-+=\\library:id2",
"~/file/path()`~!@#$%^&*-+=/library:id3",
"/()`~!@#$%^&*-+=|\\{}[ ]:;'<>,?/_library:id4"};
const char LinkerSymbolVal[4][20] = {
{1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5},
{6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 9, 9, 9, 9, 10, 11, 12, 13}};
{6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10},
{11, 11, 11, 11, 12, 12, 12, 12, 13, 13,
13, 13, 14, 14, 14, 14, 15, 15, 15, 15},
{16, 16, 16, 16, 17, 17, 17, 17, 18, 18,
18, 18, 19, 19, 19, 19, 20, 20, 20, 20},
};
if (LLVMLinkEraVM(ObjMemBuffer, &BinMemBuffer, LinkerSymbol, LinkerSymbolVal,
2, &ErrMsg)) {
4, &ErrMsg)) {
FAIL() << "Failed to link:" << ErrMsg;
LLVMDisposeMessage(ErrMsg);
return;
}
StringRef Val1(LinkerSymbolVal[0], 20);
StringRef Val2(LinkerSymbolVal[1], 20);
StringRef Val3(LinkerSymbolVal[2], 20);
StringRef Val4(LinkerSymbolVal[3], 20);
StringRef Binary(LLVMGetBufferStart(BinMemBuffer),
LLVMGetBufferSize(BinMemBuffer));
EXPECT_TRUE(Binary.find(Val1) != StringRef::npos);
EXPECT_TRUE(Binary.find(Val2) != StringRef::npos);
EXPECT_TRUE(Binary.find(Val3) != StringRef::npos);
EXPECT_TRUE(Binary.find(Val4) != StringRef::npos);
EXPECT_TRUE(LLVMGetBufferSize(BinMemBuffer) % 64 == 32);
LLVMDisposeMemoryBuffer(ObjMemBuffer);
LLVMDisposeMemoryBuffer(BinMemBuffer);
Expand Down Expand Up @@ -285,11 +304,23 @@ define i256 @test() { \n\
}
LLVMDisposeModule(M);

EXPECT_TRUE(LLVMIsELF(ObjMemBuffer));

char **UndefLibSymbols = nullptr;
uint64_t NumUndefLibSymbols = 0;
const char *LinkerSymbols[2] = {"library_id", "library_id2"};
const char LinkerSymbolVals[2][20] = {
{1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5},
{6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 9, 9, 9, 9, 10, 11, 12, 13}};

LLVMGetUndefinedSymbolsEraVM(ObjMemBuffer, &UndefLibSymbols,
&NumUndefLibSymbols);
EXPECT_TRUE(NumUndefLibSymbols == 2);
EXPECT_TRUE(std::strcmp(UndefLibSymbols[0], LinkerSymbols[0]) == 0);
EXPECT_TRUE(std::strcmp(UndefLibSymbols[1], LinkerSymbols[1]) == 0);

LLVMDisposeUndefinedSymbolsEraVM(UndefLibSymbols, NumUndefLibSymbols);

// Pass only the first linker symbol.
LLVMMemoryBufferRef Obj2MemBuffer;
if (LLVMLinkEraVM(ObjMemBuffer, &Obj2MemBuffer, LinkerSymbols,
Expand All @@ -299,6 +330,14 @@ define i256 @test() { \n\
return;
}

EXPECT_TRUE(LLVMIsELF(Obj2MemBuffer));
LLVMGetUndefinedSymbolsEraVM(Obj2MemBuffer, &UndefLibSymbols,
&NumUndefLibSymbols);
EXPECT_TRUE(NumUndefLibSymbols == 1);
EXPECT_TRUE(std::strcmp(UndefLibSymbols[0], LinkerSymbols[1]) == 0);

LLVMDisposeUndefinedSymbolsEraVM(UndefLibSymbols, NumUndefLibSymbols);

// Pass only the second linker symbol. This time
// the linker should emit the final bytecode, as all the
// symbols are resolved.
Expand All @@ -310,6 +349,11 @@ define i256 @test() { \n\
return;
}

EXPECT_FALSE(LLVMIsELF(BinMemBuffer));
LLVMGetUndefinedSymbolsEraVM(BinMemBuffer, &UndefLibSymbols,
&NumUndefLibSymbols);
EXPECT_TRUE(NumUndefLibSymbols == 0);

StringRef Val1(LinkerSymbolVals[0], 20);
StringRef Val2(LinkerSymbolVals[1], 20);
StringRef Binary(LLVMGetBufferStart(BinMemBuffer),
Expand Down
5 changes: 3 additions & 2 deletions llvm/lib/Target/EraVM/AsmParser/EraVMAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -807,14 +807,15 @@ bool EraVMAsmParser::ParseDirective(AsmToken DirectiveID) {
Lex(); // eat "@" token

StringRef SymbolName;
// if (getTok().is(AsmToken::Identifier))
// SymbolName = getTok().getString().str();
if (getParser().parseIdentifier(SymbolName))
return TokError("expected symbol name");

if (parseEOL())
return true;

if (getContext().lookupSymbol(SymbolName))
return TokError("duplicating library symbols");

MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolName);
auto *TS = getStreamer().getTargetStreamer();
static_cast<EraVMTargetStreamer *>(TS)->emitLibraryAddressSymbol(Symbol);
Expand Down
7 changes: 5 additions & 2 deletions llvm/lib/Target/EraVM/MCTargetDesc/EraVMELFStreamer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,11 @@ void EraVMTargetELFStreamer::emitLibraryAddressSymbol(const MCSymbol *Symbol) {

// Emits 4-byte fixup to cover a part of the 20-byte linker symbol value.
auto EmitFixup = [&S, &Ctx, &SymStr, &DF](unsigned Idx) {
std::string IdxStr = std::to_string(Idx);
auto *Sym = cast<MCSymbolELF>(Ctx.getOrCreateSymbol(SymStr + "_" + IdxStr));
Twine SubSymName = Twine(SymStr) + "_" + std::to_string(Idx);
if (Ctx.lookupSymbol(SubSymName))
llvm_unreachable("Duplicating library sub-symbols");

auto *Sym = cast<MCSymbolELF>(Ctx.getOrCreateSymbol(SubSymName));
Sym->setOther(ELF::STO_ERAVM_LIBRARY_ADDRESS);
const MCExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx);
S.visitUsedExpr(*Expr);
Expand Down
8 changes: 8 additions & 0 deletions llvm/test/MC/EraVM/asm-parser/data-errors.s
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,13 @@
; STDOUT: .rodata
; STDOUT-NOT: {{.+}}

.text
.linker_symbol:
.library_address_cell @"library:id"

.linker_symbol1:
.library_address_cell @"library:id"

.rodata
.cell 1 2
.cell 1, 2
Expand All @@ -15,6 +22,7 @@


; COM: Autogenerated checks below, see README.md.
; CHECK: duplicating library symbols
; CHECK: <stdin>:{{[0-9]+}}:11: error: expected newline
; CHECK-NEXT: .cell 1 2
; CHECK-NEXT: ^
Expand Down

0 comments on commit 8c28956

Please sign in to comment.