[PATCH v2 2/7] BaseTools-GenFw:Add new x86_64 Elf relocation types for PIC/PIE code


Ard Biesheuvel
 

On 8 July 2016 at 10:42, Shi, Steven <steven.shi@intel.com> wrote:
Add support to convert new Elf relocation types
(R_X86_64_PLT32, R_X86_64_GOTPCREL, R_X86_64_GOTPCRELX,
R_X86_64_REX_GOTPCRELX) to PeCoff, which are required by
position independent code (PIC) built Elf image.

Contributed-under: TianoCore Contribution Agreement 1.0
Signed-off-by: Steven Shi <steven.shi@intel.com>
---
BaseTools/Source/C/GenFw/Elf64Convert.c | 105 +++++++++++++++++++++++++++++---
BaseTools/Source/C/GenFw/elf_common.h | 9 ++-
2 files changed, 105 insertions(+), 9 deletions(-)
mode change 100644 => 100755 BaseTools/Source/C/GenFw/elf_common.h

diff --git a/BaseTools/Source/C/GenFw/Elf64Convert.c b/BaseTools/Source/C/GenFw/Elf64Convert.c
index 7c838f3..fc241ab 100755
--- a/BaseTools/Source/C/GenFw/Elf64Convert.c
+++ b/BaseTools/Source/C/GenFw/Elf64Convert.c
@@ -755,6 +755,11 @@ WriteSections64 (
// Determine how to handle each relocation type based on the machine type.
//
if (mEhdr->e_machine == EM_X86_64) {
+ //
+ // See Relocation Types details semantics in Table 4.9 of
+ // SystemV x86_64 abi Draft Version 0.99.8
+ // https://github.com/hjl-tools/x86-psABI/wiki/x86-64-psABI-secure.pdf
+ //
switch (ELF_R_TYPE(Rel->r_info)) {
case R_X86_64_NONE:
break;
@@ -763,24 +768,24 @@ WriteSections64 (
// Absolute relocation.
//
VerboseMsg ("R_X86_64_64");
- VerboseMsg ("Offset: 0x%08X, Addend: 0x%016LX",
- (UINT32)(SecOffset + (Rel->r_offset - SecShdr->sh_addr)),
+ VerboseMsg ("Offset: 0x%08X, Addend: 0x%016LX",
+ (UINT32)(SecOffset + (Rel->r_offset - SecShdr->sh_addr)),
No change?

*(UINT64 *)Targ);
*(UINT64 *)Targ = *(UINT64 *)Targ - SymShdr->sh_addr + mCoffSectionsOffset[Sym->st_shndx];
VerboseMsg ("Relocation: 0x%016LX", *(UINT64*)Targ);
break;
case R_X86_64_32:
VerboseMsg ("R_X86_64_32");
- VerboseMsg ("Offset: 0x%08X, Addend: 0x%08X",
- (UINT32)(SecOffset + (Rel->r_offset - SecShdr->sh_addr)),
+ VerboseMsg ("Offset: 0x%08X, Addend: 0x%08X",
+ (UINT32)(SecOffset + (Rel->r_offset - SecShdr->sh_addr)),
and here

*(UINT32 *)Targ);
*(UINT32 *)Targ = (UINT32)((UINT64)(*(UINT32 *)Targ) - SymShdr->sh_addr + mCoffSectionsOffset[Sym->st_shndx]);
VerboseMsg ("Relocation: 0x%08X", *(UINT32*)Targ);
break;
case R_X86_64_32S:
VerboseMsg ("R_X86_64_32S");
- VerboseMsg ("Offset: 0x%08X, Addend: 0x%08X",
- (UINT32)(SecOffset + (Rel->r_offset - SecShdr->sh_addr)),
+ VerboseMsg ("Offset: 0x%08X, Addend: 0x%08X",
+ (UINT32)(SecOffset + (Rel->r_offset - SecShdr->sh_addr)),
and here

*(UINT32 *)Targ);
*(INT32 *)Targ = (INT32)((INT64)(*(INT32 *)Targ) - SymShdr->sh_addr + mCoffSectionsOffset[Sym->st_shndx]);
VerboseMsg ("Relocation: 0x%08X", *(UINT32*)Targ);
@@ -790,14 +795,45 @@ WriteSections64 (
// Relative relocation: Symbol - Ip + Addend
//
VerboseMsg ("R_X86_64_PC32");
- VerboseMsg ("Offset: 0x%08X, Addend: 0x%08X",
- (UINT32)(SecOffset + (Rel->r_offset - SecShdr->sh_addr)),
+ VerboseMsg ("Offset: 0x%08X, Addend: 0x%08X",
+ (UINT32)(SecOffset + (Rel->r_offset - SecShdr->sh_addr)),
and here

*(UINT32 *)Targ);
*(UINT32 *)Targ = (UINT32) (*(UINT32 *)Targ
+ (mCoffSectionsOffset[Sym->st_shndx] - SymShdr->sh_addr)
- (SecOffset - SecShdr->sh_addr));
VerboseMsg ("Relocation: 0x%08X", *(UINT32 *)Targ);
break;
+ case R_X86_64_PLT32:
+ //
+ // Relative relocation: L + A - P
+ //
+ VerboseMsg ("R_X86_64_PLT32");
+ VerboseMsg ("Offset: 0x%08X, Addend: 0x%08X",
+ (UINT32)(SecOffset + (Rel->r_offset - SecShdr->sh_addr)),
+ *(UINT32 *)Targ);
+ *(UINT32 *)Targ = (UINT32) (*(UINT32 *)Targ
+ + (mCoffSectionsOffset[Sym->st_shndx] - SymShdr->sh_addr)
+ - (SecOffset - SecShdr->sh_addr));
+ VerboseMsg ("Relocation: 0x%08X", *(UINT32 *)Targ);
+ break;
+ case R_X86_64_GOTPCREL:
+ //
+ // Relative relocation: G + GOT + A - P
+ //
+ VerboseMsg ("R_X86_64_GOTPCREL");
+ break;
+ case R_X86_64_GOTPCRELX:
+ //
+ // Relative relocation: G + GOT + A - P
+ //
+ VerboseMsg ("R_X86_64_GOTPCRELX");
+ break;
+ case R_X86_64_REX_GOTPCRELX:
+ //
+ // Relative relocation: G + GOT + A - P
+ //
+ VerboseMsg ("R_X86_64_REX_GOTPCRELX");
+ break;
default:
Error (NULL, 0, 3000, "Invalid", "%s unsupported ELF EM_X86_64 relocation 0x%x.", mInImageName, (unsigned) ELF_R_TYPE(Rel->r_info));
}
@@ -887,6 +923,12 @@ WriteRelocations64 (
UINT32 Index;
EFI_IMAGE_OPTIONAL_HEADER_UNION *NtHdr;
EFI_IMAGE_DATA_DIRECTORY *Dir;
+ UINT64 GoTPcRelPtrOffset = 0;
+ UINT64 *RipDisplacementPtr;
+ UINT64 *ElfFileGoTPcRelPtr;
+ UINT64 *CoffFileGoTPcRelPtr;
+ Elf_Shdr *shdr;
+ UINT32 i;

for (Index = 0; Index < mEhdr->e_shnum; Index++) {
Elf_Shdr *RelShdr = GetShdrByIndex(Index);
@@ -902,6 +944,53 @@ WriteRelocations64 (
switch (ELF_R_TYPE(Rel->r_info)) {
case R_X86_64_NONE:
case R_X86_64_PC32:
+ case R_X86_64_PLT32:
+ break;
+ case R_X86_64_GOTPCREL:
+ case R_X86_64_GOTPCRELX:
+ case R_X86_64_REX_GOTPCRELX:
+ //
+ // link script force .got and .got.* in .text section, so GoTPcRel pointer must be in .text section
+ // but its value might point to .text or .data section
+ //
+ RipDisplacementPtr = (UINT64 *)((UINT8*)mEhdr + SecShdr->sh_offset + Rel->r_offset - SecShdr->sh_addr);
+ GoTPcRelPtrOffset = Rel->r_offset + 4 + (INT32)(*RipDisplacementPtr) - SecShdr->sh_addr;
+ ElfFileGoTPcRelPtr = (UINT64 *)((UINT8*)mEhdr + SecShdr->sh_offset + GoTPcRelPtrOffset);
+ CoffFileGoTPcRelPtr = (UINT64 *)(mCoffFile + mCoffSectionsOffset[RelShdr->sh_info] + GoTPcRelPtrOffset);
+ //
+ // Check which section the GoTPcRel pointer point to, and calculate Elf to Coff sections displacement accordingly
+ //
+ shdr = NULL;
+ for (i = 0; i < mEhdr->e_shnum; i++) {
+ shdr = GetShdrByIndex(i);
+ if ((*ElfFileGoTPcRelPtr >= shdr->sh_addr) &&
+ (*ElfFileGoTPcRelPtr < shdr->sh_addr + shdr->sh_size)) {
+ break;
+ }
+ }
+ //
+ // Fix the Elf to Coff sections displacement
+ //
+ if(IsDataShdr(shdr)) {
+ *CoffFileGoTPcRelPtr = *CoffFileGoTPcRelPtr + mDataOffset - shdr->sh_addr;
+ }else if (IsTextShdr(SecShdr)){
+ *CoffFileGoTPcRelPtr = *CoffFileGoTPcRelPtr + mTextOffset - shdr->sh_addr;
+ }else {
+ //
+ // link script force to merge .rodata .rodata.*, .got and .got.* in .text section,
+ // not support GoTPcRel point to section other than .data or .text
+ //
+ Error (NULL, 0, 3000, "Invalid", "Not support relocate R_X86_64_GOTPCREL address in section other than .data or .text");
+ assert (FALSE);
+ }
+
+ VerboseMsg ("R_X86_64_GOTPCREL to EFI_IMAGE_REL_BASED_DIR64 Offset: 0x%08X",
+ mCoffSectionsOffset[RelShdr->sh_info] + GoTPcRelPtrOffset);
+ CoffAddFixup(
+ (UINT32)(UINTN)((UINT64) mCoffSectionsOffset[RelShdr->sh_info] + GoTPcRelPtrOffset),
+ EFI_IMAGE_REL_BASED_DIR64);
Is this necessary? I would expect the GOT entry itself to be already
covered by a R_X86_64_64 relocation, so I don't think there is a need
to emit a EFI_IMAGE_REL_BASED_DIR64 PE/COFF reloc here.

Thanks,
Ard.


Shi, Steven <steven.shi@...>
 

diff --git a/BaseTools/Source/C/GenFw/Elf64Convert.c
b/BaseTools/Source/C/GenFw/Elf64Convert.c
index 7c838f3..fc241ab 100755
--- a/BaseTools/Source/C/GenFw/Elf64Convert.c
+++ b/BaseTools/Source/C/GenFw/Elf64Convert.c
@@ -755,6 +755,11 @@ WriteSections64 (
// Determine how to handle each relocation type based on the
machine type.
//
if (mEhdr->e_machine == EM_X86_64) {
+ //
+ // See Relocation Types details semantics in Table 4.9 of
+ // SystemV x86_64 abi Draft Version 0.99.8
+ // https://github.com/hjl-tools/x86-psABI/wiki/x86-64-psABI-
secure.pdf
+ //
switch (ELF_R_TYPE(Rel->r_info)) {
case R_X86_64_NONE:
break;
@@ -763,24 +768,24 @@ WriteSections64 (
// Absolute relocation.
//
VerboseMsg ("R_X86_64_64");
- VerboseMsg ("Offset: 0x%08X, Addend: 0x%016LX",
- (UINT32)(SecOffset + (Rel->r_offset - SecShdr->sh_addr)),
+ VerboseMsg ("Offset: 0x%08X, Addend: 0x%016LX",
+ (UINT32)(SecOffset + (Rel->r_offset - SecShdr->sh_addr)),
No change?
[Steven]: remove the unnecessary blank at end of line, which is required by coding style


*(UINT64 *)Targ);
*(UINT64 *)Targ = *(UINT64 *)Targ - SymShdr->sh_addr +
mCoffSectionsOffset[Sym->st_shndx];
VerboseMsg ("Relocation: 0x%016LX", *(UINT64*)Targ);
break;
case R_X86_64_32:
VerboseMsg ("R_X86_64_32");
- VerboseMsg ("Offset: 0x%08X, Addend: 0x%08X",
- (UINT32)(SecOffset + (Rel->r_offset - SecShdr->sh_addr)),
+ VerboseMsg ("Offset: 0x%08X, Addend: 0x%08X",
+ (UINT32)(SecOffset + (Rel->r_offset - SecShdr->sh_addr)),
and here
[Steven]: remove the unnecessary blank at end of line, which is required by coding style


*(UINT32 *)Targ);
*(UINT32 *)Targ = (UINT32)((UINT64)(*(UINT32 *)Targ) - SymShdr-
sh_addr + mCoffSectionsOffset[Sym->st_shndx]);
VerboseMsg ("Relocation: 0x%08X", *(UINT32*)Targ);
break;
case R_X86_64_32S:
VerboseMsg ("R_X86_64_32S");
- VerboseMsg ("Offset: 0x%08X, Addend: 0x%08X",
- (UINT32)(SecOffset + (Rel->r_offset - SecShdr->sh_addr)),
+ VerboseMsg ("Offset: 0x%08X, Addend: 0x%08X",
+ (UINT32)(SecOffset + (Rel->r_offset - SecShdr->sh_addr)),
and here
[Steven]: remove the unnecessary blank at end of line, which is required by coding style


*(UINT32 *)Targ);
*(INT32 *)Targ = (INT32)((INT64)(*(INT32 *)Targ) - SymShdr-
sh_addr + mCoffSectionsOffset[Sym->st_shndx]);
VerboseMsg ("Relocation: 0x%08X", *(UINT32*)Targ);
@@ -790,14 +795,45 @@ WriteSections64 (
// Relative relocation: Symbol - Ip + Addend
//
VerboseMsg ("R_X86_64_PC32");
- VerboseMsg ("Offset: 0x%08X, Addend: 0x%08X",
- (UINT32)(SecOffset + (Rel->r_offset - SecShdr->sh_addr)),
+ VerboseMsg ("Offset: 0x%08X, Addend: 0x%08X",
+ (UINT32)(SecOffset + (Rel->r_offset - SecShdr->sh_addr)),
and here
[Steven]: remove the unnecessary blank at end of line, which is required by coding style


*(UINT32 *)Targ);
*(UINT32 *)Targ = (UINT32) (*(UINT32 *)Targ
+ (mCoffSectionsOffset[Sym->st_shndx] - SymShdr->sh_addr)
- (SecOffset - SecShdr->sh_addr));
VerboseMsg ("Relocation: 0x%08X", *(UINT32 *)Targ);
break;
+ case R_X86_64_PLT32:
+ //
+ // Relative relocation: L + A - P
+ //
+ VerboseMsg ("R_X86_64_PLT32");
+ VerboseMsg ("Offset: 0x%08X, Addend: 0x%08X",
+ (UINT32)(SecOffset + (Rel->r_offset - SecShdr->sh_addr)),
+ *(UINT32 *)Targ);
+ *(UINT32 *)Targ = (UINT32) (*(UINT32 *)Targ
+ + (mCoffSectionsOffset[Sym->st_shndx] - SymShdr->sh_addr)
+ - (SecOffset - SecShdr->sh_addr));
+ VerboseMsg ("Relocation: 0x%08X", *(UINT32 *)Targ);
+ break;
+ case R_X86_64_GOTPCREL:
+ //
+ // Relative relocation: G + GOT + A - P
+ //
+ VerboseMsg ("R_X86_64_GOTPCREL");
+ break;
+ case R_X86_64_GOTPCRELX:
+ //
+ // Relative relocation: G + GOT + A - P
+ //
+ VerboseMsg ("R_X86_64_GOTPCRELX");
+ break;
+ case R_X86_64_REX_GOTPCRELX:
+ //
+ // Relative relocation: G + GOT + A - P
+ //
+ VerboseMsg ("R_X86_64_REX_GOTPCRELX");
+ break;
default:
Error (NULL, 0, 3000, "Invalid", "%s unsupported ELF EM_X86_64
relocation 0x%x.", mInImageName, (unsigned) ELF_R_TYPE(Rel->r_info));
}
@@ -887,6 +923,12 @@ WriteRelocations64 (
UINT32 Index;
EFI_IMAGE_OPTIONAL_HEADER_UNION *NtHdr;
EFI_IMAGE_DATA_DIRECTORY *Dir;
+ UINT64 GoTPcRelPtrOffset = 0;
+ UINT64 *RipDisplacementPtr;
+ UINT64 *ElfFileGoTPcRelPtr;
+ UINT64 *CoffFileGoTPcRelPtr;
+ Elf_Shdr *shdr;
+ UINT32 i;

for (Index = 0; Index < mEhdr->e_shnum; Index++) {
Elf_Shdr *RelShdr = GetShdrByIndex(Index);
@@ -902,6 +944,53 @@ WriteRelocations64 (
switch (ELF_R_TYPE(Rel->r_info)) {
case R_X86_64_NONE:
case R_X86_64_PC32:
+ case R_X86_64_PLT32:
+ break;
+ case R_X86_64_GOTPCREL:
+ case R_X86_64_GOTPCRELX:
+ case R_X86_64_REX_GOTPCRELX:
+ //
+ // link script force .got and .got.* in .text section, so GoTPcRel
pointer must be in .text section
+ // but its value might point to .text or .data section
+ //
+ RipDisplacementPtr = (UINT64 *)((UINT8*)mEhdr + SecShdr-
sh_offset + Rel->r_offset - SecShdr->sh_addr);
+ GoTPcRelPtrOffset = Rel->r_offset + 4 +
(INT32)(*RipDisplacementPtr) - SecShdr->sh_addr;
+ ElfFileGoTPcRelPtr = (UINT64 *)((UINT8*)mEhdr + SecShdr-
sh_offset + GoTPcRelPtrOffset);
+ CoffFileGoTPcRelPtr = (UINT64 *)(mCoffFile +
mCoffSectionsOffset[RelShdr->sh_info] + GoTPcRelPtrOffset);
+ //
+ // Check which section the GoTPcRel pointer point to, and
calculate Elf to Coff sections displacement accordingly
+ //
+ shdr = NULL;
+ for (i = 0; i < mEhdr->e_shnum; i++) {
+ shdr = GetShdrByIndex(i);
+ if ((*ElfFileGoTPcRelPtr >= shdr->sh_addr) &&
+ (*ElfFileGoTPcRelPtr < shdr->sh_addr + shdr->sh_size)) {
+ break;
+ }
+ }
+ //
+ // Fix the Elf to Coff sections displacement
+ //
+ if(IsDataShdr(shdr)) {
+ *CoffFileGoTPcRelPtr = *CoffFileGoTPcRelPtr + mDataOffset -
shdr->sh_addr;
+ }else if (IsTextShdr(SecShdr)){
+ *CoffFileGoTPcRelPtr = *CoffFileGoTPcRelPtr + mTextOffset -
shdr->sh_addr;
+ }else {
+ //
+ // link script force to merge .rodata .rodata.*, .got and .got.*
in .text section,
+ // not support GoTPcRel point to section other than .data or .text
+ //
+ Error (NULL, 0, 3000, "Invalid", "Not support relocate
R_X86_64_GOTPCREL address in section other than .data or .text");
+ assert (FALSE);
+ }
+
+ VerboseMsg ("R_X86_64_GOTPCREL to
EFI_IMAGE_REL_BASED_DIR64 Offset: 0x%08X",
+ mCoffSectionsOffset[RelShdr->sh_info] + GoTPcRelPtrOffset);
+ CoffAddFixup(
+ (UINT32)(UINTN)((UINT64) mCoffSectionsOffset[RelShdr-
sh_info] + GoTPcRelPtrOffset),
+ EFI_IMAGE_REL_BASED_DIR64);
Is this necessary? I would expect the GOT entry itself to be already
covered by a R_X86_64_64 relocation, so I don't think there is a need
to emit a EFI_IMAGE_REL_BASED_DIR64 PE/COFF reloc here.
[Steven]: Do you ask whether it is still necessary to support the GOTPCREL/GOTPCRELX/REX_GOTPCRELX new relocation types? I think they are nice to have now, since new type support has no impact to old ones, we could just leave the code there for reference.

Thanks
Steven


Ard Biesheuvel
 

On 30 July 2016 at 16:09, Shi, Steven <steven.shi@intel.com> wrote:
diff --git a/BaseTools/Source/C/GenFw/Elf64Convert.c
b/BaseTools/Source/C/GenFw/Elf64Convert.c
index 7c838f3..fc241ab 100755
--- a/BaseTools/Source/C/GenFw/Elf64Convert.c
+++ b/BaseTools/Source/C/GenFw/Elf64Convert.c
@@ -755,6 +755,11 @@ WriteSections64 (
// Determine how to handle each relocation type based on the
machine type.
//
if (mEhdr->e_machine == EM_X86_64) {
+ //
+ // See Relocation Types details semantics in Table 4.9 of
+ // SystemV x86_64 abi Draft Version 0.99.8
+ // https://github.com/hjl-tools/x86-psABI/wiki/x86-64-psABI-
secure.pdf
+ //
switch (ELF_R_TYPE(Rel->r_info)) {
case R_X86_64_NONE:
break;
@@ -763,24 +768,24 @@ WriteSections64 (
// Absolute relocation.
//
VerboseMsg ("R_X86_64_64");
- VerboseMsg ("Offset: 0x%08X, Addend: 0x%016LX",
- (UINT32)(SecOffset + (Rel->r_offset - SecShdr->sh_addr)),
+ VerboseMsg ("Offset: 0x%08X, Addend: 0x%016LX",
+ (UINT32)(SecOffset + (Rel->r_offset - SecShdr->sh_addr)),
No change?
[Steven]: remove the unnecessary blank at end of line, which is required by coding style


*(UINT64 *)Targ);
*(UINT64 *)Targ = *(UINT64 *)Targ - SymShdr->sh_addr +
mCoffSectionsOffset[Sym->st_shndx];
VerboseMsg ("Relocation: 0x%016LX", *(UINT64*)Targ);
break;
case R_X86_64_32:
VerboseMsg ("R_X86_64_32");
- VerboseMsg ("Offset: 0x%08X, Addend: 0x%08X",
- (UINT32)(SecOffset + (Rel->r_offset - SecShdr->sh_addr)),
+ VerboseMsg ("Offset: 0x%08X, Addend: 0x%08X",
+ (UINT32)(SecOffset + (Rel->r_offset - SecShdr->sh_addr)),
and here
[Steven]: remove the unnecessary blank at end of line, which is required by coding style


*(UINT32 *)Targ);
*(UINT32 *)Targ = (UINT32)((UINT64)(*(UINT32 *)Targ) - SymShdr-
sh_addr + mCoffSectionsOffset[Sym->st_shndx]);
VerboseMsg ("Relocation: 0x%08X", *(UINT32*)Targ);
break;
case R_X86_64_32S:
VerboseMsg ("R_X86_64_32S");
- VerboseMsg ("Offset: 0x%08X, Addend: 0x%08X",
- (UINT32)(SecOffset + (Rel->r_offset - SecShdr->sh_addr)),
+ VerboseMsg ("Offset: 0x%08X, Addend: 0x%08X",
+ (UINT32)(SecOffset + (Rel->r_offset - SecShdr->sh_addr)),
and here
[Steven]: remove the unnecessary blank at end of line, which is required by coding style


*(UINT32 *)Targ);
*(INT32 *)Targ = (INT32)((INT64)(*(INT32 *)Targ) - SymShdr-
sh_addr + mCoffSectionsOffset[Sym->st_shndx]);
VerboseMsg ("Relocation: 0x%08X", *(UINT32*)Targ);
@@ -790,14 +795,45 @@ WriteSections64 (
// Relative relocation: Symbol - Ip + Addend
//
VerboseMsg ("R_X86_64_PC32");
- VerboseMsg ("Offset: 0x%08X, Addend: 0x%08X",
- (UINT32)(SecOffset + (Rel->r_offset - SecShdr->sh_addr)),
+ VerboseMsg ("Offset: 0x%08X, Addend: 0x%08X",
+ (UINT32)(SecOffset + (Rel->r_offset - SecShdr->sh_addr)),
and here
[Steven]: remove the unnecessary blank at end of line, which is required by coding style
OK. Could we do that in a separate patch?


*(UINT32 *)Targ);
*(UINT32 *)Targ = (UINT32) (*(UINT32 *)Targ
+ (mCoffSectionsOffset[Sym->st_shndx] - SymShdr->sh_addr)
- (SecOffset - SecShdr->sh_addr));
VerboseMsg ("Relocation: 0x%08X", *(UINT32 *)Targ);
break;
+ case R_X86_64_PLT32:
+ //
+ // Relative relocation: L + A - P
+ //
+ VerboseMsg ("R_X86_64_PLT32");
+ VerboseMsg ("Offset: 0x%08X, Addend: 0x%08X",
+ (UINT32)(SecOffset + (Rel->r_offset - SecShdr->sh_addr)),
+ *(UINT32 *)Targ);
+ *(UINT32 *)Targ = (UINT32) (*(UINT32 *)Targ
+ + (mCoffSectionsOffset[Sym->st_shndx] - SymShdr->sh_addr)
+ - (SecOffset - SecShdr->sh_addr));
+ VerboseMsg ("Relocation: 0x%08X", *(UINT32 *)Targ);
+ break;
+ case R_X86_64_GOTPCREL:
+ //
+ // Relative relocation: G + GOT + A - P
+ //
+ VerboseMsg ("R_X86_64_GOTPCREL");
+ break;
+ case R_X86_64_GOTPCRELX:
+ //
+ // Relative relocation: G + GOT + A - P
+ //
+ VerboseMsg ("R_X86_64_GOTPCRELX");
+ break;
+ case R_X86_64_REX_GOTPCRELX:
+ //
+ // Relative relocation: G + GOT + A - P
+ //
+ VerboseMsg ("R_X86_64_REX_GOTPCRELX");
+ break;
default:
Error (NULL, 0, 3000, "Invalid", "%s unsupported ELF EM_X86_64
relocation 0x%x.", mInImageName, (unsigned) ELF_R_TYPE(Rel->r_info));
}
@@ -887,6 +923,12 @@ WriteRelocations64 (
UINT32 Index;
EFI_IMAGE_OPTIONAL_HEADER_UNION *NtHdr;
EFI_IMAGE_DATA_DIRECTORY *Dir;
+ UINT64 GoTPcRelPtrOffset = 0;
+ UINT64 *RipDisplacementPtr;
+ UINT64 *ElfFileGoTPcRelPtr;
+ UINT64 *CoffFileGoTPcRelPtr;
+ Elf_Shdr *shdr;
+ UINT32 i;

for (Index = 0; Index < mEhdr->e_shnum; Index++) {
Elf_Shdr *RelShdr = GetShdrByIndex(Index);
@@ -902,6 +944,53 @@ WriteRelocations64 (
switch (ELF_R_TYPE(Rel->r_info)) {
case R_X86_64_NONE:
case R_X86_64_PC32:
+ case R_X86_64_PLT32:
+ break;
+ case R_X86_64_GOTPCREL:
+ case R_X86_64_GOTPCRELX:
+ case R_X86_64_REX_GOTPCRELX:
+ //
+ // link script force .got and .got.* in .text section, so GoTPcRel
pointer must be in .text section
+ // but its value might point to .text or .data section
+ //
+ RipDisplacementPtr = (UINT64 *)((UINT8*)mEhdr + SecShdr-
sh_offset + Rel->r_offset - SecShdr->sh_addr);
+ GoTPcRelPtrOffset = Rel->r_offset + 4 +
(INT32)(*RipDisplacementPtr) - SecShdr->sh_addr;
+ ElfFileGoTPcRelPtr = (UINT64 *)((UINT8*)mEhdr + SecShdr-
sh_offset + GoTPcRelPtrOffset);
+ CoffFileGoTPcRelPtr = (UINT64 *)(mCoffFile +
mCoffSectionsOffset[RelShdr->sh_info] + GoTPcRelPtrOffset);
+ //
+ // Check which section the GoTPcRel pointer point to, and
calculate Elf to Coff sections displacement accordingly
+ //
+ shdr = NULL;
+ for (i = 0; i < mEhdr->e_shnum; i++) {
+ shdr = GetShdrByIndex(i);
+ if ((*ElfFileGoTPcRelPtr >= shdr->sh_addr) &&
+ (*ElfFileGoTPcRelPtr < shdr->sh_addr + shdr->sh_size)) {
+ break;
+ }
+ }
+ //
+ // Fix the Elf to Coff sections displacement
+ //
+ if(IsDataShdr(shdr)) {
+ *CoffFileGoTPcRelPtr = *CoffFileGoTPcRelPtr + mDataOffset -
shdr->sh_addr;
+ }else if (IsTextShdr(SecShdr)){
+ *CoffFileGoTPcRelPtr = *CoffFileGoTPcRelPtr + mTextOffset -
shdr->sh_addr;
+ }else {
+ //
+ // link script force to merge .rodata .rodata.*, .got and .got.*
in .text section,
+ // not support GoTPcRel point to section other than .data or .text
+ //
+ Error (NULL, 0, 3000, "Invalid", "Not support relocate
R_X86_64_GOTPCREL address in section other than .data or .text");
+ assert (FALSE);
+ }
+
+ VerboseMsg ("R_X86_64_GOTPCREL to
EFI_IMAGE_REL_BASED_DIR64 Offset: 0x%08X",
+ mCoffSectionsOffset[RelShdr->sh_info] + GoTPcRelPtrOffset);
+ CoffAddFixup(
+ (UINT32)(UINTN)((UINT64) mCoffSectionsOffset[RelShdr-
sh_info] + GoTPcRelPtrOffset),
+ EFI_IMAGE_REL_BASED_DIR64);
Is this necessary? I would expect the GOT entry itself to be already
covered by a R_X86_64_64 relocation, so I don't think there is a need
to emit a EFI_IMAGE_REL_BASED_DIR64 PE/COFF reloc here.
[Steven]: Do you ask whether it is still necessary to support the GOTPCREL/GOTPCRELX/REX_GOTPCRELX new relocation types? I think they are nice to have now, since new type support has no impact to old ones, we could just leave the code there for reference.
No. The question is whether it is necessary to emit the
EFI_IMAGE_REL_BASED_DIR64 fixup reloc here. The relocation place is
the instruction, not the GOT entry, and I would expect the GOT entry
to be covered by a R_X86_64_64 relocation already


Shi, Steven <steven.shi@...>
 

OK. Could we do that in a separate patch?
[Steven]: Yes. We could separate it in a new patch.

Is this necessary? I would expect the GOT entry itself to be already
covered by a R_X86_64_64 relocation, so I don't think there is a need
to emit a EFI_IMAGE_REL_BASED_DIR64 PE/COFF reloc here.
[Steven]: Do you ask whether it is still necessary to support the
GOTPCREL/GOTPCRELX/REX_GOTPCRELX new relocation types? I think they
are nice to have now, since new type support has no impact to old ones, we
could just leave the code there for reference.

No. The question is whether it is necessary to emit the
EFI_IMAGE_REL_BASED_DIR64 fixup reloc here. The relocation place is
the instruction, not the GOT entry, and I would expect the GOT entry
to be covered by a R_X86_64_64 relocation already
[Steven]: Yes, it is necessary. Even the R_X86_64_64 relocation need to emit the EFI_IMAGE_REL_BASED_DIR64 fixup reloc like below in the original GenFw code. You can test it like this, remove the below CoffAddFixup() code and build the OVMF with GCC5, you will see the Ovmf boot failure with cpu exception.

case R_X86_64_64:
VerboseMsg ("EFI_IMAGE_REL_BASED_DIR64 Offset: 0x%08X",
mCoffSectionsOffset[RelShdr->sh_info] + (Rel->r_offset - SecShdr->sh_addr));
CoffAddFixup(
(UINT32) ((UINT64) mCoffSectionsOffset[RelShdr->sh_info]
+ (Rel->r_offset - SecShdr->sh_addr)),
EFI_IMAGE_REL_BASED_DIR64);

Steven Shi
Intel\SSG\STO\UEFI Firmware

Tel: +86 021-61166522
iNet: 821-6522


Ard Biesheuvel
 

On 31 July 2016 at 05:08, Shi, Steven <steven.shi@intel.com> wrote:
OK. Could we do that in a separate patch?
[Steven]: Yes. We could separate it in a new patch.

Is this necessary? I would expect the GOT entry itself to be already
covered by a R_X86_64_64 relocation, so I don't think there is a need
to emit a EFI_IMAGE_REL_BASED_DIR64 PE/COFF reloc here.
[Steven]: Do you ask whether it is still necessary to support the
GOTPCREL/GOTPCRELX/REX_GOTPCRELX new relocation types? I think they
are nice to have now, since new type support has no impact to old ones, we
could just leave the code there for reference.

No. The question is whether it is necessary to emit the
EFI_IMAGE_REL_BASED_DIR64 fixup reloc here. The relocation place is
the instruction, not the GOT entry, and I would expect the GOT entry
to be covered by a R_X86_64_64 relocation already
[Steven]: Yes, it is necessary. Even the R_X86_64_64 relocation need to emit the EFI_IMAGE_REL_BASED_DIR64 fixup reloc like below in the original GenFw code. You can test it like this, remove the below CoffAddFixup() code and build the OVMF with GCC5, you will see the Ovmf boot failure with cpu exception.

case R_X86_64_64:
VerboseMsg ("EFI_IMAGE_REL_BASED_DIR64 Offset: 0x%08X",
mCoffSectionsOffset[RelShdr->sh_info] + (Rel->r_offset - SecShdr->sh_addr));
CoffAddFixup(
(UINT32) ((UINT64) mCoffSectionsOffset[RelShdr->sh_info]
+ (Rel->r_offset - SecShdr->sh_addr)),
EFI_IMAGE_REL_BASED_DIR64);
That was not my point. With your code, how many
EFI_IMAGE_REL_BASED_DIR64 fixups are added to the .reloc section for
the GOT entry of 'n'?

int n;
int f () { return n; }
int g () { return n; }
int h () { return n; }


Ard Biesheuvel
 

On 31 July 2016 at 07:42, Ard Biesheuvel <ard.biesheuvel@linaro.org> wrote:
On 31 July 2016 at 05:08, Shi, Steven <steven.shi@intel.com> wrote:
OK. Could we do that in a separate patch?
[Steven]: Yes. We could separate it in a new patch.

Is this necessary? I would expect the GOT entry itself to be already
covered by a R_X86_64_64 relocation, so I don't think there is a need
to emit a EFI_IMAGE_REL_BASED_DIR64 PE/COFF reloc here.
[Steven]: Do you ask whether it is still necessary to support the
GOTPCREL/GOTPCRELX/REX_GOTPCRELX new relocation types? I think they
are nice to have now, since new type support has no impact to old ones, we
could just leave the code there for reference.

No. The question is whether it is necessary to emit the
EFI_IMAGE_REL_BASED_DIR64 fixup reloc here. The relocation place is
the instruction, not the GOT entry, and I would expect the GOT entry
to be covered by a R_X86_64_64 relocation already
[Steven]: Yes, it is necessary. Even the R_X86_64_64 relocation need to emit the EFI_IMAGE_REL_BASED_DIR64 fixup reloc like below in the original GenFw code. You can test it like this, remove the below CoffAddFixup() code and build the OVMF with GCC5, you will see the Ovmf boot failure with cpu exception.

case R_X86_64_64:
VerboseMsg ("EFI_IMAGE_REL_BASED_DIR64 Offset: 0x%08X",
mCoffSectionsOffset[RelShdr->sh_info] + (Rel->r_offset - SecShdr->sh_addr));
CoffAddFixup(
(UINT32) ((UINT64) mCoffSectionsOffset[RelShdr->sh_info]
+ (Rel->r_offset - SecShdr->sh_addr)),
EFI_IMAGE_REL_BASED_DIR64);
That was not my point. With your code, how many
EFI_IMAGE_REL_BASED_DIR64 fixups are added to the .reloc section for
the GOT entry of 'n'?

int n;
int f () { return n; }
int g () { return n; }
int h () { return n; }
I am also concerned about the GOTPCRELX/REX_GOTPCRELX relocations.
Reading the x86_64 ABI docs, it appears that these may refer to
instructions that have been modified by the linker. In that case, how
do we deal with the relocation? Also, according to the doc, mov
instructions may be emitted by the linker in some cases that are only
valid in the lowest 2 GB of the address space.

All in all, I think supporting GOT based relocations is a can of
worms, and I would prefer to get rid of them completely if we can
(i.e., using hidden visibility even for LTO, I have a fix for that I
will sent out separately)

Thanks,
Ard.


Shi, Steven <steven.shi@...>
 


That was not my point. With your code, how many
EFI_IMAGE_REL_BASED_DIR64 fixups are added to the .reloc section for
the GOT entry of 'n'?

int n;
int f () { return n; }
int g () { return n; }
int h () { return n; }
[Steven]: If the above global " n " need GOTPCREL type relocation. It should need only once EFI_IMAGE_REL_BASED_DIR64 fixups in my code.


I am also concerned about the GOTPCRELX/REX_GOTPCRELX relocations.
Reading the x86_64 ABI docs, it appears that these may refer to
instructions that have been modified by the linker. In that case, how
do we deal with the relocation? Also, according to the doc, mov
instructions may be emitted by the linker in some cases that are only
valid in the lowest 2 GB of the address space.
[Steven]: Frankly to say, the x86_64 ABI docs is only good for compiler domain developer and not very good for other domain developers to understand it.
My overall understanding for these different relocation type is like this: compiler generate PIC code with different "level of indirection to all global data and function references in the code." And these different level of indirection is implemented through GOT and PLT structure with different addressing calculation pattern. The different calculation patterns are the different relocation types which are defined by x86_64 ABI Table 4.9. We don't need worry about how compiler correctly generate code to work with these relocation types, we just need correctly understand their addressing calculation pattern.

The GOTPCRELX/REX_GOTPCRELX has the same calculation definition in x86_64 ABI Table 4.9 as "G + GOT + A - P". So, I assume their difference is not in the relocation calculation pattern, but how to co-work with specific instructions to finish these calculation in a hardware optimized way.

One important thing worthy to mention that our gcc link script (GccBase.lds) has forced the GOT related sections , e.g. '.got', '.got.plt' , into .text section. So, all the GOT relocation fixup target .text section in fact.

I find below article help me a lot to understand some PIC simple relocation types. Hope it also can help you. I wish Eli, the author of below articles, could be invited as one author of x86_64 ABI spec, which will surely make the spec be more readable to other domain developers. :)
Position Independent Code (PIC) in shared libraries
http://eli.thegreenplace.net/2011/11/03/position-independent-code-pic-in-shared-libraries/
Position Independent Code (PIC) in shared libraries on x64
http://eli.thegreenplace.net/2011/11/11/position-independent-code-pic-in-shared-libraries-on-x64


All in all, I think supporting GOT based relocations is a can of
worms, and I would prefer to get rid of them completely if we can
(i.e., using hidden visibility even for LTO, I have a fix for that I
will sent out separately)
[Steven]: I agree we should try to avoid generating complex relocation type code for Uefi. But to make Uefi code build more portable to various version compilers and linker, I think it is still necessary to support these GOT based relocations.
I've tested the GenFw new relocation types support with both GCC5/GCC49 (with default visibility) and CLANG38 in my branch in before. It can works. I suggest we could just keep these code there for reference.


Steven Shi
Intel\SSG\STO\UEFI Firmware

Tel: +86 021-61166522
iNet: 821-6522


Ard Biesheuvel
 

On 1 August 2016 at 06:39, Shi, Steven <steven.shi@intel.com> wrote:

That was not my point. With your code, how many
EFI_IMAGE_REL_BASED_DIR64 fixups are added to the .reloc section for
the GOT entry of 'n'?

int n;
int f () { return n; }
int g () { return n; }
int h () { return n; }
[Steven]: If the above global " n " need GOTPCREL type relocation. It should need only once EFI_IMAGE_REL_BASED_DIR64 fixups in my code.
Yes, I believe so. I think it would be possible to sort the .reloc
section and eliminate duplicates, but doing this correctly is
non-trivial in any case.


I am also concerned about the GOTPCRELX/REX_GOTPCRELX relocations.
Reading the x86_64 ABI docs, it appears that these may refer to
instructions that have been modified by the linker. In that case, how
do we deal with the relocation? Also, according to the doc, mov
instructions may be emitted by the linker in some cases that are only
valid in the lowest 2 GB of the address space.
[Steven]: Frankly to say, the x86_64 ABI docs is only good for compiler domain developer and not very good for other domain developers to understand it.
My overall understanding for these different relocation type is like this: compiler generate PIC code with different "level of indirection to all global data and function references in the code." And these different level of indirection is implemented through GOT and PLT structure with different addressing calculation pattern. The different calculation patterns are the different relocation types which are defined by x86_64 ABI Table 4.9. We don't need worry about how compiler correctly generate code to work with these relocation types, we just need correctly understand their addressing calculation pattern.

The GOTPCRELX/REX_GOTPCRELX has the same calculation definition in x86_64 ABI Table 4.9 as "G + GOT + A - P". So, I assume their difference is not in the relocation calculation pattern, but how to co-work with specific instructions to finish these calculation in a hardware optimized way.
No, that is not what these are for. The special types mark
instructions that can be converted by the linker into simpler
sequences if the symbol turns out to be in the same module. From the
doc:

mov foo@GOTPCREL(%rip), %reg

could be converted by the linker into

lea foo(%rip), %reg

if the reference to 'foo' is satisfied by a non-preemptible local
definition. This is a useful optimization, since it eliminates a
memory load. The problem is that we cannot recalculate such
relocations in GenFw without checking whether the linker has applied
this optimization or not.

One important thing worthy to mention that our gcc link script (GccBase.lds) has forced the GOT related sections , e.g. '.got', '.got.plt' , into .text section. So, all the GOT relocation fixup target .text section in fact.

I find below article help me a lot to understand some PIC simple relocation types. Hope it also can help you. I wish Eli, the author of below articles, could be invited as one author of x86_64 ABI spec, which will surely make the spec be more readable to other domain developers. :)
Position Independent Code (PIC) in shared libraries
http://eli.thegreenplace.net/2011/11/03/position-independent-code-pic-in-shared-libraries/
Position Independent Code (PIC) in shared libraries on x64
http://eli.thegreenplace.net/2011/11/11/position-independent-code-pic-in-shared-libraries-on-x64


All in all, I think supporting GOT based relocations is a can of
worms, and I would prefer to get rid of them completely if we can
(i.e., using hidden visibility even for LTO, I have a fix for that I
will sent out separately)
[Steven]: I agree we should try to avoid generating complex relocation type code for Uefi. But to make Uefi code build more portable to various version compilers and linker, I think it is still necessary to support these GOT based relocations.
I've tested the GenFw new relocation types support with both GCC5/GCC49 (with default visibility) and CLANG38 in my branch in before. It can works. I suggest we could just keep these code there for reference.
The fact that it works does not make it safe. Having multiple fixups
for the same symbol in the .reloc section is a problem, and so is
reapplying GOTPCRELX to places where the original instruction has been
replaced by the linker.

--
Ard.


Shi, Steven <steven.shi@...>
 


I am also concerned about the GOTPCRELX/REX_GOTPCRELX relocations.
Reading the x86_64 ABI docs, it appears that these may refer to
instructions that have been modified by the linker. In that case, how
do we deal with the relocation? Also, according to the doc, mov
instructions may be emitted by the linker in some cases that are only
valid in the lowest 2 GB of the address space.
[Steven]: Frankly to say, the x86_64 ABI docs is only good for compiler
domain developer and not very good for other domain developers to
understand it.
My overall understanding for these different relocation type is like this:
compiler generate PIC code with different "level of indirection to all global
data and function references in the code." And these different level of
indirection is implemented through GOT and PLT structure with different
addressing calculation pattern. The different calculation patterns are the
different relocation types which are defined by x86_64 ABI Table 4.9. We
don't need worry about how compiler correctly generate code to work with
these relocation types, we just need correctly understand their addressing
calculation pattern.

The GOTPCRELX/REX_GOTPCRELX has the same calculation definition in
x86_64 ABI Table 4.9 as "G + GOT + A - P". So, I assume their difference is not
in the relocation calculation pattern, but how to co-work with specific
instructions to finish these calculation in a hardware optimized way.
No, that is not what these are for. The special types mark
instructions that can be converted by the linker into simpler
sequences if the symbol turns out to be in the same module. From the
doc:

mov foo@GOTPCREL(%rip), %reg

could be converted by the linker into

lea foo(%rip), %reg

if the reference to 'foo' is satisfied by a non-preemptible local
definition. This is a useful optimization, since it eliminates a
memory load. The problem is that we cannot recalculate such
relocations in GenFw without checking whether the linker has applied
this optimization or not.
[Steven]: Do you mean the linker will apply above optimization but not remove the original GOTPCREL item? It sounds like a severe linker bug.


The fact that it works does not make it safe. Having multiple fixups
for the same symbol in the .reloc section is a problem, and so is
reapplying GOTPCRELX to places where the original instruction has been
replaced by the linker.
[Steven]: I still don't understand why there will be multiple fixups for the same symbol in the .reloc section?


Ard Biesheuvel
 

On 1 August 2016 at 08:13, Shi, Steven <steven.shi@intel.com> wrote:

I am also concerned about the GOTPCRELX/REX_GOTPCRELX relocations.
Reading the x86_64 ABI docs, it appears that these may refer to
instructions that have been modified by the linker. In that case, how
do we deal with the relocation? Also, according to the doc, mov
instructions may be emitted by the linker in some cases that are only
valid in the lowest 2 GB of the address space.
[Steven]: Frankly to say, the x86_64 ABI docs is only good for compiler
domain developer and not very good for other domain developers to
understand it.
My overall understanding for these different relocation type is like this:
compiler generate PIC code with different "level of indirection to all global
data and function references in the code." And these different level of
indirection is implemented through GOT and PLT structure with different
addressing calculation pattern. The different calculation patterns are the
different relocation types which are defined by x86_64 ABI Table 4.9. We
don't need worry about how compiler correctly generate code to work with
these relocation types, we just need correctly understand their addressing
calculation pattern.

The GOTPCRELX/REX_GOTPCRELX has the same calculation definition in
x86_64 ABI Table 4.9 as "G + GOT + A - P". So, I assume their difference is not
in the relocation calculation pattern, but how to co-work with specific
instructions to finish these calculation in a hardware optimized way.
No, that is not what these are for. The special types mark
instructions that can be converted by the linker into simpler
sequences if the symbol turns out to be in the same module. From the
doc:

mov foo@GOTPCREL(%rip), %reg

could be converted by the linker into

lea foo(%rip), %reg

if the reference to 'foo' is satisfied by a non-preemptible local
definition. This is a useful optimization, since it eliminates a
memory load. The problem is that we cannot recalculate such
relocations in GenFw without checking whether the linker has applied
this optimization or not.
[Steven]: Do you mean the linker will apply above optimization but not remove the original GOTPCREL item? It sounds like a severe linker bug.
I checked quickly, and it appears the linker does the right thing
here, i.e., it performs the optimization and also modifies the
relocation emitted into the .rela.text section

So this:

.globl bar
.type bar, @function
bar:
mov foo@GOTPCREL(%rip), %eax
ret

.globl foo
foo:
.quad 0

compiles into

/tmp/pie.o: file format elf64-x86-64


Disassembly of section .text:

0000000000000000 <bar>:
0: 8b 05 00 00 00 00 mov 0x0(%rip),%eax # 6 <bar+0x6>
2: R_X86_64_GOTPCRELX foo-0x4
6: c3 retq

0000000000000007 <foo>:
...

but after linking (ld -o /tmp/pie -e bar -q /tmp/pie.o)

/tmp/pie: file format elf64-x86-64


Disassembly of section .text:

00000000004000b0 <bar>:
4000b0: 8d 05 01 00 00 00 lea 0x1(%rip),%eax # 4000b7 <foo>
4000b2: R_X86_64_PC32 foo-0x4
4000b6: c3 retq

00000000004000b7 <foo>:
...



The fact that it works does not make it safe. Having multiple fixups
for the same symbol in the .reloc section is a problem, and so is
reapplying GOTPCRELX to places where the original instruction has been
replaced by the linker.
[Steven]: I still don't understand why there will be multiple fixups for the same symbol in the .reloc section?
Remember this example

int n;
int f () { return n; }
int g () { return n; }
int h () { return n; }
If every 'return n' results in a GOTPCREL relocation, how are you
going to make sure that the GOT entry for 'n' is only fixed up a
single time?


Shi, Steven <steven.shi@...>
 


The fact that it works does not make it safe. Having multiple fixups
for the same symbol in the .reloc section is a problem, and so is
reapplying GOTPCRELX to places where the original instruction has been
replaced by the linker.
[Steven]: I still don't understand why there will be multiple fixups for the
same symbol in the .reloc section?
Remember this example

int n;
int f () { return n; }
int g () { return n; }
int h () { return n; }
If every 'return n' results in a GOTPCREL relocation, how are you
going to make sure that the GOT entry for 'n' is only fixed up a
single time?
[Steven]: the 'return n' will not result in relocation, but the 'int n' will result in the relocation in GOT. The three 'return n' will point to the same 'int n' relocation item. So, we need only fixup 'int n' once, all three 'return n' will use the correct global 'n' value. BTW, the 'int n' relocation type in your code on X64 should be R_X86_64_GLOB_DAT

You can see the 'int myglob' in Eli's example in http://eli.thegreenplace.net/2011/11/03/position-independent-code-pic-in-shared-libraries/. The 'int myglob' is same as your 'int n' example.

int myglob = 42;
int ml_func(int a, int b)
{
return myglob + a + b;
}


Ard Biesheuvel
 

On 1 August 2016 at 09:19, Shi, Steven <steven.shi@intel.com> wrote:

The fact that it works does not make it safe. Having multiple fixups
for the same symbol in the .reloc section is a problem, and so is
reapplying GOTPCRELX to places where the original instruction has been
replaced by the linker.
[Steven]: I still don't understand why there will be multiple fixups for the
same symbol in the .reloc section?
Remember this example

int n;
int f () { return n; }
int g () { return n; }
int h () { return n; }
If every 'return n' results in a GOTPCREL relocation, how are you
going to make sure that the GOT entry for 'n' is only fixed up a
single time?
[Steven]: the 'return n' will not result in relocation, but the 'int n' will result in the relocation in GOT. The three 'return n' will point to the same 'int n' relocation item. So, we need only fixup 'int n' once, all three 'return n' will use the correct global 'n' value.
Every 'return n' will result in a GOTPCREL relocation against n. And
your code emits a relocation for the GOT entry every time.

BTW, the 'int n' relocation type in your code on X64 should be
R_X86_64_GLOB_DAT
R_X86_64_GLOB_DAT is a dynamic relocation type. These are only emitted
when linking a shared object or a PIE executable, which I would like
to avoid as well.

The problem with PIE executables is that the .rela.xxx sections
emitted for each section, and the single .rela section containing the
dynamic relocations overlap with each other, i.e., places containing
absolute symbol addresses in the binary will appear in both, and
emitting reloc fixups for all relocations will result in duplicates.

You can see the 'int myglob' in Eli's example in http://eli.thegreenplace.net/2011/11/03/position-independent-code-pic-in-shared-libraries/. The 'int myglob' is same as your 'int n' example.

int myglob = 42;
int ml_func(int a, int b)
{
return myglob + a + b;
}
Yes, and every reference to 'myglob' will result in a GOTPCREL
relocation. We must not emit a fixup for myglob every time.


Shi, Steven <steven.shi@...>
 

On 1 August 2016 at 09:19, Shi, Steven <steven.shi@intel.com<mailto:steven.shi@intel.com>> wrote:
The fact that it works does not make it safe. Having multiple fixups
for the same symbol in the .reloc section is a problem, and so is
reapplying GOTPCRELX to places where the original instruction has
been
replaced by the linker.
[Steven]: I still don't understand why there will be multiple fixups for the
same symbol in the .reloc section?
Remember this example
int n;
int f () { return n; }
int g () { return n; }
int h () { return n; }
If every 'return n' results in a GOTPCREL relocation, how are you
going to make sure that the GOT entry for 'n' is only fixed up a
single time?
[Steven]: the 'return n' will not result in relocation, but the 'int n' will result
in the relocation in GOT. The three 'return n' will point to the same 'int n'
relocation item. So, we need only fixup 'int n' once, all three 'return n' will
use the correct global 'n' value.
Every 'return n' will result in a GOTPCREL relocation against n. And
your code emits a relocation for the GOT entry every time.
[Steven]: I don't think so. please give a real case and offer its source code to prove " Every 'return n' will result in a GOTPCREL relocation against n ".



BTW, the 'int n' relocation type in your code on X64 should be
R_X86_64_GLOB_DAT
R_X86_64_GLOB_DAT is a dynamic relocation type. These are only emitted
when linking a shared object or a PIE executable, which I would like
to avoid as well.
The problem with PIE executables is that the .rela.xxx sections
emitted for each section, and the single .rela section containing the
dynamic relocations overlap with each other, i.e., places containing
absolute symbol addresses in the binary will appear in both, and
emitting reloc fixups for all relocations will result in duplicates.
[Steven]: the current GenFw will ignore the single .rela section, because the .rela section info is 0, which target invalid section at all. See below sections example. Current GenFw will only handle the .rela.text and .rela.data relocation sections.



Section Headers:

[Nr] Name Type Address Offset

Size EntSize Flags Link Info Align

[ 0] NULL 0000000000000000 00000000

0000000000000000 0000000000000000 0 0 0

[ 1] .note.gnu.build-i NOTE 0000000000000000 00000100

0000000000000024 0000000000000000 A 0 0 4

[ 2] .text PROGBITS 0000000000000240 00000340

00000000000059f8 0000000000000008 AX 0 0 64

[ 3] .rela.text RELA 0000000000000000 00008728

00000000000044d0 0000000000000018 I 10 2 8

[ 4] .data PROGBITS 0000000000005c40 00005d40

0000000000000a68 0000000000000000 WA 0 0 64

[ 5] .rela.data RELA 0000000000000000 0000cbf8

00000000000004b0 0000000000000018 I 10 4 8

[ 6] .rela RELA 00000000000066c0 000067c0

00000000000004b0 0000000000000018 A 0 0 8



You can see the 'int myglob' in Eli's example in
http://eli.thegreenplace.net/2011/11/03/position-independent-code-pic-in-<http://eli.thegreenplace.net/2011/11/03/position-independent-code-pic-in-shared-libraries/>
shared-libraries/<http://eli.thegreenplace.net/2011/11/03/position-independent-code-pic-in-shared-libraries/>. The 'int myglob' is same as your 'int n' example.
int myglob = 42;
int ml_func(int a, int b)
{
return myglob + a + b;
}
Yes, and every reference to 'myglob' will result in a GOTPCREL
relocation. We must not emit a fixup for myglob every time.
[Steven]: Please give a real case and offer its source code to prove "every reference to 'myglob' will result in a GOTPCREL relocation".


Ard Biesheuvel
 

On 1 August 2016 at 09:54, Shi, Steven <steven.shi@intel.com> wrote:
On 1 August 2016 at 09:19, Shi, Steven <steven.shi@intel.com<mailto:steven.shi@intel.com>> wrote:

The fact that it works does not make it safe. Having multiple fixups
for the same symbol in the .reloc section is a problem, and so is
reapplying GOTPCRELX to places where the original instruction has
been
replaced by the linker.
[Steven]: I still don't understand why there will be multiple fixups for the
same symbol in the .reloc section?
Remember this example

int n;
int f () { return n; }
int g () { return n; }
int h () { return n; }
If every 'return n' results in a GOTPCREL relocation, how are you
going to make sure that the GOT entry for 'n' is only fixed up a
single time?
[Steven]: the 'return n' will not result in relocation, but the 'int n' will result
in the relocation in GOT. The three 'return n' will point to the same 'int n'
relocation item. So, we need only fixup 'int n' once, all three 'return n' will
use the correct global 'n' value.

Every 'return n' will result in a GOTPCREL relocation against n. And
your code emits a relocation for the GOT entry every time.
[Steven]: I don't think so. please give a real case and offer its source code to prove " Every 'return n' will result in a GOTPCREL relocation against n ".
Compiling the code above using

gcc -c -O -fpic /tmp/pie.c -o pie.o

and dumping it using

readelf -r pie.o

gives me

Relocation section '.rela.text' at offset 0x250 contains 3 entries:
Offset Info Type Sym. Value Sym. Name + Addend
000000000003 000a0000002a R_X86_64_REX_GOTP 0000000000000004 n - 4
00000000000d 000a0000002a R_X86_64_REX_GOTP 0000000000000004 n - 4
000000000017 000a0000002a R_X86_64_REX_GOTP 0000000000000004 n - 4
...

--
Ard.


Shi, Steven <steven.shi@...>
 

On 1 August 2016 at 09:54, Shi, Steven <steven.shi@intel.com> wrote:
On 1 August 2016 at 09:19, Shi, Steven
<steven.shi@intel.com<mailto:steven.shi@intel.com>> wrote:

The fact that it works does not make it safe. Having multiple fixups
for the same symbol in the .reloc section is a problem, and so is
reapplying GOTPCRELX to places where the original instruction has
been
replaced by the linker.
[Steven]: I still don't understand why there will be multiple fixups for
the
same symbol in the .reloc section?
Remember this example

int n;
int f () { return n; }
int g () { return n; }
int h () { return n; }
If every 'return n' results in a GOTPCREL relocation, how are you
going to make sure that the GOT entry for 'n' is only fixed up a
single time?
[Steven]: the 'return n' will not result in relocation, but the 'int n' will
result
in the relocation in GOT. The three 'return n' will point to the same 'int n'
relocation item. So, we need only fixup 'int n' once, all three 'return n' will
use the correct global 'n' value.

Every 'return n' will result in a GOTPCREL relocation against n. And
your code emits a relocation for the GOT entry every time.
[Steven]: I don't think so. please give a real case and offer its source code
to prove " Every 'return n' will result in a GOTPCREL relocation against n ".
Compiling the code above using

gcc -c -O -fpic /tmp/pie.c -o pie.o

and dumping it using

readelf -r pie.o

gives me

Relocation section '.rela.text' at offset 0x250 contains 3 entries:
Offset Info Type Sym. Value Sym. Name + Addend
000000000003 000a0000002a R_X86_64_REX_GOTP 0000000000000004 n -
4
00000000000d 000a0000002a R_X86_64_REX_GOTP 0000000000000004 n -
4
000000000017 000a0000002a R_X86_64_REX_GOTP 0000000000000004 n -
4
...
[Steven]: In this example, the pie.o is just the object file which is not linked. And if you link this pie.o file, the linker will solve all these three R_X86_64_REX_GOTP symbol with same 'int n' symbol address, and will create only one relocation item for 'int n' in the linked executable relocation section. So, we only need fixup once for the 'int n' in the linked executable.

Thanks
Steven


Ard Biesheuvel
 

(adding back our friends on cc)

On 1 August 2016 at 12:36, Shi, Steven <steven.shi@intel.com> wrote:
On 1 August 2016 at 12:16, Shi, Steven <steven.shi@intel.com> wrote:
OK, another example:

pie.s:

.globl foo
foo:
pushq n@GOTPCREL(%rip)
popq %rax
ret

.globl bar
bar:
pushq n@GOTPCREL(%rip)
popq %rax
ret

.globl n
n:
.quad 0

compile and link using

gcc -c -o pie.o /tmp/pie.s
ld -q -o pie pie.o -e foo

gives me

Relocation section '.rela.text' at offset 0x260 contains 2 entries:
Offset Info Type Sym. Value Sym. Name + Addend
0000004000b2 000700000009 R_X86_64_GOTPCREL 00000000004000be
n -
4
0000004000b9 000700000009 R_X86_64_GOTPCREL 00000000004000be
n -
4

Here, pie is a fully linked binary.
[Steven]: In this example, there are two R_X86_64_GOTPCREL relocation
address in the .text section need to fix up with same symbol runtime address,
these two relocation addresses are not same, and every relocation address
will be fixed up once. I don't see the problem of "Having multiple fixups for
the same symbol in the .reloc section", and current GenFw code should has
no problem on this example.
How many times will your code call CoffAddFixup() for the address of n?
[Steven]: My understanding is the n address (6000c8) is not a GOTPCREL relocation in .text section, but the 4000b2 and 4000b2 are GOTPCREL relocation in .text section. My CoffAddFixup() will only call twice for 4000b2 and 4000b2, but not for n address (6000c8).

Disassembly of section .text:

00000000004000b0 <foo>:
4000b0: ff 35 12 00 20 00 pushq 0x200012(%rip) # 6000c8 <n+0x200008>
4000b6: 58 pop %rax
4000b7: c3 retq

00000000004000b8 <bar>:
4000b8: ff 35 0a 00 20 00 pushq 0x20000a(%rip) # 6000c8 <n+0x200008>
4000be: 58 pop %rax
4000bf: c3 retq

00000000004000c0 <n>:
...
CoffAddFixup() must be used for absolute symbol references only. These
instructions contain relative symbol references, which are
recalculated in WriteSections64().

The only absolute symbol reference is the GOT entry for 'n', and your
code (in WriteRelocations64()) calculates the address of the GOT entry
(which is always in .text BTW) and adds a fixup for it, i.e.,

+ CoffAddFixup(
+ (UINT32)(UINTN)((UINT64)
mCoffSectionsOffset[RelShdr->sh_info] + GoTPcRelPtrOffset),
+ EFI_IMAGE_REL_BASED_DIR64);

This code adds a fixup to the PE/COFF .reloc section for the GOT entry
containing the address of 'n', and the instructions perform a IP
relative load of the contents of the GOT entry to retrieve the address
of 'n'.

By adding two fixups, the PE/COFF loader will apply the load offset
twice, resulting in an incorrect value.

--
Ard.


Shi, Steven <steven.shi@...>
 


CoffAddFixup() must be used for absolute symbol references only. These
instructions contain relative symbol references, which are
recalculated in WriteSections64().

The only absolute symbol reference is the GOT entry for 'n', and your
code (in WriteRelocations64()) calculates the address of the GOT entry
(which is always in .text BTW) and adds a fixup for it, i.e.,

+ CoffAddFixup(
+ (UINT32)(UINTN)((UINT64)
mCoffSectionsOffset[RelShdr->sh_info] + GoTPcRelPtrOffset),
+ EFI_IMAGE_REL_BASED_DIR64);

This code adds a fixup to the PE/COFF .reloc section for the GOT entry
containing the address of 'n', and the instructions perform a IP
relative load of the contents of the GOT entry to retrieve the address
of 'n'.

By adding two fixups, the PE/COFF loader will apply the load offset
twice, resulting in an incorrect value.
OK, I get your point now. Yes, the current patch could generate multiple fixups for the same GOT relocation entry. How about we introduce a simple IsDuplicatedCoffFixup() to check whether a converting fixup offset is duplicated before we use CoffAddFixup() to really add it? If it is new, we add it, otherwise just skip it.


Ard Biesheuvel
 

On 2 August 2016 at 13:40, Shi, Steven <steven.shi@intel.com> wrote:

CoffAddFixup() must be used for absolute symbol references only. These
instructions contain relative symbol references, which are
recalculated in WriteSections64().

The only absolute symbol reference is the GOT entry for 'n', and your
code (in WriteRelocations64()) calculates the address of the GOT entry
(which is always in .text BTW) and adds a fixup for it, i.e.,

+ CoffAddFixup(
+ (UINT32)(UINTN)((UINT64)
mCoffSectionsOffset[RelShdr->sh_info] + GoTPcRelPtrOffset),
+ EFI_IMAGE_REL_BASED_DIR64);

This code adds a fixup to the PE/COFF .reloc section for the GOT entry
containing the address of 'n', and the instructions perform a IP
relative load of the contents of the GOT entry to retrieve the address
of 'n'.

By adding two fixups, the PE/COFF loader will apply the load offset
twice, resulting in an incorrect value.
OK, I get your point now. Yes, the current patch could generate multiple fixups for the same GOT relocation entry. How about we introduce a simple IsDuplicatedCoffFixup() to check whether a converting fixup offset is duplicated before we use CoffAddFixup() to really add it? If it is new, we add it, otherwise just skip it.
That could work, but you have to be aware that fixups are best emitted
in the order they need to be applied in the binary, or it will become
very inefficient. (Please refer to the PE/COFF spec section that
explains the layout of the .reloc section)

What it comes down to is that relocations are grouped by target page,
and for every place in the page that requires a relocation to be
applied, a 4 bit type is emitted followed by a 12-bit offset, which is
the offset into the current page. If you emit fixups for the current
instruction, followed by one for the GOT, it will basically take two
'page switches' every time.

So it would be better to simply emit the relocations, but introduce a
sorting pass that merges all duplicates as well.

Thanks,
Ard.


Jordan Justen
 

Where does this patch stand? I think Ard was asking for it to be
split, but also wondering if it was really required...

Some devs are still reporting unsupported relocation errors during the
build. (mischief on irc, for example.)

-Jordan

On 2016-08-02 05:00:43, Ard Biesheuvel wrote:
On 2 August 2016 at 13:40, Shi, Steven <steven.shi@intel.com> wrote:

CoffAddFixup() must be used for absolute symbol references only. These
instructions contain relative symbol references, which are
recalculated in WriteSections64().

The only absolute symbol reference is the GOT entry for 'n', and your
code (in WriteRelocations64()) calculates the address of the GOT entry
(which is always in .text BTW) and adds a fixup for it, i.e.,

+ CoffAddFixup(
+ (UINT32)(UINTN)((UINT64)
mCoffSectionsOffset[RelShdr->sh_info] + GoTPcRelPtrOffset),
+ EFI_IMAGE_REL_BASED_DIR64);

This code adds a fixup to the PE/COFF .reloc section for the GOT entry
containing the address of 'n', and the instructions perform a IP
relative load of the contents of the GOT entry to retrieve the address
of 'n'.

By adding two fixups, the PE/COFF loader will apply the load offset
twice, resulting in an incorrect value.
OK, I get your point now. Yes, the current patch could generate multiple fixups for the same GOT relocation entry. How about we introduce a simple IsDuplicatedCoffFixup() to check whether a converting fixup offset is duplicated before we use CoffAddFixup() to really add it? If it is new, we add it, otherwise just skip it.
That could work, but you have to be aware that fixups are best emitted
in the order they need to be applied in the binary, or it will become
very inefficient. (Please refer to the PE/COFF spec section that
explains the layout of the .reloc section)

What it comes down to is that relocations are grouped by target page,
and for every place in the page that requires a relocation to be
applied, a 4 bit type is emitted followed by a 12-bit offset, which is
the offset into the current page. If you emit fixups for the current
instruction, followed by one for the GOT, it will basically take two
'page switches' every time.

So it would be better to simply emit the relocations, but introduce a
sorting pass that merges all duplicates as well.

Thanks,
Ard.
_______________________________________________
edk2-devel mailing list
edk2-devel@lists.01.org
https://lists.01.org/mailman/listinfo/edk2-devel


Ard Biesheuvel
 

On 3 aug. 2016, at 22:13, Jordan Justen <jordan.l.justen@intel.com> wrote:

Where does this patch stand? I think Ard was asking for it to be
split, but also wondering if it was really required...

Some devs are still reporting unsupported relocation errors during the
build. (mischief on irc, for example.)
The patch is not correct in its current form. PLT relocations (0x4) can be handled ok, so we can split that off and merge it. The GOT based relocation handling needs non-trivial rework, and may currently create corrupt binaries. Which unhandled reloc types are being reported?



On 2016-08-02 05:00:43, Ard Biesheuvel wrote:
On 2 August 2016 at 13:40, Shi, Steven <steven.shi@intel.com> wrote:

CoffAddFixup() must be used for absolute symbol references only. These
instructions contain relative symbol references, which are
recalculated in WriteSections64().

The only absolute symbol reference is the GOT entry for 'n', and your
code (in WriteRelocations64()) calculates the address of the GOT entry
(which is always in .text BTW) and adds a fixup for it, i.e.,

+ CoffAddFixup(
+ (UINT32)(UINTN)((UINT64)
mCoffSectionsOffset[RelShdr->sh_info] + GoTPcRelPtrOffset),
+ EFI_IMAGE_REL_BASED_DIR64);

This code adds a fixup to the PE/COFF .reloc section for the GOT entry
containing the address of 'n', and the instructions perform a IP
relative load of the contents of the GOT entry to retrieve the address
of 'n'.

By adding two fixups, the PE/COFF loader will apply the load offset
twice, resulting in an incorrect value.
OK, I get your point now. Yes, the current patch could generate multiple fixups for the same GOT relocation entry. How about we introduce a simple IsDuplicatedCoffFixup() to check whether a converting fixup offset is duplicated before we use CoffAddFixup() to really add it? If it is new, we add it, otherwise just skip it.
That could work, but you have to be aware that fixups are best emitted
in the order they need to be applied in the binary, or it will become
very inefficient. (Please refer to the PE/COFF spec section that
explains the layout of the .reloc section)

What it comes down to is that relocations are grouped by target page,
and for every place in the page that requires a relocation to be
applied, a 4 bit type is emitted followed by a 12-bit offset, which is
the offset into the current page. If you emit fixups for the current
instruction, followed by one for the GOT, it will basically take two
'page switches' every time.

So it would be better to simply emit the relocations, but introduce a
sorting pass that merges all duplicates as well.

Thanks,
Ard.
_______________________________________________
edk2-devel mailing list
edk2-devel@lists.01.org
https://lists.01.org/mailman/listinfo/edk2-devel