- 
                Notifications
    You must be signed in to change notification settings 
- Fork 144
Support Arm dynamic linking #244
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Conversation
| Currently, only the stage 0 compiler and stage 1 compiler can be generated, and the stage 1 compiler will encounter a Segmentation fault when running. However, the stage 0 compiler can still compile a simple program and run the executable via QEMU: /* test.c */
int main(void)
{
    printf("%x %x %x\n", 1, 2, 3);
    printf("%x %x %x %x\n", 1, 2, 3, 4);
    printf("%x %x %x %x %x\n", 1, 2, 3, 4, 5);
    return 0;
}$ out/shecc --dynlink -o test test.cThen, we can use  $ arm-linux-gnueabi-readelf --relocs test
Relocation section '.rel.plt' at offset 0x260 contains 2 entries:
 Offset     Info    Type            Sym.Value  Sym. Name
000102a8  00000116 R_ARM_JUMP_SLOT   00000000   __libc_start_main
000102ac  00000216 R_ARM_JUMP_SLOT   00000000   printfHowever, I ran the  $ qemu-arm -L /usr/arm-linux-gnueabi/ test
1 2 3
1 2 3 40830000
1 2 3 40830000 10224Notice that the second and third  I think this is a potential issue that shecc pushes wrong values to the stack to make (glibc's)  | 
| FWIW, I disassemble the  test.asm$ arm-linux-gnueabi-objdump -d test
test:     file format elf32-littlearm
Disassembly of section .text:
000100b4 <.text>:
   100b4:	e3a0b000 	mov	fp, #0
   100b8:	e3a0e000 	mov	lr, #0
   100bc:	e49d1004 	pop	{r1}		@ (ldr r1, [sp], #4)
   100c0:	e1a0200d 	mov	r2, sp
   100c4:	e52d2004 	push	{r2}		@ (str r2, [sp, #-4]!)
   100c8:	e52d0004 	push	{r0}		@ (str r0, [sp, #-4]!)
   100cc:	e3a0c000 	mov	ip, #0
   100d0:	e52dc004 	push	{ip}		@ (str ip, [sp, #-4]!)
   100d4:	e30000ec 	movw	r0, #236	@ 0xec
   100d8:	e3400001 	movt	r0, #1
   100dc:	e3a03000 	mov	r3, #0
   100e0:	eb000067 	bl	0x10284
   100e4:	e3a0007f 	mov	r0, #127	@ 0x7f
   100e8:	eb000005 	bl	0x10104
   100ec:	e1a09000 	mov	r9, r0
   100f0:	e1a0a001 	mov	sl, r1
   100f4:	e3008004 	movw	r8, #4
   100f8:	e3408000 	movt	r8, #0
   100fc:	e04dd008 	sub	sp, sp, r8
   10100:	e1a0c00d 	mov	ip, sp
   10104:	eb000005 	bl	0x10120
   10108:	e3008004 	movw	r8, #4
   1010c:	e3408000 	movt	r8, #0
   10110:	e08dd008 	add	sp, sp, r8
   10114:	e1a00000 	nop			@ (mov r0, r0)
   10118:	e3a07001 	mov	r7, #1
   1011c:	ef000000 	svc	0x00000000
   10120:	e1a00009 	mov	r0, r9
   10124:	e1a0100a 	mov	r1, sl
   10128:	eaffffff 	b	0x1012c
   1012c:	e50de004 	str	lr, [sp, #-4]
   10130:	e3008044 	movw	r8, #68	@ 0x44
   10134:	e3408000 	movt	r8, #0
   10138:	e04dd008 	sub	sp, sp, r8
   1013c:	e3000224 	movw	r0, #548	@ 0x224
   10140:	e3400001 	movt	r0, #1
   10144:	e3a01001 	mov	r1, #1
   10148:	e3a02002 	mov	r2, #2
   1014c:	e3a03003 	mov	r3, #3
   10150:	e58d0004 	str	r0, [sp, #4]
   10154:	e58d1008 	str	r1, [sp, #8]
   10158:	e58d200c 	str	r2, [sp, #12]
   1015c:	e58d3010 	str	r3, [sp, #16]
   10160:	e59d0004 	ldr	r0, [sp, #4]
   10164:	e59d1008 	ldr	r1, [sp, #8]
   10168:	e59d200c 	ldr	r2, [sp, #12]
   1016c:	e59d3010 	ldr	r3, [sp, #16]
+  10170:	eb000046 	bl	0x10290
   10174:	e300022e 	movw	r0, #558	@ 0x22e
   10178:	e3400001 	movt	r0, #1
   1017c:	e3a01001 	mov	r1, #1
   10180:	e3a02002 	mov	r2, #2
   10184:	e3a03003 	mov	r3, #3
   10188:	e3a04004 	mov	r4, #4
   1018c:	e58d0014 	str	r0, [sp, #20]
   10190:	e58d1018 	str	r1, [sp, #24]
   10194:	e58d201c 	str	r2, [sp, #28]
   10198:	e58d3020 	str	r3, [sp, #32]
   1019c:	e58d4024 	str	r4, [sp, #36]	@ 0x24
   101a0:	e59d0014 	ldr	r0, [sp, #20]
   101a4:	e59d1018 	ldr	r1, [sp, #24]
   101a8:	e59d201c 	ldr	r2, [sp, #28]
   101ac:	e59d3020 	ldr	r3, [sp, #32]
   101b0:	e59d4024 	ldr	r4, [sp, #36]	@ 0x24
+  101b4:	eb000035 	bl	0x10290
   101b8:	e300023b 	movw	r0, #571	@ 0x23b
   101bc:	e3400001 	movt	r0, #1
   101c0:	e3a01001 	mov	r1, #1
   101c4:	e3a02002 	mov	r2, #2
   101c8:	e3a03003 	mov	r3, #3
   101cc:	e3a04004 	mov	r4, #4
   101d0:	e3a05005 	mov	r5, #5
   101d4:	e58d0028 	str	r0, [sp, #40]	@ 0x28
   101d8:	e58d102c 	str	r1, [sp, #44]	@ 0x2c
   101dc:	e58d2030 	str	r2, [sp, #48]	@ 0x30
   101e0:	e58d3034 	str	r3, [sp, #52]	@ 0x34
   101e4:	e58d4038 	str	r4, [sp, #56]	@ 0x38
   101e8:	e58d503c 	str	r5, [sp, #60]	@ 0x3c
   101ec:	e59d0028 	ldr	r0, [sp, #40]	@ 0x28
   101f0:	e59d102c 	ldr	r1, [sp, #44]	@ 0x2c
   101f4:	e59d2030 	ldr	r2, [sp, #48]	@ 0x30
   101f8:	e59d3034 	ldr	r3, [sp, #52]	@ 0x34
   101fc:	e59d4038 	ldr	r4, [sp, #56]	@ 0x38
   10200:	e59d503c 	ldr	r5, [sp, #60]	@ 0x3c
+  10204:	eb000021 	bl	0x10290
   10208:	e3a00000 	mov	r0, #0
   1020c:	e1a00000 	nop			@ (mov r0, r0)
   10210:	e3008044 	movw	r8, #68	@ 0x44
   10214:	e3408000 	movt	r8, #0
   10218:	e08dd008 	add	sp, sp, r8
   1021c:	e51de004 	ldr	lr, [sp, #-4]
   10220:	e12fff3e 	blx	lr
Disassembly of section .plt:
00010270 <.plt>:
   10270:	e52de004 	push	{lr}		@ (str lr, [sp, #-4]!)
   10274:	e300a2a4 	movw	sl, #676	@ 0x2a4
   10278:	e340a001 	movt	sl, #1
   1027c:	e1a0e00a 	mov	lr, sl
   10280:	e59ef000 	ldr	pc, [lr]
   10284:	e300c2a8 	movw	ip, #680	@ 0x2a8
   10288:	e340c001 	movt	ip, #1
   1028c:	e59cf000 	ldr	pc, [ip]
+  10290:	e300c2ac 	movw	ip, #684	@ 0x2ac
   10294:	e340c001 	movt	ip, #1
   10298:	e59cf000 	ldr	pc, [ip]
 | 
b698f38    to
    13dfa35      
    Compare
  
    | Consider the minimal change below: --- a/src/main.c
+++ b/src/main.c
@@ -85,7 +85,7 @@ int main(int argc, char *argv[])
     global_init();
     /* include libc */
-    if (libc)
+    if (libc && !dynlink)
         libc_generate();
     /* load and parse source code into IR */It disables the built-in libc when  | 
| 
 
 
 This suggests that parameter passing is handled differently. Currently, the virtual registers (0-7) are mapped to ARM physical registers (r0-r7). Looking at the code,  
 In ARM calling convention, arguments beyond r3 should go to the stack, not to r4-r7. This is definitely a bug. When | 
| The original code had a bug where function calls with more than 4 arguments violated the AAPCS: 
 However, shecc was incorrectly placing all arguments (0-7) in registers r0-r7, causing stack-based arguments to be passed incorrectly. Consider the changes below: diff --git a/src/reg-alloc.c b/src/reg-alloc.c
index c66a061..51cd2ea 100644
--- a/src/reg-alloc.c
+++ b/src/reg-alloc.c
@@ -520,12 +520,42 @@ void reg_alloc(void)
                         is_pushing_args = 1;
                     }
 
-                    src0 = prepare_operand(bb, insn->rs1, -1);
-                    ir = bb_add_ph2_ir(bb, OP_assign);
-                    ir->src0 = src0;
-                    ir->dest = args++;
-                    REGS[ir->dest].var = insn->rs1;
-                    REGS[ir->dest].polluted = 0;
+                    /* Check if next call is to external function (for ARM
+                     * calling convention)
+                     */
+                    insn_t *next_insn = insn->next;
+                    func_t *target_func = NULL;
+                    bool is_external_call = false;
+
+                    /* Look ahead for the OP_call to determine if it's external
+                     */
+                    while (next_insn && next_insn->opcode == OP_push)
+                        next_insn = next_insn->next;
+                    if (next_insn && next_insn->opcode == OP_call) {
+                        target_func = find_func(next_insn->str);
+                        is_external_call = target_func && !target_func->bbs;
+                    }
+
+                    /* ARM calling convention for external functions: first 4
+                     * args in r0-r3, rest on stack
+                     */
+                    if (is_external_call && args >= 4) {
+                        /* Arguments 4+: keep on stack, don't load into
+                         * registers. The variable is already on stack from
+                         * earlier spill_alive().
+                         */
+                    } else {
+                        /* Normal behavior for internal functions or first 4
+                         * args
+                         */
+                        src0 = prepare_operand(bb, insn->rs1, -1);
+                        ir = bb_add_ph2_ir(bb, OP_assign);
+                        ir->src0 = src0;
+                        ir->dest = args;
+                        REGS[ir->dest].var = insn->rs1;
+                        REGS[ir->dest].polluted = 0;
+                    }
+                    args++;
                     break;
                 case OP_call:
                     callee_func = find_func(insn->str);
@@ -535,8 +565,8 @@ void reg_alloc(void)
                     ir = bb_add_ph2_ir(bb, OP_call);
                     strcpy(ir->func_name, insn->str);
                     if (dynlink) {
-                        func_t *target_func = find_func(ir->func_name);
-                        target_func->is_used = true;
+                        func_t *target_fn = find_func(ir->func_name);
+                        target_fn->is_used = true;
                     }
 
                     is_pushing_args = 0;Before the fix: All arguments were always loaded into sequential registers (r0, r1, r2, r3, r4, r5, r6, r7). 
 Before Fix (Incorrect) After Fix (Correct)  | 
      
        
              This comment was marked as resolved.
        
        
      
    
  This comment was marked as resolved.
| I would like to ask @lecopzer for reviewing. | 
| I tried to fix the Arm calling convention issue, but I found that the main problem seems not be register allocation. The actual problem is stack manipulation. Consider a code like     1042c:	e3a03005 	mov	r3, #5
+  10430:	e58d3004 	str	r3, [sp, #4]
   10434:	e3a03004 	mov	r3, #4
+  10438:	e58d3000 	str	r3, [sp]
   1043c:	e3a03003 	mov	r3, #3
   10440:	e3a02002 	mov	r2, #2
   10444:	e3a01001 	mov	r1, #1
   10448:	e59f0010 	ldr	r0, [pc, #16]	@ 10460 <main+0x40>
   1044c:	ebffffac 	bl	10304 <printf@plt>We can notice that only  However, if using shecc to compile, it produces as follows:    10148:	e30001b4 	movw	r0, #436	@ 0x1b4
   1014c:	e3400001 	movt	r0, #1
   10150:	e3a01001 	mov	r1, #1
   10154:	e3a02002 	mov	r2, #2
   10158:	e3a03003 	mov	r3, #3
   1015c:	e3a04004 	mov	r4, #4
   10160:	e3a05005 	mov	r5, #5
+  10164:	e58d0004 	str	r0, [sp, #4]
+  10168:	e58d1008 	str	r1, [sp, #8]
+  1016c:	e58d200c 	str	r2, [sp, #12]
+  10170:	e58d3010 	str	r3, [sp, #16]
+  10174:	e58d4014 	str	r4, [sp, #20]
+  10178:	e58d5018 	str	r5, [sp, #24]
   1017c:	e59d0004 	ldr	r0, [sp, #4]
   10180:	e59d1008 	ldr	r1, [sp, #8]
   10184:	e59d200c 	ldr	r2, [sp, #12]
   10188:	e59d3010 	ldr	r3, [sp, #16]
   1018c:	e59d4014 	ldr	r4, [sp, #20]
   10190:	e59d5018 	ldr	r5, [sp, #24]
   10194:	eb00001b 	bl	0x10208The machine code uses r0-r5 to store the arguments, pushes all of them onto stack and load the values back from stack. This causes glibc's  I think shecc generates machine code that pushes all arguments onto stack because  Lines 581 to 584 in 6a97bd7 
 I'm not sure why shecc behaves as described above, but I will try to review and fix it for AAPCS compliance. | 
      
        
              This comment was marked as resolved.
        
        
      
    
  This comment was marked as resolved.
13dfa35    to
    1aceea3      
    Compare
  
    | I have rebased onto the  
 I have also temporarily created a new commit to apply part of the changes, and I will review everything to resolve any potential issues. | 
f81adb8    to
    a96619d      
    Compare
  
    2a23bfc    to
    e76c3af      
    Compare
  
    Enable the GitHub Actions to validate the compiler under the dynamic linking mode. However, if the target architecture is RISC-V, the workflow will skip validation because because this mode has not been implemented for it.
Originally, the default path of ld-linux.so is set to a specific path for Arm architecture. However, Considering that developers may manually install the ARM GNU toolchain, the sysroot path may not match the default path. This commit improves arm.mk to use a more convenient method to automatically detect the correct sysroot path.
This commit refines the program headers to generate a better ELF
executable file, with the following changes:
- Always create two load segments, regardless of static or dynamic
  linking.
  - The first load segment is readable and executable, and includes
    the read-only sections such as .text, .rodata, etc.
  - The second load segment is readable and writable, and contains the
    remaining sections, including .data and .bss.
  - In dynamic linking mode, the .rel.plt and .plt sections reside in
    the first segment, while the other dynamic sections are placed in
    the second segment.
- Set the alignment value of both load segments to 0x1000 (the default
  page size) to meet the alignment requirement when the ELF interpreter
  loads the executable.
- Adjust the offset and virtual address of the first load segment to
  ensure that dynamically linked executable can be correctly loaded by
  the kernel program loader.
  - Now, the ELF header and program headers are also loaded into the
    first segment but they are never used.
- Update other segments as necessary to reflect the above changes.
    Since the .rodata section currently resides in the read-only segment of the compiled program, this commit modifies a test case that originally attempted to write to .rodata to avoid such an operation.
Since the dynamic linking was introduced in the previous commit, this commit updates the README to describe how to use dynamic linking mode and provides basic usage examples for illustration.
In dynamic linking mode, consider external functions may use data types such as 'long long int' or other equivalents. The program may encounter a Bus error if external functions access 8-byte data on the stack that is not properly aligned. To prevent the aforementioned issue, these changes adjust the Arm code generator to ensure that the program's stack is always aligned with 8 bytes. Since the RISC-V architecture does not yet support dynamic linking, the corresponding code generator is not modified in this commit.
Since lib/c.h was introduced in the previous commit, both c.c and c.h contained duplicated definitions. This commit adjusts the files to remove the duplicated definitions by making c.c include c.h and reuse the necessary definitions, such as macros and forward declarations. The inliner tool is also enhanced to generate two functions, 'libc_decl()' and 'libc_impl()'. The compiler will use these functions to automatically generate the source of c.h and c.c for compiled programs when needed.
Because the current compiler supports static linking and dynamic linking modes, the snapshots differ between these modes. This commit updates the related shell scripts and the build system to adjust the snapshot generation process according to the target architecture and the linking mode.
Since the compiler supports both static linking and dynamic linking, this commit adds two new documents to explain the following: - Describe how to build static/dynamic linking version of shecc. - Stack frame layout in static/dynamic linking modes. - Function arguments handling and calling convention. - Runtime execution flow. - Explain the dynamic sections for dynamic linking mode.
5e6ef42    to
    cd9ceba      
    Compare
  
    | Since  Additionally, the required documentation has been added. | 
| if (dynlink) { | ||
| phnum += 2; | ||
| shnum += 7; | ||
| shstrndx += 7; | ||
| shoff += | ||
| dynamic_sections.elf_interp->size + | ||
| dynamic_sections.elf_relplt->size + dynamic_sections.elf_plt->size + | ||
| dynamic_sections.elf_got->size + dynamic_sections.elf_dynstr->size + | ||
| dynamic_sections.elf_dynsym->size + | ||
| dynamic_sections.elf_dynamic->size; | ||
| } | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Problems:
- Magic numbers 2and7lack comments or constant definitions
- Calculation logic scattered across multiple locations, prone to inconsistency
| if (func->bbs) | ||
| elf_offset += 4; | ||
| else if (dynlink) | ||
| elf_offset += 12; | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Mention accurate PLT call offset calculation (12 bytes = 3 instructions) in comments.
| if (is_external_call) | ||
| emit(__sw(__AL, __r12, __sp, 16)); | ||
| emit(__bl(__AL, ofs)); | ||
| if (is_external_call) | ||
| emit(__lw(__AL, __r12, __sp, 16)); | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why save r12?
- r12 (IP) is a scratch register in AAPCS, external functions can freely modify it
- shecc uses r12 to store the global stack pointer
- Therefore, every external call must save/restore it
Consolidate comments.
| The  
 However from the implementation, internal functions still use r0-r7 for parameters, only external functions follow AAPCS. This is a compiler optimization, but has risks. Consider the code below: // Internal function
void internal_func(int a, int b, int c, int d, int e) {
    // shecc will read e from r4
}
// If this function is later changed to be exported via dynamic linking...
// External callers will put e on stack, internal code reads from r4 → Error!Actions: 
 | 
|  | ||
| (Currently not supported) | ||
|  | ||
| ## Runtime execution flow | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add interaction diagram with glibc dynamic linker.
Supplement explanation of lazy binding vs. immediate binding.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you have any recommended tools for creating the required diagram?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you have any recommended tools for creating the required diagram?
See:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
4 issues found across 34 files
Prompt for AI agents (all 4 issues)
Understand the root cause of the following 4 issues and fix them.
<file name="docs/dynamic-linking.md">
<violation number="1" location="docs/dynamic-linking.md:106">
The PLT example uses `#:upeer16`, but the ARM assembler directive is `#:upper16`; the current typo makes the documented instruction invalid.</violation>
</file>
<file name="src/arm.c">
<violation number="1" location="src/arm.c:367">
The POP helper fails to increment SP because the encoded instruction clears the write-back bit; set W=1 so popping updates the stack pointer.</violation>
</file>
<file name="Makefile">
<violation number="1" location="Makefile:117">
make check-snapshot now fails unless DYNLINK is set because $(DYNLINK) expands to empty and the script expects a second argument. Please default this call to static mode when DYNLINK is unset.</violation>
<violation number="2" location="Makefile:128">
make update-snapshot now aborts unless DYNLINK is defined because the helper script insists on two arguments. Please provide a default value (e.g., static mode) when DYNLINK is unset.</violation>
</file>
React with 👍 or 👎 to teach cubic. Mention @cubic-dev-ai to give feedback, ask questions, or re-run the review.
| ``` | ||
| push {lr} @ (str lr, [sp, #-4]!) | ||
| movw sl, #:lower16:(&GOT[2]) | ||
| movt sl, #:upeer16:(&GOT[2]) | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The PLT example uses #:upeer16, but the ARM assembler directive is #:upper16; the current typo makes the documented instruction invalid.
Prompt for AI agents
Address the following comment on docs/dynamic-linking.md at line 106:
<comment>The PLT example uses `#:upeer16`, but the ARM assembler directive is `#:upper16`; the current typo makes the documented instruction invalid.</comment>
<file context>
@@ -0,0 +1,128 @@
+```
+push	{lr}		@ (str lr, [sp, #-4]!)
+movw	sl, #:lower16:(&GOT[2])
+movt	sl, #:upeer16:(&GOT[2])
+mov	lr, sl
+ldr	pc, [lr]
</file context>
| movt sl, #:upeer16:(&GOT[2]) | |
| movt sl, #:upper16:(&GOT[2]) | 
|  | ||
| int __pop_word(arm_cond_t cond, arm_reg rt) | ||
| { | ||
| return arm_encode(cond, (0x4 << 4) | 0x9, 0xd, rt, 0x4); | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The POP helper fails to increment SP because the encoded instruction clears the write-back bit; set W=1 so popping updates the stack pointer.
Prompt for AI agents
Address the following comment on src/arm.c at line 367:
<comment>The POP helper fails to increment SP because the encoded instruction clears the write-back bit; set W=1 so popping updates the stack pointer.</comment>
<file context>
@@ -357,6 +357,16 @@ int __ldm(arm_cond_t cond, int w, arm_reg rn, int reg_list)
+
+int __pop_word(arm_cond_t cond, arm_reg rt)
+{
+    return arm_encode(cond, (0x4 << 4) | 0x9, 0xd, rt, 0x4);
+}
+
</file context>
| return arm_encode(cond, (0x4 << 4) | 0x9, 0xd, rt, 0x4); | |
| return arm_encode(cond, (0x4 << 4) | 0xB, 0xd, rt, 0x4); | 
| $(VECHO) "Updating snapshot for %s\n" $(ARCH) | ||
| tests/update-snapshots.sh $(ARCH) | ||
| $(VECHO) "Updating snapshot for %s (DYNLINK=%s)\n" $(ARCH) $(DYNLINK) | ||
| tests/update-snapshots.sh $(ARCH) $(DYNLINK) | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
make update-snapshot now aborts unless DYNLINK is defined because the helper script insists on two arguments. Please provide a default value (e.g., static mode) when DYNLINK is unset.
Prompt for AI agents
Address the following comment on Makefile at line 128:
<comment>make update-snapshot now aborts unless DYNLINK is defined because the helper script insists on two arguments. Please provide a default value (e.g., static mode) when DYNLINK is unset.</comment>
<file context>
@@ -72,44 +86,46 @@ config:
-	$(VECHO) "Updating snapshot for %s\n" $(ARCH)
-	tests/update-snapshots.sh $(ARCH)
+	$(VECHO) "Updating snapshot for %s (DYNLINK=%s)\n" $(ARCH) $(DYNLINK)
+	tests/update-snapshots.sh $(ARCH) $(DYNLINK)
 	$(VECHO) "  OK\n"
 
</file context>
| tests/update-snapshots.sh $(ARCH) $(DYNLINK) | |
| tests/update-snapshots.sh $(ARCH) $(if $(DYNLINK),$(DYNLINK),0) | 
| $(VECHO) "Checking snapshot for %s\n" $(ARCH) | ||
| tests/check-snapshots.sh $(ARCH) | ||
| $(VECHO) "Checking snapshot for %s (%s mode)\n" $(ARCH) $(DYNLINK) | ||
| tests/check-snapshots.sh $(ARCH) $(DYNLINK) | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
make check-snapshot now fails unless DYNLINK is set because $(DYNLINK) expands to empty and the script expects a second argument. Please default this call to static mode when DYNLINK is unset.
Prompt for AI agents
Address the following comment on Makefile at line 117:
<comment>make check-snapshot now fails unless DYNLINK is set because $(DYNLINK) expands to empty and the script expects a second argument. Please default this call to static mode when DYNLINK is unset.</comment>
<file context>
@@ -72,44 +86,46 @@ config:
-	$(VECHO) "Checking snapshot for %s\n" $(ARCH)
-	tests/check-snapshots.sh $(ARCH)
+	$(VECHO) "Checking snapshot for %s (%s mode)\n" $(ARCH) $(DYNLINK)
+	tests/check-snapshots.sh $(ARCH) $(DYNLINK)
 	$(VECHO) "  OK\n"
 
</file context>
| tests/check-snapshots.sh $(ARCH) $(DYNLINK) | |
| tests/check-snapshots.sh $(ARCH) $(if $(DYNLINK),$(DYNLINK),0) | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
6 issues found across 34 files
Prompt for AI agents (all 6 issues)
Understand the root cause of the following 6 issues and fix them.
<file name="lib/c.h">
<violation number="1" location="lib/c.h:18">
Define INT_MIN as a signed negative constant; 0x80000000 has unsigned type, so using it for INT_MIN causes incorrect comparisons after integer promotions.</violation>
</file>
<file name="docs/static-linking.md">
<violation number="1" location="docs/static-linking.md:43">
The epilogue description reverses the stack restoration direction; the generated ARM return code adds total_size back into sp before popping lr, so documenting a subtraction misleads readers about how frames unwind.</violation>
</file>
<file name="docs/dynamic-linking.md">
<violation number="1" location="docs/dynamic-linking.md:106">
The PLT example misspells the ARM relocation modifier: it should use `#:upper16`, otherwise the listed instruction will not assemble successfully.</violation>
</file>
<file name="mk/arm.mk">
<violation number="1" location="mk/arm.mk:46">
The `ifndef LD_LINUX_PATH` check never fires because the variable was already defined (possibly as an empty string) by the earlier `:= $(shell ...)` assignment. Use an emptiness check so we fail fast when the sysroot lookup fails.</violation>
</file>
<file name="Makefile">
<violation number="1" location="Makefile:117">
`make check-snapshot` no longer works out of the box because the DYNLINK argument is omitted when the variable is unset, causing the helper script to exit with an error.</violation>
<violation number="2" location="Makefile:128">
`make update-snapshot` now fails because the DYNLINK parameter is missing when the variable is not explicitly set, triggering the helper script’s usage error.</violation>
</file>
React with 👍 or 👎 to teach cubic. Mention @cubic-dev-ai to give feedback, ask questions, or re-run the review.
| #define false 0 | ||
|  | ||
| #define INT_MAX 0x7fffffff | ||
| #define INT_MIN 0x80000000 | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Define INT_MIN as a signed negative constant; 0x80000000 has unsigned type, so using it for INT_MIN causes incorrect comparisons after integer promotions.
Prompt for AI agents
Address the following comment on lib/c.h at line 18:
<comment>Define INT_MIN as a signed negative constant; 0x80000000 has unsigned type, so using it for INT_MIN causes incorrect comparisons after integer promotions.</comment>
<file context>
@@ -0,0 +1,85 @@
+#define false 0
+
+#define INT_MAX 0x7fffffff
+#define INT_MIN 0x80000000
+
+#if defined(__arm__)
</file context>
| #define INT_MIN 0x80000000 | |
| #define INT_MIN (-2147483647 - 1) | 
| * For Arm32, the total size will be aligned to 8 bytes by the code generator. | ||
| * The size of the unused space is 20 bytes and is only used in dynamic linking mode. | ||
|  | ||
| When a function completes execution, it restores the caller's stack pointer by subtracting `total_size` from `sp`, retrieves the return address from `[sp - 4]` and transfers control back to the caller. | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The epilogue description reverses the stack restoration direction; the generated ARM return code adds total_size back into sp before popping lr, so documenting a subtraction misleads readers about how frames unwind.
Prompt for AI agents
Address the following comment on docs/static-linking.md at line 43:
<comment>The epilogue description reverses the stack restoration direction; the generated ARM return code adds total_size back into sp before popping lr, so documenting a subtraction misleads readers about how frames unwind.</comment>
<file context>
@@ -0,0 +1,66 @@
+  * For Arm32, the total size will be aligned to 8 bytes by the code generator.
+  * The size of the unused space is 20 bytes and is only used in dynamic linking mode.
+
+When a function completes execution, it restores the caller's stack pointer by subtracting `total_size` from `sp`, retrieves the return address from `[sp - 4]` and transfers control back to the caller.
+
+## About function arguments handling
</file context>
| ``` | ||
| push {lr} @ (str lr, [sp, #-4]!) | ||
| movw sl, #:lower16:(&GOT[2]) | ||
| movt sl, #:upeer16:(&GOT[2]) | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The PLT example misspells the ARM relocation modifier: it should use #:upper16, otherwise the listed instruction will not assemble successfully.
Prompt for AI agents
Address the following comment on docs/dynamic-linking.md at line 106:
<comment>The PLT example misspells the ARM relocation modifier: it should use `#:upper16`, otherwise the listed instruction will not assemble successfully.</comment>
<file context>
@@ -0,0 +1,128 @@
+```
+push	{lr}		@ (str lr, [sp, #-4]!)
+movw	sl, #:lower16:(&GOT[2])
+movt	sl, #:upeer16:(&GOT[2])
+mov	lr, sl
+ldr	pc, [lr]
</file context>
| movt sl, #:upeer16:(&GOT[2]) | |
| movt sl, #:upper16:(&GOT[2]) | 
| LD_LINUX_PATH := $(shell cd $(LD_LINUX_PATH) 2>/dev/null && pwd) | ||
| LD_LINUX_PATH := $(LD_LINUX_PATH)/$(shell echo $(CROSS_COMPILE) | sed s'/.$$//')/libc | ||
| LD_LINUX_PATH := $(shell cd $(LD_LINUX_PATH) 2>/dev/null && pwd) | ||
| ifndef LD_LINUX_PATH | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The ifndef LD_LINUX_PATH check never fires because the variable was already defined (possibly as an empty string) by the earlier := $(shell ...) assignment. Use an emptiness check so we fail fast when the sysroot lookup fails.
Prompt for AI agents
Address the following comment on mk/arm.mk at line 46:
<comment>The `ifndef LD_LINUX_PATH` check never fires because the variable was already defined (possibly as an empty string) by the earlier `:= $(shell ...)` assignment. Use an emptiness check so we fail fast when the sysroot lookup fails.</comment>
<file context>
@@ -5,5 +5,54 @@ ARCH_DEFS = \
+            LD_LINUX_PATH := $(shell cd $(LD_LINUX_PATH) 2>/dev/null && pwd)
+            LD_LINUX_PATH := $(LD_LINUX_PATH)/$(shell echo $(CROSS_COMPILE) | sed s'/.$$//')/libc
+            LD_LINUX_PATH := $(shell cd $(LD_LINUX_PATH) 2>/dev/null && pwd)
+            ifndef LD_LINUX_PATH
+                LD_LINUX_PATH = /usr/$(shell echo $(CROSS_COMPILE) | sed s'/.$$//')
+                LD_LINUX_PATH := $(shell cd $(LD_LINUX_PATH) 2>/dev/null && pwd)
</file context>
| $(VECHO) "Updating snapshot for %s\n" $(ARCH) | ||
| tests/update-snapshots.sh $(ARCH) | ||
| $(VECHO) "Updating snapshot for %s (DYNLINK=%s)\n" $(ARCH) $(DYNLINK) | ||
| tests/update-snapshots.sh $(ARCH) $(DYNLINK) | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
make update-snapshot now fails because the DYNLINK parameter is missing when the variable is not explicitly set, triggering the helper script’s usage error.
Prompt for AI agents
Address the following comment on Makefile at line 128:
<comment>`make update-snapshot` now fails because the DYNLINK parameter is missing when the variable is not explicitly set, triggering the helper script’s usage error.</comment>
<file context>
@@ -72,44 +86,46 @@ config:
-	$(VECHO) "Updating snapshot for %s\n" $(ARCH)
-	tests/update-snapshots.sh $(ARCH)
+	$(VECHO) "Updating snapshot for %s (DYNLINK=%s)\n" $(ARCH) $(DYNLINK)
+	tests/update-snapshots.sh $(ARCH) $(DYNLINK)
 	$(VECHO) "  OK\n"
 
</file context>
| tests/update-snapshots.sh $(ARCH) $(DYNLINK) | |
| tests/update-snapshots.sh $(ARCH) $(if $(strip $(DYNLINK)),$(DYNLINK),0) | 
| $(VECHO) "Checking snapshot for %s\n" $(ARCH) | ||
| tests/check-snapshots.sh $(ARCH) | ||
| $(VECHO) "Checking snapshot for %s (%s mode)\n" $(ARCH) $(DYNLINK) | ||
| tests/check-snapshots.sh $(ARCH) $(DYNLINK) | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
make check-snapshot no longer works out of the box because the DYNLINK argument is omitted when the variable is unset, causing the helper script to exit with an error.
Prompt for AI agents
Address the following comment on Makefile at line 117:
<comment>`make check-snapshot` no longer works out of the box because the DYNLINK argument is omitted when the variable is unset, causing the helper script to exit with an error.</comment>
<file context>
@@ -72,44 +86,46 @@ config:
-	$(VECHO) "Checking snapshot for %s\n" $(ARCH)
-	tests/check-snapshots.sh $(ARCH)
+	$(VECHO) "Checking snapshot for %s (%s mode)\n" $(ARCH) $(DYNLINK)
+	tests/check-snapshots.sh $(ARCH) $(DYNLINK)
 	$(VECHO) "  OK\n"
 
</file context>
| tests/check-snapshots.sh $(ARCH) $(DYNLINK) | |
| tests/check-snapshots.sh $(ARCH) $(if $(strip $(DYNLINK)),$(DYNLINK),0) | 
| The documentation should address PLT performance overhead. First function call path: 
 Subsequent calls: Only steps 1-2-5 Thus, the overhead is 5-10 CPU cycles (compared to direct call). For a bootstrapping compiler, this overhead is acceptable. | 
| elf_write_int(dynamic_sections.elf_plt, | ||
| __movw(__AL, __r12, addr_of_got)); | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add/clarify comments:
- PLT stub executes and immediately jumps (lw __pc, [__r12])
- Target function will restore correct r12 from [sp+16]
- r12 modification is not persistent
| int addr_of_got = dynamic_sections.elf_got_start + PTR_SIZE * 2; | ||
| int end = dynamic_sections.plt_size - PLT_FIXUP_SIZE; | ||
| elf_write_int(dynamic_sections.elf_plt, __push_reg(__AL, __lr)); | ||
| elf_write_int(dynamic_sections.elf_plt, __movw(__AL, __r10, addr_of_got)); | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ARM EABI specifies r10 might be the stack limit register, but on Linux ARM EABI, r10 is callee-saved, and the dynamic linker will protect it, so there is no problem.
Code is correct, but lacks comments explaining register choices.
| i += PTR_SIZE) | ||
| elf_write_int(dynamic_sections.elf_got, | ||
| dynamic_sections.elf_plt_start); | ||
| elf_write_int(dynamic_sections.elf_got, 0); /* End with 0x0 */ | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why is GOT[N] = 0 needed as an end marker?
Dynamic linker determines GOT size via DT_PLTGOT and DT_PLTRELSZ, does not need a terminator.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I compiled a simple program with GCC and referred to its GOT generation, but I misunderstood that a terminator was necessary. I will fix it.
| emit(__sw(__AL, __lr, __sp, -4)); | ||
| emit(__movw(__AL, __r8, ph2_ir->src0 + 4)); | ||
| emit(__movt(__AL, __r8, ph2_ir->src0 + 4)); | ||
| ofs = ALIGN_UP(ph2_ir->src0 + 4, MIN_ALIGNMENT); | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ARM EABI requires stack to be aligned to 8 bytes at public interface boundaries (i.e., when calling functions). Internal stack frames can be unaligned, but must be aligned when calling external functions.
More comments.
| The behavior of the current implementation: 
 It means: 
 For the current stage, this is acceptable. Document the limitations. | 
| Consider to place abi.sh as an additional test suite for ABI compliance. | 
| 
 
 I will try to improve shecc to make all internal and external functions follow AAPCS. However, If we consider that external functions may call internal functions, I think there is another issue related to accessing global variables. Since internal functions rely on r12 to access global variables on the global stack, such accesses may become incorrect because r12 could be modified by external functions. Therefore, how about the following conceptual approach: 
 Since the starting address of  | 
| 
 Our goal here is to ensure AAPCS compliance with minimal effort. The above proposal appears sound, provided that the ABI compliance test suite passes. | 
The proposed changes enhance shecc to generate dynamically linked executables. When the
--dynlinkflag is specified, shecc produces sections such as.pltand.gotfor the compiled programs, allowing the executables to leverage the ELF interpreter and the GNU C library to run.This pull request is still a work in progress due to the following incomplete tasks:
README.mdto describe dynamic linking.c.candc.hto avoid duplicationsUpdated usage: (10/11 11:43 updated)