Rust 與 LLVM簡單介紹
LLVM 簡介
LLVM 全稱是 Low Level Virtual Machine,它是源自 the University of Illinois 的一個研究專案,該專案旨在提供一個現代化的編譯機制,使得對任何程式語言既可以做到靜態編譯也可以動態編譯,而且非常高效。後來 LLVM 專案逐漸發展,並孵化了許多子專案,比如 Clang,LLDB, OpenMP 等。
一般來說,編譯器會將源語言翻譯為一種「中間語言(IR)」,之後再利用後端程式和裝置將中間語言翻譯為目標平臺的組合語言。不同編譯器的中間語言 IR 是不一樣的,IR 集中體現了編譯器的主要特徵——演算法、優化方式、組譯流程等等。
LLVM 是非常棒的平臺,很多語言都支援將原始碼編譯為 LLVM 中間語言,比如說 Rust、Julia。這兩門語言都靠著 LLVM 的強大能力而實現了很多強大的語言功能。
Rust on LLVM
Rust 是一門靜態編譯型語言,Rust 官方的編譯器叫 rustc,rustc 使用 LLVM 作為編譯器後端。rustc 對 Rust 原始碼進行詞法分析、靜態型別檢查,最終將程式碼編譯為 LLVM IR。一般 LLVM IR 的檔案字尾是 `.ll`,Bytecode 檔案字尾是 `.bc`,其中 IR 是可讀的。
使用 LLVM 的話,編譯的流程就清晰很多:
原始碼 -> AST -> LLVM IR -> LLVM Bytecode -> ASM -> Native
將Rust 程式碼編譯為 LLVM IR
// Minimal example program used to produce the LLVM IR listing shown later in this article.
fn main() {
// Bind the string literal to a local named `str`; it shows up as `%str` in the emitted IR.
let str = "rust";
// Print the value followed by a newline using the `{}` (Display) placeholder.
println!("{}",str);
}
複製程式碼
`--emit` 的 `llvm-ir` 引數會將 Rust 原始碼編譯為 LLVM IR 中間語言(輸出 `.ll` 檔),`asm` 則是編譯為組合語言(輸出 `.s` 檔)。
rustc --emit=asm,llvm-ir file.rs
LLVM 的 `lli` 命令可以直接執行 `.ll` 檔案,`llc` 命令可以將 `.ll` 檔案編譯為組合語言。
lli file.ll
Stack dump:
0. Program arguments: /usr/local/llvm/bin/lli file.ll
0 lli 0x000000010e0027d1 llvm::sys::PrintStackTrace(llvm::raw_ostream&) + 37
1 lli 0x000000010e00200b llvm::sys::RunSignalHandlers() + 52
2 lli 0x000000010e002b98 SignalHandler(int) + 144
3 libsystem_platform.dylib 0x00007fffe93ebb3a _sigtramp + 26
4 libsystem_platform.dylib 0x0000000000000009 _sigtramp + 381764841
5 libsystem_platform.dylib 0x000000010f137270 _sigtramp + 634697552
6 lli 0x000000010dd38578 llvm::MCJIT::runFunction(llvm::Function*,llvm::ArrayRef<llvm::GenericValue>) + 508
7 lli 0x000000010dcc57cf llvm::ExecutionEngine::runFunctionAsMain(llvm::Function*,std::__1::vector<std::__1::basic_string<char,std::__1::char_traits<char>,std::__1::allocator<char> >,std::__1::allocator<std::__1::basic_string<char,std::__1::allocator<char> > > > const&,char const* const*) + 1159
8 lli 0x000000010d9e3363 main + 7675
9 libdyld.dylib 0x00007fffe91dc235 start + 1
10 libdyld.dylib 0x0000000000000002 start + 383925710
[1] 89982 segmentation fault /usr/local/llvm/bin/lli file.ll
複製程式碼
file.rs 的 LLVM IR 內容
簡單解釋下 LLVM IR的含義:
- `@` 代表全域性變數,`%` 代表區域性變數
- `%a = alloca i32, align 4`:在棧上分配記憶體
- `load` 讀出內容,`store` 寫入內容
- `i32`:`i` 表示 integer,`32` 表示 32 位,即四位元組
- `align 4`:4 位元組對齊
- `%add = add nsw i32 %0, %1`:加法操作
以上只是簡單的一個示例,只是介紹學習的途徑。想要深入的瞭解IR,可以參考 LLVM Language Reference Manual。
; ModuleID = 'file.7rcbfp3g-cgu.0'
source_filename = "file.7rcbfp3g-cgu.0"
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.7.0"
; Named struct types. Each real field is preceded by a zero-length array.
; Formatter: five 16-byte pairs, two i32, one i8 and 7 bytes of padding = 96 bytes,
; matching the dereferenceable(96) annotations used on Formatter pointers in this module.
%"core::fmt::Formatter" = type { [0 x i64],{ i64,i64 },[0 x i64],{ i64,i64 },[0 x i64],{ {}*,[3 x i64]* },[0 x i64],{ i64*,i64* },[0 x i64],{ [0 x { i8*,i8* }]*,i64 },[0 x i32],i32,[0 x i32],i32,[0 x i8],i8,[7 x i8] }
%"core::fmt::Void" = type { [0 x i8],{},[0 x i8],%"core::marker::PhantomData<*mut core::ops::function::Fn<(),Output=()>>",[0 x i8] }
%"core::marker::PhantomData<*mut core::ops::function::Fn<(),Output=()>>" = type {}
; Arguments: three (pointer, i64) pairs at field indices 1, 3 and 5 = 48 bytes,
; matching the dereferenceable(48) annotations used on Arguments pointers in this module.
%"core::fmt::Arguments" = type { [0 x i64],{ [0 x { [0 x i8]*,i64 }]*,i64 },[0 x i64],{ i64*,i64 },[0 x i64],{ [0 x { i8*,i8* }]*,i64 },[0 x i64] }
%"unwind::libunwind::_Unwind_Exception" = type { [0 x i64],i64,[0 x i64],void (i32,%"unwind::libunwind::_Unwind_Exception"*)*,[0 x i64],[6 x i64],[0 x i64] }
%"unwind::libunwind::_Unwind_Context" = type { [0 x i8] }
; Vtable for the closure created in std::rt::lang_start:
; drop function, two i64 8 slots (size and align), then three i32 (i8**) entry points.
@vtable.0 = private unnamed_addr constant { void (i8**)*,i64,i64,i32 (i8**)*,i32 (i8**)*,i32 (i8**)* } { void (i8**)* @_ZN4core3ptr18real_drop_in_place17h7f6d35a144fd7fecE,i64 8,i64 8,i32 (i8**)* @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h63f922eebf54c4d5E",i32 (i8**)* @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h63f922eebf54c4d5E",i32 (i8**)* @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17hf307176a8af38675E" },align 8
; @0: the four bytes of the string literal "rust".
@0 = private unnamed_addr constant <{ [4 x i8] }> <{ [4 x i8] c"rust" }>,align 1
; @1: an empty byte array; @2: a single newline byte.
@1 = private unnamed_addr constant <{ [0 x i8] }> zeroinitializer,align 1
@2 = private unnamed_addr constant <{ [1 x i8] }> <{ [1 x i8] c"\0A" }>,align 1
; @3: two (pointer, 8-byte length) pairs: (@1, 0) and (@2, 1, stored little-endian).
; Three zero indices are needed to step pointer -> struct field -> array element and yield an i8*.
@3 = private unnamed_addr constant <{ i8*,[8 x i8],i8*,[8 x i8] }> <{ i8* getelementptr inbounds (<{ [0 x i8] }>,<{ [0 x i8] }>* @1,i32 0,i32 0,i32 0),[8 x i8] zeroinitializer,i8* getelementptr inbounds (<{ [1 x i8] }>,<{ [1 x i8] }>* @2,i32 0,i32 0,i32 0),[8 x i8] c"\01\00\00\00\00\00\00\00" }>,align 8
; std::rt::lang_start
; Spills %main into a stack slot and passes that slot plus @vtable.0 to
; std::rt::lang_start_internal as a (data pointer, vtable pointer) pair,
; followed by %argc and %argv. Returns lang_start_internal's i64 result.
; Function Attrs: uwtable
define hidden i64 @_ZN3std2rt10lang_start17h545e8eadaec5e3ffE(void ()* nonnull %main,i64 %argc,i8** %argv) unnamed_addr #0 {
start:
; stack slot that holds the `main` function pointer
%_7 = alloca i8*,align 8
%0 = bitcast i8** %_7 to void ()**
store void ()* %main,void ()** %0,align 8
; the same slot, viewed as the opaque data pointer
%1 = bitcast i8** %_7 to {}*
; call std::rt::lang_start_internal
%2 = call i64 @_ZN3std2rt19lang_start_internal17hc1ac2c20e9f8edf2E({}* nonnull align 1 %1,[3 x i64]* noalias readonly align 8 dereferenceable(24) bitcast ({ void (i8**)*,i64,i64,i32 (i8**)*,i32 (i8**)*,i32 (i8**)* }* @vtable.0 to [3 x i64]*),i64 %argc,i8** %argv)
br label %bb1
bb1: ; preds = %start
ret i64 %2
}
; std::rt::lang_start::{{closure}}
; Loads the `void ()` function pointer stored behind %arg0, calls it, then
; returns the i32 produced by <() as std::process::Termination>::report.
; Function Attrs: uwtable
define internal i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h63f922eebf54c4d5E"(i8** noalias readonly align 8 dereferenceable(8) %arg0) unnamed_addr #0 {
start:
%0 = bitcast i8** %arg0 to void ()**
; a load needs both the result type and the typed pointer operand
%1 = load void ()*,void ()** %0,align 8,!nonnull !1
call void %1()
br label %bb1
bb1: ; preds = %start
; call <() as std::process::Termination>::report
%2 = call i32 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17hc9096258341f5e78E"()
br label %bb2
bb2: ; preds = %bb1
ret i32 %2
}
; std::sys::unix::process::process_common::ExitCode::as_i32
; Reads the single byte behind %self and returns it zero-extended to i32.
; Function Attrs: inlinehint uwtable
define internal i32 @_ZN3std3sys4unix7process14process_common8ExitCode6as_i3217h07f1ac4bfdd8cfccE(i8* noalias readonly align 1 dereferenceable(1) %self) unnamed_addr #1 {
start:
; load the i8 stored behind %self
%0 = load i8,i8* %self,align 1
; widen i8 -> i32 without sign extension
%1 = zext i8 %0 to i32
ret i32 %1
}
; <&T as core::fmt::Display>::fmt
; %self points at a { [0 x i8]*,i64 } pair. Both fields are loaded and forwarded,
; together with %f, to <str as core::fmt::Display>::fmt, whose i1 result is returned.
; Function Attrs: uwtable
define internal zeroext i1 @"_ZN44_$LT$$RF$T$u20$as$u20$core..fmt..Display$GT$3fmt17h5f4bd8ab2c6676ecE"({ [0 x i8]*,i64 }* noalias readonly align 8 dereferenceable(16) %self,%"core::fmt::Formatter"* align 8 dereferenceable(96) %f) unnamed_addr #0 {
start:
; field 0: pointer to the bytes
%0 = getelementptr inbounds { [0 x i8]*,i64 },{ [0 x i8]*,i64 }* %self,i32 0,i32 0
%1 = load [0 x i8]*,[0 x i8]** %0,align 8,!nonnull !1
; field 1: the i64 length
%2 = getelementptr inbounds { [0 x i8]*,i64 },{ [0 x i8]*,i64 }* %self,i32 0,i32 1
%3 = load i64,i64* %2,align 8
; call <str as core::fmt::Display>::fmt
%4 = call zeroext i1 @"_ZN42_$LT$str$u20$as$u20$core..fmt..Display$GT$3fmt17h91c2539c48905fe1E"([0 x i8]* noalias nonnull readonly align 1 %1,i64 %3,%"core::fmt::Formatter"* align 8 dereferenceable(96) %f)
br label %bb1
bb1: ; preds = %start
ret i1 %4
}
; core::fmt::ArgumentV1::new
; Builds the returned { i8*,i8* } pair from %x and %f. Each is stored into a
; temporary through its original pointer type and reloaded through a
; %"core::fmt::Void"-based type (the %transmute_temp slots), then written to
; fields 0 and 1 of %_0.
; Function Attrs: uwtable
define internal { i8*,i8* } @_ZN4core3fmt10ArgumentV13new17hdfa3ca615e4b084aE({ [0 x i8]*,i64 }* noalias readonly align 8 dereferenceable(16) %x,i1 ({ [0 x i8]*,i64 }*,%"core::fmt::Formatter"*)* nonnull %f) unnamed_addr #0 {
start:
%transmute_temp1 = alloca %"core::fmt::Void"*,align 8
%transmute_temp = alloca i1 (%"core::fmt::Void"*,%"core::fmt::Formatter"*)*,align 8
%_0 = alloca { i8*,i8* },align 8
; reinterpret %f: store with its real type, reload as a Void-taking function pointer
%0 = bitcast i1 (%"core::fmt::Void"*,%"core::fmt::Formatter"*)** %transmute_temp to i1 ({ [0 x i8]*,i64 }*,%"core::fmt::Formatter"*)**
store i1 ({ [0 x i8]*,i64 }*,%"core::fmt::Formatter"*)* %f,i1 ({ [0 x i8]*,i64 }*,%"core::fmt::Formatter"*)** %0,align 8
%1 = load i1 (%"core::fmt::Void"*,%"core::fmt::Formatter"*)*,i1 (%"core::fmt::Void"*,%"core::fmt::Formatter"*)** %transmute_temp,align 8,!nonnull !1
br label %bb1
bb1: ; preds = %start
; reinterpret %x the same way: store as { [0 x i8]*,i64 }*, reload as Void*
%2 = bitcast %"core::fmt::Void"** %transmute_temp1 to { [0 x i8]*,i64 }**
store { [0 x i8]*,i64 }* %x,{ [0 x i8]*,i64 }** %2,align 8
%3 = load %"core::fmt::Void"*,%"core::fmt::Void"** %transmute_temp1,align 8,!nonnull !1
br label %bb2
bb2: ; preds = %bb1
; field 0 of the result <- value pointer
%4 = bitcast { i8*,i8* }* %_0 to %"core::fmt::Void"**
store %"core::fmt::Void"* %3,%"core::fmt::Void"** %4,align 8
; field 1 of the result <- formatter function pointer
%5 = getelementptr inbounds { i8*,i8* },{ i8*,i8* }* %_0,i32 0,i32 1
%6 = bitcast i8** %5 to i1 (%"core::fmt::Void"*,%"core::fmt::Formatter"*)**
store i1 (%"core::fmt::Void"*,%"core::fmt::Formatter"*)* %1,i1 (%"core::fmt::Void"*,%"core::fmt::Formatter"*)** %6,align 8
; read both fields back and return them as a first-class aggregate
%7 = getelementptr inbounds { i8*,i8* },{ i8*,i8* }* %_0,i32 0,i32 0
%8 = load i8*,i8** %7,align 8,!nonnull !1
%9 = getelementptr inbounds { i8*,i8* },{ i8*,i8* }* %_0,i32 0,i32 1
%10 = load i8*,i8** %9,align 8,!nonnull !1
%11 = insertvalue { i8*,i8* } undef,i8* %8,0
%12 = insertvalue { i8*,i8* } %11,i8* %10,1
ret { i8*,i8* } %12
}
; core::fmt::Arguments::new_v1
; Fills the caller-provided sret slot (%0) of type %"core::fmt::Arguments":
;   pair at offset 0 <- (%pieces.0, %pieces.1)
;   field index 3    <- copy of %_4, whose pointer half is set to null
;   field index 5    <- (%args.0, %args.1)
; Function Attrs: inlinehint uwtable
define internal void @_ZN4core3fmt9Arguments6new_v117h853b1a3cb02ecdf5E(%"core::fmt::Arguments"* noalias nocapture sret dereferenceable(48),[0 x { [0 x i8]*,i64 }]* noalias nonnull readonly align 8 %pieces.0,i64 %pieces.1,[0 x { i8*,i8* }]* noalias nonnull readonly align 8 %args.0,i64 %args.1) unnamed_addr #1 {
start:
; temporary (pointer, i64) pair; only its pointer half is written (null) before it is copied
%_4 = alloca { i64*,i64 },align 8
%1 = bitcast { i64*,i64 }* %_4 to {}**
store {}* null,{}** %1,align 8
; pieces pair lives at offset 0 of the result, reached through a bitcast of %0
%2 = bitcast %"core::fmt::Arguments"* %0 to { [0 x { [0 x i8]*,i64 }]*,i64 }*
%3 = getelementptr inbounds { [0 x { [0 x i8]*,i64 }]*,i64 },{ [0 x { [0 x i8]*,i64 }]*,i64 }* %2,i32 0,i32 0
store [0 x { [0 x i8]*,i64 }]* %pieces.0,[0 x { [0 x i8]*,i64 }]** %3,align 8
%4 = getelementptr inbounds { [0 x { [0 x i8]*,i64 }]*,i64 },{ [0 x { [0 x i8]*,i64 }]*,i64 }* %2,i32 0,i32 1
store i64 %pieces.1,i64* %4,align 8
; copy both halves of %_4 into field index 3
%5 = getelementptr inbounds %"core::fmt::Arguments",%"core::fmt::Arguments"* %0,i32 0,i32 3
%6 = getelementptr inbounds { i64*,i64 },{ i64*,i64 }* %_4,i32 0,i32 0
%7 = load i64*,i64** %6,align 8
%8 = getelementptr inbounds { i64*,i64 },{ i64*,i64 }* %_4,i32 0,i32 1
%9 = load i64,i64* %8,align 8
%10 = getelementptr inbounds { i64*,i64 },{ i64*,i64 }* %5,i32 0,i32 0
store i64* %7,i64** %10,align 8
%11 = getelementptr inbounds { i64*,i64 },{ i64*,i64 }* %5,i32 0,i32 1
store i64 %9,i64* %11,align 8
; args pair goes into field index 5
%12 = getelementptr inbounds %"core::fmt::Arguments",%"core::fmt::Arguments"* %0,i32 0,i32 5
%13 = getelementptr inbounds { [0 x { i8*,i8* }]*,i64 },{ [0 x { i8*,i8* }]*,i64 }* %12,i32 0,i32 0
store [0 x { i8*,i8* }]* %args.0,[0 x { i8*,i8* }]** %13,align 8
%14 = getelementptr inbounds { [0 x { i8*,i8* }]*,i64 },{ [0 x { i8*,i8* }]*,i64 }* %12,i32 0,i32 1
store i64 %args.1,i64* %14,align 8
ret void
}
; core::ops::function::FnOnce::call_once{{vtable.shim}}
; Loads the i8* stored behind %arg0 and forwards it to
; core::ops::function::FnOnce::call_once, returning that call's i32 result.
; Function Attrs: uwtable
define internal i32 @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17hf307176a8af38675E"(i8** %arg0) unnamed_addr #0 {
start:
; zero-sized stack slot; not referenced again in this function
%arg1 = alloca {},align 1
%0 = load i8*,i8** %arg0,!nonnull !1
; call core::ops::function::FnOnce::call_once
%1 = call i32 @_ZN4core3ops8function6FnOnce9call_once17h5714eef014de52c8E(i8* nonnull %0)
br label %bb1
bb1: ; preds = %start
ret i32 %1
}
; core::ops::function::FnOnce::call_once
; Spills the incoming i8* (%0) into %arg0 and invokes std::rt::lang_start::{{closure}}
; on that slot. Normal path: return the closure's i32. Unwind path: the landing pad
; saves the { i8*,i32 } exception pair into %personalityslot, which bb4 reloads and resumes.
; Function Attrs: uwtable
define internal i32 @_ZN4core3ops8function6FnOnce9call_once17h5714eef014de52c8E(i8* nonnull) unnamed_addr #0 personality i32 (i32,i32,i64,%"unwind::libunwind::_Unwind_Exception"*,%"unwind::libunwind::_Unwind_Context"*)* @rust_eh_personality {
start:
%personalityslot = alloca { i8*,i32 },align 8
; zero-sized stack slot; not referenced again in this function
%arg1 = alloca {},align 1
%arg0 = alloca i8*,align 8
store i8* %0,i8** %arg0,align 8
; invoke std::rt::lang_start::{{closure}}
%1 = invoke i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h63f922eebf54c4d5E"(i8** align 8 dereferenceable(8) %arg0)
to label %bb1 unwind label %cleanup
bb1: ; preds = %start
br label %bb2
bb2: ; preds = %bb1
ret i32 %1
bb3: ; preds = %cleanup
br label %bb4
bb4: ; preds = %bb3
; rebuild the { i8*,i32 } exception pair from %personalityslot and continue unwinding
%2 = bitcast { i8*,i32 }* %personalityslot to i8**
%3 = load i8*,i8** %2,align 8
%4 = getelementptr inbounds { i8*,i32 },{ i8*,i32 }* %personalityslot,i32 0,i32 1
%5 = load i32,i32* %4,align 8
%6 = insertvalue { i8*,i32 } undef,i8* %3,0
%7 = insertvalue { i8*,i32 } %6,i32 %5,1
resume { i8*,i32 } %7
cleanup: ; preds = %start
%8 = landingpad { i8*,i32 }
cleanup
; save both halves of the landing-pad value into %personalityslot
%9 = extractvalue { i8*,i32 } %8,0
%10 = extractvalue { i8*,i32 } %8,1
%11 = getelementptr inbounds { i8*,i32 },{ i8*,i32 }* %personalityslot,i32 0,i32 0
store i8* %9,i8** %11,align 8
%12 = getelementptr inbounds { i8*,i32 },{ i8*,i32 }* %personalityslot,i32 0,i32 1
store i32 %10,i32* %12,align 8
br label %bb3
}
; core::ptr::real_drop_in_place
; Takes %arg0 but does nothing with it: the body is a single `ret void`.
; Function Attrs: uwtable
define internal void @_ZN4core3ptr18real_drop_in_place17h7f6d35a144fd7fecE(i8** align 8 dereferenceable(8) %arg0) unnamed_addr #0 {
start:
ret void
}
; <() as std::process::Termination>::report
; Calls <std::process::ExitCode as std::process::Termination>::report with the
; constant i8 0 and returns that call's i32 result.
; Function Attrs: inlinehint uwtable
define internal i32 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17hc9096258341f5e78E"() unnamed_addr #1 {
start:
; call <std::process::ExitCode as std::process::Termination>::report
%0 = call i32 @"_ZN68_$LT$std..process..ExitCode$u20$as$u20$std..process..Termination$GT$6report17h67475b03fcc05555E"(i8 0)
br label %bb1
bb1: ; preds = %start
ret i32 %0
}
; <std::process::ExitCode as std::process::Termination>::report
; Spills the incoming i8 (%0) into the stack slot %self and returns the i32
; produced by ExitCode::as_i32 called on that slot.
; Function Attrs: inlinehint uwtable
define internal i32 @"_ZN68_$LT$std..process..ExitCode$u20$as$u20$std..process..Termination$GT$6report17h67475b03fcc05555E"(i8) unnamed_addr #1 {
start:
%self = alloca i8,align 1
; a store needs both the value and the typed destination pointer
store i8 %0,i8* %self,align 1
; call std::sys::unix::process::process_common::ExitCode::as_i32
%1 = call i32 @_ZN3std3sys4unix7process14process_common8ExitCode6as_i3217h07f1ac4bfdd8cfccE(i8* noalias readonly align 1 dereferenceable(1) %self)
br label %bb1
bb1: ; preds = %start
ret i32 %1
}
; file::main
; Lowered form of the example's `main`:
;   1. %str <- (pointer to @0, length 4)
;   2. build one { i8*,i8* } argument from &%str and <&T as Display>::fmt via ArgumentV1::new
;   3. build a core::fmt::Arguments in %_3 from the 2 pieces in @3 and that 1 argument
;   4. pass %_3 to std::io::stdio::_print
; Function Attrs: uwtable
define internal void @_ZN4file4main17hfa5dc64818afec14E() unnamed_addr #0 {
start:
%_11 = alloca i64*,align 8
%_10 = alloca [1 x { i8*,i8* }],align 8
%_3 = alloca %"core::fmt::Arguments",align 8
%str = alloca { [0 x i8]*,i64 },align 8
; %str.0 <- pointer to the bytes of "rust"
%0 = getelementptr inbounds { [0 x i8]*,i64 },{ [0 x i8]*,i64 }* %str,i32 0,i32 0
store [0 x i8]* bitcast (<{ [4 x i8] }>* @0 to [0 x i8]*),[0 x i8]** %0,align 8
; %str.1 <- length 4
%1 = getelementptr inbounds { [0 x i8]*,i64 },{ [0 x i8]*,i64 }* %str,i32 0,i32 1
store i64 4,i64* %1,align 8
; %_11 <- address of %str, then read it straight back
%2 = bitcast i64** %_11 to { [0 x i8]*,i64 }**
store { [0 x i8]*,i64 }* %str,{ [0 x i8]*,i64 }** %2,align 8
%3 = bitcast i64** %_11 to { [0 x i8]*,i64 }**
%4 = load { [0 x i8]*,i64 }*,{ [0 x i8]*,i64 }** %3,align 8,!nonnull !1
; call core::fmt::ArgumentV1::new
%5 = call { i8*,i8* } @_ZN4core3fmt10ArgumentV13new17hdfa3ca615e4b084aE({ [0 x i8]*,i64 }* noalias readonly align 8 dereferenceable(16) %4,i1 ({ [0 x i8]*,i64 }*,%"core::fmt::Formatter"*)* nonnull @"_ZN44_$LT$$RF$T$u20$as$u20$core..fmt..Display$GT$3fmt17h5f4bd8ab2c6676ecE")
%6 = extractvalue { i8*,i8* } %5,0
%7 = extractvalue { i8*,i8* } %5,1
br label %bb1
bb1: ; preds = %start
; write the returned pair into element 0 of the one-element argument array %_10
%8 = bitcast [1 x { i8*,i8* }]* %_10 to { i8*,i8* }*
%9 = getelementptr inbounds { i8*,i8* },{ i8*,i8* }* %8,i32 0,i32 0
store i8* %6,i8** %9,align 8
%10 = getelementptr inbounds { i8*,i8* },{ i8*,i8* }* %8,i32 0,i32 1
store i8* %7,i8** %10,align 8
%11 = bitcast [1 x { i8*,i8* }]* %_10 to [0 x { i8*,i8* }]*
; call core::fmt::Arguments::new_v1
call void @_ZN4core3fmt9Arguments6new_v117h853b1a3cb02ecdf5E(%"core::fmt::Arguments"* noalias nocapture sret dereferenceable(48) %_3,[0 x { [0 x i8]*,i64 }]* noalias nonnull readonly align 8 bitcast (<{ i8*,[8 x i8],i8*,[8 x i8] }>* @3 to [0 x { [0 x i8]*,i64 }]*),i64 2,[0 x { i8*,i8* }]* noalias nonnull readonly align 8 %11,i64 1)
br label %bb2
bb2: ; preds = %bb1
; call std::io::stdio::_print
call void @_ZN3std2io5stdio6_print17h8760c403d70231e9E(%"core::fmt::Arguments"* noalias nocapture dereferenceable(48) %_3)
br label %bb3
bb3: ; preds = %bb2
ret void
}
; External functions referenced by this module but defined elsewhere.
; std::rt::lang_start_internal
; Parameters: data pointer, vtable pointer, argc (i64), argv.
; Function Attrs: uwtable
declare i64 @_ZN3std2rt19lang_start_internal17hc1ac2c20e9f8edf2E({}* nonnull align 1,[3 x i64]* noalias readonly align 8 dereferenceable(24),i64,i8**) unnamed_addr #0
; <str as core::fmt::Display>::fmt
; Parameters: byte pointer, i64 length, formatter; matches the three-argument call in <&T as Display>::fmt.
; Function Attrs: uwtable
declare zeroext i1 @"_ZN42_$LT$str$u20$as$u20$core..fmt..Display$GT$3fmt17h91c2539c48905fe1E"([0 x i8]* noalias nonnull readonly align 1,i64,%"core::fmt::Formatter"* align 8 dereferenceable(96)) unnamed_addr #0
; Personality routine named in the `personality` clause of FnOnce::call_once.
; Function Attrs: nounwind uwtable
declare i32 @rust_eh_personality(i32,i32,i64,%"unwind::libunwind::_Unwind_Exception"*,%"unwind::libunwind::_Unwind_Context"*) unnamed_addr #2
; std::io::stdio::_print
; Function Attrs: uwtable
declare void @_ZN3std2io5stdio6_print17h8760c403d70231e9E(%"core::fmt::Arguments"* noalias nocapture dereferenceable(48)) unnamed_addr #0
; C-style entry point. %0 is the i32 argument count and %1 the i8** argument vector.
; Sign-extends %0 to i64, calls std::rt::lang_start with a pointer to file::main,
; and truncates the i64 result back to i32 for the return value.
define i32 @main(i32,i8**) unnamed_addr #3 {
top:
%2 = sext i32 %0 to i64
; call std::rt::lang_start
%3 = call i64 @_ZN3std2rt10lang_start17h545e8eadaec5e3ffE(void ()* @_ZN4file4main17hfa5dc64818afec14E,i64 %2,i8** %1)
%4 = trunc i64 %3 to i32
ret i32 %4
}
; Attribute groups; functions in this module refer to them by the trailing #0..#3 on their signatures.
attributes #0 = { uwtable "no-frame-pointer-elim"="true" "probe-stack"="__rust_probestack" "target-cpu"="core2" }
attributes #1 = { inlinehint uwtable "no-frame-pointer-elim"="true" "probe-stack"="__rust_probestack" "target-cpu"="core2" }
attributes #2 = { nounwind uwtable "no-frame-pointer-elim"="true" "probe-stack"="__rust_probestack" "target-cpu"="core2" }
attributes #3 = { "no-frame-pointer-elim"="true" "target-cpu"="core2" }
; Module flags: a single entry, !0 ("PIE Level").
!llvm.module.flags = !{!0}
!0 = !{i32 7,!"PIE Level",i32 2}
; Empty metadata node; it is the operand of the `!nonnull !1` annotations on loads in this module.
!1 = !{}
複製程式碼
參考: