1. 程式人生 > 程式設計 >Rust 與 LLVM簡單介紹

Rust 與 LLVM簡單介紹

LLVM 簡介

LLVM 最初是 Low Level Virtual Machine 的縮寫(如今官方已將 "LLVM" 視為專案的正式名稱,不再當作縮寫),它源自伊利諾大學(the University of Illinois)的一個研究專案,該專案旨在提供一個現代化的編譯機制,使得任何程式語言都既可以靜態編譯也可以動態編譯,而且非常高效。後來 LLVM 專案逐漸發展,並孵化了許多子專案,比如 Clang、LLDB、OpenMP 等。

一般來說,編譯器會先將源語言翻譯為一種“中間語言(IR)”,之後再利用後端程式和裝置將中間語言翻譯為目標平臺的組合語言。不同編譯器的中間語言 IR 是不一樣的,IR 集中體現了編譯器的主要特徵——演算法、優化方式、彙編流程等等。

LLVM 是非常棒的平臺,很多語言都支援將原始碼編譯為 LLVM 中間語言,比如說 Rust、Julia。這兩門語言都藉助 LLVM 的強大能力實現了許多語言功能。

Rust on LLVM

Rust是一門靜態編譯型語言,Rust官方的編譯器叫rustc,rustc使用LLVM作為編譯器後端。rustc對Rust原始碼進行詞法分析、靜態型別檢查,最終將程式碼編譯為 LLVM IR。一般 LLVM IR 的檔案字尾是 .ll,Bytecode 檔案字尾是 .bc,其中 IR 是可讀的。

使用 LLVM 的話,編譯的流程就清晰很多:

原始碼 -> AST -> LLVM IR -> LLVM Bytecode -> ASM -> Native

將Rust 程式碼編譯為 LLVM IR

fn main() {
    let str = "rust";
    println!("{}",str);
}

--emit 引數中的 llvm-ir 會讓 rustc 將 Rust 程式碼編譯為 LLVM IR 中間語言(輸出 .ll 檔案),asm 則是編譯為組合語言(輸出 .s 檔案)。

rustc --emit=asm,llvm-ir file.rs

LLVM 的 lli 命令可以直接執行 .ll 檔案,llc 命令可以將 .ll 檔案編譯為組合語言。

lli file.ll


Stack dump:
0.	Program arguments: /usr/local/llvm/bin/lli file.ll
0  lli                      0x000000010e0027d1 llvm::sys::PrintStackTrace(llvm::raw_ostream&) + 37
1  lli                      0x000000010e00200b llvm::sys::RunSignalHandlers() + 52
2  lli                      0x000000010e002b98 SignalHandler(int) + 144
3  libsystem_platform.dylib 0x00007fffe93ebb3a _sigtramp + 26
4  libsystem_platform.dylib 0x0000000000000009 _sigtramp + 381764841
5  libsystem_platform.dylib 0x000000010f137270 _sigtramp + 634697552
6  lli                      0x000000010dd38578 llvm::MCJIT::runFunction(llvm::Function*,llvm::ArrayRef<llvm::GenericValue>) + 508
7  lli                      0x000000010dcc57cf llvm::ExecutionEngine::runFunctionAsMain(llvm::Function*,std::__1::vector<std::__1::basic_string<char,std::__1::char_traits<char>,std::__1::allocator<char> >,std::__1::allocator<std::__1::basic_string<char,std::__1::allocator<char> > > > const&,char const* const*) + 1159
8  lli                      0x000000010d9e3363 main + 7675
9  libdyld.dylib            0x00007fffe91dc235 start + 1
10 libdyld.dylib            0x0000000000000002 start + 383925710
[1]    89982 segmentation fault  /usr/local/llvm/bin/lli file.ll

file.rs 的 LLVM IR 內容

簡單解釋下 LLVM IR的含義:

  1. @代表全域性變數,%代表區域性變數

  2. %a = alloca i32,align 4 棧上分配記憶體

  3. load讀出內容 store寫入內容

  4. i32 表示 32 位元的整數(integer),即四位元組

  5. align 4 表示 4 位元組對齊

  6. %add = add nsw i32 %0,%1 加法操作(nsw 即 No Signed Wrap,表示若發生有號溢位,結果為 poison value)

以上只是簡單的一個示例,只是介紹學習的途徑。想要深入的瞭解IR,可以參考 LLVM Language Reference Manual

; ModuleID = 'file.7rcbfp3g-cgu.0'
source_filename = "file.7rcbfp3g-cgu.0"
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.7.0"

%"core::fmt::Formatter" = type { [0 x i64],{ i64,i64 },[0 x i64],{ {}*,[3 x i64]* },{ i64*,i64* },{ [0 x { i8*,i8* }]*,[0 x i32],i32,[0 x i8],i8,[7 x i8] }
%"core::fmt::Void" = type { [0 x i8],{},%"core::marker::PhantomData<*mut core::ops::function::Fn<(),Output=()>>",[0 x i8] }
%"core::marker::PhantomData<*mut core::ops::function::Fn<(),Output=()>>" = type {}
%"core::fmt::Arguments" = type { [0 x i64],{ [0 x { [0 x i8]*,i64 }]*,[0 x i64] }
%"unwind::libunwind::_Unwind_Exception" = type { [0 x i64],i64,void (i32,%"unwind::libunwind::_Unwind_Exception"*)*,[6 x i64],[0 x i64] }
%"unwind::libunwind::_Unwind_Context" = type { [0 x i8] }

@vtable.0 = private unnamed_addr constant { void (i8**)*,i32 (i8**)*,i32 (i8**)* } { void (i8**)* @_ZN4core3ptr18real_drop_in_place17h7f6d35a144fd7fecE,i64 8,i32 (i8**)* @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h63f922eebf54c4d5E",i32 (i8**)* @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17hf307176a8af38675E" },align 8
@0 = private unnamed_addr constant <{ [4 x i8] }> <{ [4 x i8] c"rust" }>,align 1
@1 = private unnamed_addr constant <{ [0 x i8] }> zeroinitializer,align 1
@2 = private unnamed_addr constant <{ [1 x i8] }> <{ [1 x i8] c"\0A" }>,align 1
@3 = private unnamed_addr constant <{ i8*,[8 x i8],i8*,[8 x i8] }> <{ i8* getelementptr inbounds (<{ [0 x i8] }>,<{ [0 x i8] }>* @1,i32 0,i32 0),[8 x i8] zeroinitializer,i8* getelementptr inbounds (<{ [1 x i8] }>,<{ [1 x i8] }>* @2,[8 x i8] c"\01\00\00\00\00\00\00\00" }>,align 8

; std::rt::lang_start
; Function Attrs: uwtable
define hidden i64 @_ZN3std2rt10lang_start17h545e8eadaec5e3ffE(void ()* nonnull %main,i64 %argc,i8** %argv) unnamed_addr #0 {
start:
  %_7 = alloca i8*,align 8
  %0 = bitcast i8** %_7 to void ()**
  store void ()* %main,void ()** %0,align 8
  %1 = bitcast i8** %_7 to {}*
; call std::rt::lang_start_internal
  %2 = call i64 @_ZN3std2rt19lang_start_internal17hc1ac2c20e9f8edf2E({}* nonnull align 1 %1,[3 x i64]* noalias readonly align 8 dereferenceable(24) bitcast ({ void (i8**)*,i32 (i8**)* }* @vtable.0 to [3 x i64]*),i8** %argv)
  br label %bb1

bb1:                                              ; preds = %start
  ret i64 %2
}

; std::rt::lang_start::{{closure}}
; Function Attrs: uwtable
define internal i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h63f922eebf54c4d5E"(i8** noalias readonly align 8 dereferenceable(8) %arg0) unnamed_addr #0 {
start:
  %0 = bitcast i8** %arg0 to void ()**
  %1 = load void ()*,align 8,!nonnull !1
  call void %1()
  br label %bb1

bb1:                                              ; preds = %start
; call <() as std::process::Termination>::report
  %2 = call i32 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17hc9096258341f5e78E"()
  br label %bb2

bb2:                                              ; preds = %bb1
  ret i32 %2
}

; std::sys::unix::process::process_common::ExitCode::as_i32
; Function Attrs: inlinehint uwtable
define internal i32 @_ZN3std3sys4unix7process14process_common8ExitCode6as_i3217h07f1ac4bfdd8cfccE(i8* noalias readonly align 1 dereferenceable(1) %self) unnamed_addr #1 {
start:
  %0 = load i8,i8* %self,align 1
  %1 = zext i8 %0 to i32
  ret i32 %1
}

; <&T as core::fmt::Display>::fmt
; Function Attrs: uwtable
define internal zeroext i1 @"_ZN44_$LT$$RF$T$u20$as$u20$core..fmt..Display$GT$3fmt17h5f4bd8ab2c6676ecE"({ [0 x i8]*,i64 }* noalias readonly align 8 dereferenceable(16) %self,%"core::fmt::Formatter"* align 8 dereferenceable(96) %f) unnamed_addr #0 {
start:
  %0 = getelementptr inbounds { [0 x i8]*,{ [0 x i8]*,i64 }* %self,i32 0
  %1 = load [0 x i8]*,[0 x i8]** %0,!nonnull !1
  %2 = getelementptr inbounds { [0 x i8]*,i32 1
  %3 = load i64,i64* %2,align 8
; call <str as core::fmt::Display>::fmt
  %4 = call zeroext i1 @"_ZN42_$LT$str$u20$as$u20$core..fmt..Display$GT$3fmt17h91c2539c48905fe1E"([0 x i8]* noalias nonnull readonly align 1 %1,i64 %3,%"core::fmt::Formatter"* align 8 dereferenceable(96) %f)
  br label %bb1

bb1:                                              ; preds = %start
  ret i1 %4
}

; core::fmt::ArgumentV1::new
; Function Attrs: uwtable
define internal { i8*,i8* } @_ZN4core3fmt10ArgumentV13new17hdfa3ca615e4b084aE({ [0 x i8]*,i64 }* noalias readonly align 8 dereferenceable(16) %x,i1 ({ [0 x i8]*,i64 }*,%"core::fmt::Formatter"*)* nonnull %f) unnamed_addr #0 {
start:
  %transmute_temp1 = alloca %"core::fmt::Void"*,align 8
  %transmute_temp = alloca i1 (%"core::fmt::Void"*,%"core::fmt::Formatter"*)*,align 8
  %_0 = alloca { i8*,i8* },align 8
  %0 = bitcast i1 (%"core::fmt::Void"*,%"core::fmt::Formatter"*)** %transmute_temp to i1 ({ [0 x i8]*,%"core::fmt::Formatter"*)**
  store i1 ({ [0 x i8]*,%"core::fmt::Formatter"*)* %f,%"core::fmt::Formatter"*)** %0,align 8
  %1 = load i1 (%"core::fmt::Void"*,i1 (%"core::fmt::Void"*,%"core::fmt::Formatter"*)** %transmute_temp,!nonnull !1
  br label %bb1

bb1:                                              ; preds = %start
  %2 = bitcast %"core::fmt::Void"** %transmute_temp1 to { [0 x i8]*,i64 }**
  store { [0 x i8]*,i64 }* %x,i64 }** %2,align 8
  %3 = load %"core::fmt::Void"*,%"core::fmt::Void"** %transmute_temp1,!nonnull !1
  br label %bb2

bb2:                                              ; preds = %bb1
  %4 = bitcast { i8*,i8* }* %_0 to %"core::fmt::Void"**
  store %"core::fmt::Void"* %3,%"core::fmt::Void"** %4,align 8
  %5 = getelementptr inbounds { i8*,{ i8*,i8* }* %_0,i32 1
  %6 = bitcast i8** %5 to i1 (%"core::fmt::Void"*,%"core::fmt::Formatter"*)**
  store i1 (%"core::fmt::Void"*,%"core::fmt::Formatter"*)* %1,%"core::fmt::Formatter"*)** %6,align 8
  %7 = getelementptr inbounds { i8*,i32 0
  %8 = load i8*,i8** %7,!nonnull !1
  %9 = getelementptr inbounds { i8*,i32 1
  %10 = load i8*,i8** %9,!nonnull !1
  %11 = insertvalue { i8*,i8* } undef,i8* %8,0
  %12 = insertvalue { i8*,i8* } %11,i8* %10,1
  ret { i8*,i8* } %12
}

; core::fmt::Arguments::new_v1
; Function Attrs: inlinehint uwtable
define internal void @_ZN4core3fmt9Arguments6new_v117h853b1a3cb02ecdf5E(%"core::fmt::Arguments"* noalias nocapture sret dereferenceable(48),[0 x { [0 x i8]*,i64 }]* noalias nonnull readonly align 8 %pieces.0,i64 %pieces.1,[0 x { i8*,i8* }]* noalias nonnull readonly align 8 %args.0,i64 %args.1) unnamed_addr #1 {
start:
  %_4 = alloca { i64*,align 8
  %1 = bitcast { i64*,i64 }* %_4 to {}**
  store {}* null,{}** %1,align 8
  %2 = bitcast %"core::fmt::Arguments"* %0 to { [0 x { [0 x i8]*,i64 }*
  %3 = getelementptr inbounds { [0 x { [0 x i8]*,i64 }* %2,i32 0
  store [0 x { [0 x i8]*,i64 }]* %pieces.0,i64 }]** %3,align 8
  %4 = getelementptr inbounds { [0 x { [0 x i8]*,i32 1
  store i64 %pieces.1,i64* %4,align 8
  %5 = getelementptr inbounds %"core::fmt::Arguments",%"core::fmt::Arguments"* %0,i32 3
  %6 = getelementptr inbounds { i64*,i64 }* %_4,i32 0
  %7 = load i64*,i64** %6,align 8
  %8 = getelementptr inbounds { i64*,i32 1
  %9 = load i64,i64* %8,align 8
  %10 = getelementptr inbounds { i64*,i64 }* %5,i32 0
  store i64* %7,i64** %10,align 8
  %11 = getelementptr inbounds { i64*,i32 1
  store i64 %9,i64* %11,align 8
  %12 = getelementptr inbounds %"core::fmt::Arguments",i32 5
  %13 = getelementptr inbounds { [0 x { i8*,i64 }* %12,i32 0
  store [0 x { i8*,i8* }]* %args.0,i8* }]** %13,align 8
  %14 = getelementptr inbounds { [0 x { i8*,i32 1
  store i64 %args.1,i64* %14,align 8
  ret void
}

; core::ops::function::FnOnce::call_once{{vtable.shim}}
; Function Attrs: uwtable
define internal i32 @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17hf307176a8af38675E"(i8** %arg0) unnamed_addr #0 {
start:
  %arg1 = alloca {},align 1
  %0 = load i8*,i8** %arg0,!nonnull !1
; call core::ops::function::FnOnce::call_once
  %1 = call i32 @_ZN4core3ops8function6FnOnce9call_once17h5714eef014de52c8E(i8* nonnull %0)
  br label %bb1

bb1:                                              ; preds = %start
  ret i32 %1
}

; core::ops::function::FnOnce::call_once
; Function Attrs: uwtable
define internal i32 @_ZN4core3ops8function6FnOnce9call_once17h5714eef014de52c8E(i8* nonnull) unnamed_addr #0 personality i32 (i32,%"unwind::libunwind::_Unwind_Exception"*,%"unwind::libunwind::_Unwind_Context"*)* @rust_eh_personality {
start:
  %personalityslot = alloca { i8*,i32 },align 8
  %arg1 = alloca {},align 1
  %arg0 = alloca i8*,align 8
  store i8* %0,align 8
; invoke std::rt::lang_start::{{closure}}
  %1 = invoke i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h63f922eebf54c4d5E"(i8** align 8 dereferenceable(8) %arg0)
          to label %bb1 unwind label %cleanup

bb1:                                              ; preds = %start
  br label %bb2

bb2:                                              ; preds = %bb1
  ret i32 %1

bb3:                                              ; preds = %cleanup
  br label %bb4

bb4:                                              ; preds = %bb3
  %2 = bitcast { i8*,i32 }* %personalityslot to i8**
  %3 = load i8*,i8** %2,align 8
  %4 = getelementptr inbounds { i8*,i32 }* %personalityslot,i32 1
  %5 = load i32,i32* %4,align 8
  %6 = insertvalue { i8*,i32 } undef,i8* %3,0
  %7 = insertvalue { i8*,i32 } %6,i32 %5,1
  resume { i8*,i32 } %7

cleanup:                                          ; preds = %start
  %8 = landingpad { i8*,i32 }
          cleanup
  %9 = extractvalue { i8*,i32 } %8,0
  %10 = extractvalue { i8*,1
  %11 = getelementptr inbounds { i8*,i32 0
  store i8* %9,i8** %11,align 8
  %12 = getelementptr inbounds { i8*,i32 1
  store i32 %10,i32* %12,align 8
  br label %bb3
}

; core::ptr::real_drop_in_place
; Function Attrs: uwtable
define internal void @_ZN4core3ptr18real_drop_in_place17h7f6d35a144fd7fecE(i8** align 8 dereferenceable(8) %arg0) unnamed_addr #0 {
start:
  ret void
}

; <() as std::process::Termination>::report
; Function Attrs: inlinehint uwtable
define internal i32 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17hc9096258341f5e78E"() unnamed_addr #1 {
start:
; call <std::process::ExitCode as std::process::Termination>::report
  %0 = call i32 @"_ZN68_$LT$std..process..ExitCode$u20$as$u20$std..process..Termination$GT$6report17h67475b03fcc05555E"(i8 0)
  br label %bb1

bb1:                                              ; preds = %start
  ret i32 %0
}

; <std::process::ExitCode as std::process::Termination>::report
; Function Attrs: inlinehint uwtable
define internal i32 @"_ZN68_$LT$std..process..ExitCode$u20$as$u20$std..process..Termination$GT$6report17h67475b03fcc05555E"(i8) unnamed_addr #1 {
start:
  %self = alloca i8,align 1
  store i8 %0,align 1
; call std::sys::unix::process::process_common::ExitCode::as_i32
  %1 = call i32 @_ZN3std3sys4unix7process14process_common8ExitCode6as_i3217h07f1ac4bfdd8cfccE(i8* noalias readonly align 1 dereferenceable(1) %self)
  br label %bb1

bb1:                                              ; preds = %start
  ret i32 %1
}

; file::main
; Function Attrs: uwtable
define internal void @_ZN4file4main17hfa5dc64818afec14E() unnamed_addr #0 {
start:
  %_11 = alloca i64*,align 8
  %_10 = alloca [1 x { i8*,i8* }],align 8
  %_3 = alloca %"core::fmt::Arguments",align 8
  %str = alloca { [0 x i8]*,align 8
  %0 = getelementptr inbounds { [0 x i8]*,i64 }* %str,i32 0
  store [0 x i8]* bitcast (<{ [4 x i8] }>* @0 to [0 x i8]*),align 8
  %1 = getelementptr inbounds { [0 x i8]*,i32 1
  store i64 4,i64* %1,align 8
  %2 = bitcast i64** %_11 to { [0 x i8]*,align 8
  %3 = bitcast i64** %_11 to { [0 x i8]*,i64 }**
  %4 = load { [0 x i8]*,i64 }** %3,!nonnull !1
; call core::fmt::ArgumentV1::new
  %5 = call { i8*,i64 }* noalias readonly align 8 dereferenceable(16) %4,%"core::fmt::Formatter"*)* nonnull @"_ZN44_$LT$$RF$T$u20$as$u20$core..fmt..Display$GT$3fmt17h5f4bd8ab2c6676ecE")
  %6 = extractvalue { i8*,i8* } %5,0
  %7 = extractvalue { i8*,1
  br label %bb1

bb1:                                              ; preds = %start
  %8 = bitcast [1 x { i8*,i8* }]* %_10 to { i8*,i8* }*
  %9 = getelementptr inbounds { i8*,i8* }* %8,i32 0
  store i8* %6,align 8
  %10 = getelementptr inbounds { i8*,i32 1
  store i8* %7,i8** %10,align 8
  %11 = bitcast [1 x { i8*,i8* }]* %_10 to [0 x { i8*,i8* }]*
; call core::fmt::Arguments::new_v1
  call void @_ZN4core3fmt9Arguments6new_v117h853b1a3cb02ecdf5E(%"core::fmt::Arguments"* noalias nocapture sret dereferenceable(48) %_3,i64 }]* noalias nonnull readonly align 8 bitcast (<{ i8*,[8 x i8] }>* @3 to [0 x { [0 x i8]*,i64 }]*),i64 2,i8* }]* noalias nonnull readonly align 8 %11,i64 1)
  br label %bb2

bb2:                                              ; preds = %bb1
; call std::io::stdio::_print
  call void @_ZN3std2io5stdio6_print17h8760c403d70231e9E(%"core::fmt::Arguments"* noalias nocapture dereferenceable(48) %_3)
  br label %bb3

bb3:                                              ; preds = %bb2
  ret void
}

; std::rt::lang_start_internal
; Function Attrs: uwtable
declare i64 @_ZN3std2rt19lang_start_internal17hc1ac2c20e9f8edf2E({}* nonnull align 1,[3 x i64]* noalias readonly align 8 dereferenceable(24),i8**) unnamed_addr #0

; <str as core::fmt::Display>::fmt
; Function Attrs: uwtable
declare zeroext i1 @"_ZN42_$LT$str$u20$as$u20$core..fmt..Display$GT$3fmt17h91c2539c48905fe1E"([0 x i8]* noalias nonnull readonly align 1,%"core::fmt::Formatter"* align 8 dereferenceable(96)) unnamed_addr #0

; Function Attrs: nounwind uwtable
declare i32 @rust_eh_personality(i32,%"unwind::libunwind::_Unwind_Exception"*,%"unwind::libunwind::_Unwind_Context"*) unnamed_addr #2

; std::io::stdio::_print
; Function Attrs: uwtable
declare void @_ZN3std2io5stdio6_print17h8760c403d70231e9E(%"core::fmt::Arguments"* noalias nocapture dereferenceable(48)) unnamed_addr #0

define i32 @main(i32,i8**) unnamed_addr #3 {
top:
  %2 = sext i32 %0 to i64
; call std::rt::lang_start
  %3 = call i64 @_ZN3std2rt10lang_start17h545e8eadaec5e3ffE(void ()* @_ZN4file4main17hfa5dc64818afec14E,i64 %2,i8** %1)
  %4 = trunc i64 %3 to i32
  ret i32 %4
}

attributes #0 = { uwtable "no-frame-pointer-elim"="true" "probe-stack"="__rust_probestack" "target-cpu"="core2" }
attributes #1 = { inlinehint uwtable "no-frame-pointer-elim"="true" "probe-stack"="__rust_probestack" "target-cpu"="core2" }
attributes #2 = { nounwind uwtable "no-frame-pointer-elim"="true" "probe-stack"="__rust_probestack" "target-cpu"="core2" }
attributes #3 = { "no-frame-pointer-elim"="true" "target-cpu"="core2" }

!llvm.module.flags = !{!0}

!0 = !{i32 7,!"PIE Level",i32 2}
!1 = !{}
複製程式碼

參考:

stackoverflow.com/a/38917344

www.jianshu.com/p/598b7094b…

segmentfault.com/a/119000000…

www.nagain.com/activity/ar…

embeddedartistry.com/blog/2017/2…

willcrichton.net/notes/rust-…

github.com/RReverser/v…