【docker 17 原始碼分析】 Docker Daemon啟動
基礎知識
Daemon 通過三種方式監聽請求:unix、tcp、fd,預設使用 unix domain socket(/var/run/docker.sock)。對於遠端請求,可以開啟 tcp socket 監聽所有位址(-H tcp://0.0.0.0:2375),或者只監聽固定 IP(-H tcp://192.168.0.1:2375)。也可以同時指定多個 -H,如下:
$ sudo dockerd -H unix:///var/run/docker.sock -H tcp://192.168.59.106 -H tcp://10.10.10.2
docker 分為 docker client 和 docker daemon 兩部分:client 端傳送命令,daemon 端負責:
- 建立 Docker 執行環境
- httpserver 服務於Docker Client,接收並處理相應請求
一. Docker Daemon 啟動原始碼分析
入口程式碼為cmd/dockerd/docker.go。
func main() {
if reexec.Init() {
return
}
_, stdout, stderr := term.StdStreams()
logrus.SetOutput(stderr)
cmd := newDaemonCommand ()
cmd.SetOutput(stdout)
......
}
1.1 reexec 是 docker 自己實現的一個 package,https://groups.google.com/forum/#!topic/docker-dev/ePLDji_qBvE 給的解釋可能是個過期的程式碼
1.2 logrus 第三方的 log 模組,initLogging 就是將輸出定向到 stderr。
1.3 newDaemonCommand
func newDaemonCommand() *cobra.Command { opts := daemonOptions{ daemonConfig: config.New(), common: cliflags.NewCommonOptions(), } cmd := &cobra.Command{ Use: "dockerd [OPTIONS]", Short: "A self-sufficient runtime for containers.", SilenceUsage: true, SilenceErrors: true, Args: cli.NoArgs, RunE: func(cmd *cobra.Command, args []string) error { opts.flags = cmd.Flags() return runDaemon(opts) }, } ......return cmd }
1.3.1 dockerd 啟動 daemon 實際執行的是 runDaemon 函式,可以使用 dockerd -D 啟動。進入 runDaemon() 後,初始化 daemonCli,執行 start 方法;runDaemon 中除了 api service 還會執行 d, err := daemon.NewDaemon(cli.Config, registryService, containerdRemote),註冊 registry 和 containerd 等。
1.3.2 defaultDaemonConfigFile:預設配置檔案為 "/etc/docker/daemon.json"
func runDaemon(opts daemonOptions) error { daemonCli := NewDaemonCli() ...... err = daemonCli.start(opts) notifyShutdown(err) return err }
type DaemonCli struct { *config.Config configFile *string flags *pflag.FlagSet api *apiserver.Server d *daemon.Daemon authzMiddleware *authorization.Middleware}
1.4 daemonCli.start cli.Pidfile 為 /var/run/docker.pid,建立檔案並寫入 pid
if cli.Pidfile != "" { pf, err := pidfile.New(cli.Pidfile) if err != nil { return fmt.Errorf("Error starting daemon: %v", err) } defer func() { if err := pf.Remove(); err != nil { logrus.Error(err) } }() }
1.5 daemonCli.start 啟動一個server
- len(cli.Config.Hosts),如果沒有-H引數,長度為0,預設使用的是 /var/run/docker.sock。make一個長度為1的切片
- apiserver.New(serverConfig) 生成一個 api server 物件,包含多個 http 服務
func (cli *DaemonCli) start(opts daemonOptions) (err error) { ...... serverConfig := &apiserver.Config{ Logging: true, SocketGroup: cli.Config.SocketGroup, Version: dockerversion.Version, EnableCors: cli.Config.EnableCors, CorsHeaders: cli.Config.CorsHeaders, } if len(cli.Config.Hosts) == 0 { cli.Config.Hosts = make([]string, 1) } api := apiserver.New(serverConfig) cli.api = api }
1.6 daemonCli.start 初始化監聽地址,因為啟動沒有指定任何 host, ParseHost 設定為預設值,unix:///var/run/docker.sock,proto 為 unix,addr 為 /var/run/docker.sock,listeners.Init 建立 socket server,accept 客戶端發來的請求。其他的 TCP 協議等一樣的分析。
func (cli *DaemonCli) start(opts *daemonOptions) (err error) { ......for i := 0; i < len(cli.Config.Hosts); i++ { var err error if cli.Config.Hosts[i], err = dopts.ParseHost(cli.Config.TLS, cli.Config.Hosts[i]); err != nil { return fmt.Errorf("error parsing -H %s : %v", cli.Config.Hosts[i], err) } protoAddr := cli.Config.Hosts[i] protoAddrParts := strings.SplitN(protoAddr, "://", 2) if len(protoAddrParts) != 2 { return fmt.Errorf("bad format %s, expected PROTO://ADDR", protoAddr) } proto := protoAddrParts[0] addr := protoAddrParts[1] ...... ls, err := listeners.Init(proto, addr, serverConfig.SocketGroup, serverConfig.TLSConfig) if err != nil { return err } ls = wrapListeners(proto, ls) ...... hosts = append(hosts, protoAddrParts[1]) cli.api.Accept(addr, ls...) }
// Accept sets a listener the server accepts connections into. func (s *Server) Accept(addr string, listeners ...net.Listener) { for _, listener := range listeners { httpServer := &HTTPServer{ srv: &http.Server{ Addr: addr, }, l: listener, } s.servers = append(s.servers, httpServer) } }
1.7 daemonCli.start 生成一個 DefaultService 結構體:
registryService := registry.NewService(cli.Config.ServiceOptions)
// DefaultService is a registry service. It tracks configuration data such as a list // of mirrors. type DefaultService struct { config *serviceConfig mu sync.Mutex } // NewService returns a new instance of DefaultService ready to be // installed into an engine. func NewService(options ServiceOptions) *DefaultService { return &DefaultService{ config: newServiceConfig(options), } }
1.8 daemonCli.start 生成一個 libcontainerd remote 例項:建立目錄 /var/run/docker/libcontainerd,目錄下 rpc addr 為 docker-containerd.sock
// New creates a fresh instance of libcontainerd remote. func New(stateDir string, options ...RemoteOption) (_ Remote, err error) { r := &remote{ stateDir: stateDir, daemonPid: -1, eventTsPath: filepath.Join(stateDir, eventTimestampFilename), } for _, option := range options { if err := option.Apply(r); err != nil { return nil, err } } if err := sysinfo.MkdirAll(stateDir, 0700); err != nil { if r.rpcAddr == "" { r.rpcAddr = filepath.Join(stateDir, containerdSockFilename) } if r.startDaemon { if err := r.runContainerdDaemon(); err != nil {return r, nil }
1.8.1 runContainerdDaemon,pid 檔案為 /var/run/docker/libcontainerd/docker-containerd.pid。先開啟 pid 檔案檢視先前的程序是否還存活,若存活則直接複用,否則啟動一個新的例項,命令大致為:docker-containerd -l unix:///var/run/docker/libcontainerd/docker-containerd.sock --metrics-interval=0 --start-timeout 2m --state-dir /var/run/docker/libcontainerd/containerd --shim docker-containerd-shim --runtime docker-runc
寫入 pid 至檔案。setOOMScore 寫入 /proc/${pid}/oom_score_adj
func (r *remote) runContainerdDaemon() error { pidFilename := filepath.Join(r.stateDir, containerdPidFilename) f, err := os.OpenFile(pidFilename, os.O_RDWR|os.O_CREATE, 0600) if n > 0 { pid, err := strconv.ParseUint(string(b[:n]), 10, 64) if system.IsProcessAlive(int(pid)) { logrus.Infof("libcontainerd: previous instance of containerd still alive (%d)", pid) r.daemonPid = int(pid) return nil } } // Start a new instance args := []string{ "-l", fmt.Sprintf("unix://%s", r.rpcAddr), "--metrics-interval=0", "--start-timeout", "2m", "--state-dir", filepath.Join(r.stateDir, containerdStateDir), } ...... cmd := exec.Command(containerdBinary, args...) if err := cmd.Start(); err != nil { r.daemonPid = cmd.Process.Pid return nil }
1.8.1.1 執行可以檢視 ps axf | grep docker,啟動程序 docker-containerd
1.8.2 啟動完 docker-containerd 程序,使用 rpc 連線到這個程序,
func New(stateDir string, options ...RemoteOption) (_ Remote, err error) { dialOpts := []grpc.DialOption{ grpc.WithInsecure(), grpc.WithBackoffMaxDelay(2 * time.Second), grpc.WithDialer(func(addr string, timeout time.Duration) (net.Conn, error) { return net.DialTimeout("unix", addr, timeout) }), } conn, err := grpc.Dial(r.rpcAddr, dialOpts...) r.rpcConn = conn r.apiClient = containerd.NewAPIClient(conn) // Get the timestamp to restore from t := r.getLastEventTimestamp() tsp, err := ptypes.TimestampProto(t) r.restoreFromTimestamp = tsp go r.handleConnectionChange() if err := r.startEventsMonitor(); err != nil { return r, nil }
handleConnectionChange 啟動一個 for 迴圈,每 500 毫秒進行一次檢查。
1.9 daemon.NewDaemon 放入第二章節詳解
d, err := daemon.NewDaemon(cli.Config, registryService, containerdRemote, pluginStore) if err != nil { return fmt.Errorf("Error starting daemon: %v", err) }
1.10 initRouter(routerOptions),路由有 checkpoint,container,image,volume 等,對應 POST、DELETE、GET 等操作
func initRouter(opts routerOptions) { decoder := runconfig.ContainerDecoder{} routers := []router.Router{ // we need to add the checkpoint router before the container router or the DELETE gets masked checkpointrouter.NewRouter(opts.daemon, decoder), container.NewRouter(opts.daemon, decoder), image.NewRouter(opts.daemon, decoder), systemrouter.NewRouter(opts.daemon, opts.cluster, opts.buildCache), volume.NewRouter(opts.daemon), build.NewRouter(opts.buildBackend, opts.daemon), sessionrouter.NewRouter(opts.sessionManager), swarmrouter.NewRouter(opts.cluster), pluginrouter.NewRouter(opts.daemon.PluginManager()), distributionrouter.NewRouter(opts.daemon), } if opts.daemon.NetworkControllerEnabled() { routers = append(routers, network.NewRouter(opts.daemon, opts.cluster)) } if opts.daemon.HasExperimental() { for _, r := range routers { for _, route := range r.Routes() { if experimental, ok := route.(router.ExperimentalRoute); ok { experimental.Enable() } } } } opts.api.InitRouter(routers...) }
1.10.1 initRouter 主要是呼叫 InitRouter 函式,其實現如下:
func (s *Server) InitRouter(routers ...router.Router) { s.routers = append(s.routers, routers...) m := s.createMux() s.routerSwapper = &routerSwapper{ router: m, } } // createMux initializes the main router the server uses. func (s *Server) createMux() *mux.Router { m := mux.NewRouter() for _, apiRouter := range s.routers { for _, r := range apiRouter.Routes() { f := s.makeHTTPHandler(r.Handler()) m.Path(versionMatcher + r.Path()).Methods(r.Method()).Handler(f) m.Path(r.Path()).Methods(r.Method()).Handler(f) } } debugRouter := debug.NewRouter() s.routers = append(s.routers, debugRouter) for _, r := range debugRouter.Routes() { f := s.makeHTTPHandler(r.Handler()) m.Path("/debug" + r.Path()).Handler(f) } err := errors.NewRequestNotFoundError(fmt.Errorf("page not found")) notFoundHandler := httputils.MakeErrorHandler(err) m.HandleFunc(versionMatcher+"/{path:.*}", notFoundHandler) m.NotFoundHandler = notFoundHandler return m }
1.11 cli.api.Wait 等待請求在 /var/run/docker.sock
func (s *Server) Wait(waitChan chan error) { if err := s.serveAPI(); err != nil { logrus.Errorf("ServeAPI error: %v", err) waitChan <- err return } waitChan <- nil }
docker 啟動總結:
大部分是引數的檢查
啟動 API server 監聽請求
建立 API 路由
啟動 docker-containerd 程序(通過 libcontainerd remote),並使用 rpc 與其連線,
映象之間關係等
二. NewDaemon 函式原始碼分析
位置:daemon/daemon.go
setDefaultMtu(config)
2.1 設定 MTU。若 config 中有值則使用 config 中的,否則設定為預設的 1500
func verifyDaemonSettings(conf *config.Config) error { // Check for mutually incompatible config options if conf.BridgeConfig.Iface != "" && conf.BridgeConfig.IP != "" { return fmt.Errorf("You specified -b & --bip, mutually exclusive options. Please specify only one") } if !conf.BridgeConfig.EnableIPTables && !conf.BridgeConfig.InterContainerCommunication { return fmt.Errorf("You specified --iptables=false with --icc=false. ICC=false uses iptables to function. Please set --icc or --iptables to true") } if !conf.BridgeConfig.EnableIPTables && conf.BridgeConfig.EnableIPMasq { conf.BridgeConfig.EnableIPMasq = false } if err := VerifyCgroupDriver(conf); err != nil { return err } if conf.CgroupParent != "" && UsingSystemd(conf) { if len(conf.CgroupParent) <= 6 || !strings.HasSuffix(conf.CgroupParent, ".slice") { return fmt.Errorf("cgroup-parent for systemd cgroup should be a valid slice named as \"xxx.slice\"") } } if conf.DefaultRuntime == "" { conf.DefaultRuntime = config.StockRuntimeName } if conf.Runtimes == nil { conf.Runtimes = make(map[string]types.Runtime) } conf.Runtimes[config.StockRuntimeName] = types.Runtime{Path: DefaultRuntimeBinary} return nil }
2.2 驗證 daemon 的配置
- 不能同時指定網橋和網橋的 IP。如果指定網橋,應使用網橋的當前IP地址,不能再設定 IP 地址。
- 不能同時禁用 iptables 和 icc(容器間通訊)。如果禁用 icc,docker 會在宿主機 iptables 的 FORWARD chain 中新增一條將 docker 容器間流量全部 DROP 的規則;此時若又把 EnableIPTables 設定為 false,兩者衝突!!!
func isBridgeNetworkDisabled(conf *config.Config) bool { return conf.BridgeConfig.Iface == config.DisableNetworkBridge }
2.3 設定是否啟用網橋。config.DisableNetworkBridge 的值為 none;未指定網橋時 Iface 為空字串,兩者不相等,所以 DisableBridge 為 false(即不禁用網橋)
if !platformSupported { return nil, errSystemNotSupported } if err := checkSystem(); err != nil { return nil, err }
2.4 檢查系統支援和使用者許可權。需要 root 許可權(linux 中 uid = 0 為 root 使用者),並檢查核心版本
uidMaps, gidMaps, err := setupRemappedRoot(config) rootUID, rootGID, err := idtools.GetRootUIDGID(uidMaps, gidMaps)
2.5 docker 支援 user namespace,主要隔離安全相關的 identifiers 和 attributes,包括使用者 ID、使用者組 ID、root 目錄、key 以及特殊許可權。普通使用者的程序通過 clone() 建立的新程序在新 user namespace 中可以擁有不同的使用者和使用者組,這意味著一個程序在容器外屬於一個沒有特權的普通使用者,但在它建立的容器(新 user namespace)內卻是擁有所有許可權的超級使用者。
// setupDaemonProcess sets various settings for the daemon's process func setupDaemonProcess(config *config.Config) error { // setup the daemons oom_score_adj return setupOOMScoreAdj(config.OOMScoreAdjust) }
2.6 設定 OOM killer 值。setupOOMScoreAdj 設定 /proc/self/oom_score_adj,值的範圍為 -1000~1000,值越大越容易被 OOM Killer 選定;設定為 -1000 表示禁止被 kill 掉。(-17~15 是舊介面 /proc/self/oom_adj 的範圍,其中 -17 表示禁止被 kill。)
tmp, err := prepareTempDir(config.Root, rootIDs) realTmp, err := getRealPath(tmp) os.Setenv("TMPDIR", realTmp)
2.7 prepareTempDir 設定 tmp dir。如有環境變數 DOCKER_TMPDIR,則使用這個,否則建立 /var/lib/docker/tmp,並將舊的目錄改名為 /var/lib/docker/tmp-old。然後 getRealPath 解析 tmp 路徑(若其中含有符號連結則取其指向的真實路徑)得到 realTmp,並把 realTmp 賦值給環境變數 TMPDIR
// configureMaxThreads sets the Go runtime max threads threshold // which is 90% of the kernel setting from /proc/sys/kernel/threads-max func configureMaxThreads(config *config.Config) error { mt, err := ioutil.ReadFile("/proc/sys/kernel/threads-max") if err != nil { return err } mtint, err := strconv.Atoi(strings.TrimSpace(string(mt))) if err != nil { return err } maxThreads := (mtint / 100) * 90 debug.SetMaxThreads(maxThreads) logrus.Debugf("Golang's threads limit set to %d", maxThreads) return nil }
2.8 設定最大執行緒數。從 /proc/sys/kernel/threads-max 檔案讀取值,取其 90% 作為 Go runtime 的最大執行緒數(程式碼為 (mtint / 100) * 90,整數除法)
daemonRepo := filepath.Join(config.Root, "containers") if err := idtools.MkdirAllAs(daemonRepo, 0700, rootUID, rootGID); err != nil && !os.IsExist(err) { return nil, err }
2.9 建立容器目錄,位於 docker daemon 根目錄(config.Root)下的 containers。daemon 建立容器時會把容器的元資料資訊放在此目錄
driverName := os.Getenv("DOCKER_DRIVER") if driverName == "" { driverName = config.GraphDriver }
d.stores[runtime.GOOS] = daemonStore{graphDriver: driverName}
d.RegistryService = registryService
d.PluginStore = pluginStore
// Plugin system initialization should happen before restore. Do not change order.
d.pluginManager, err = plugin.NewManager(plugin.ManagerConfig{
	Root:               filepath.Join(config.Root, "plugins"),
	ExecRoot:           getPluginExecRoot(config.Root),
	Store:              d.PluginStore,
	Executor:           containerdRemote,
	RegistryService:    registryService,
	LiveRestoreEnabled: config.LiveRestoreEnabled,
	LogPluginEvent:     d.LogPluginEvent, // todo: make private
	AuthzMiddleware:    config.AuthzMiddleware,
})
if err != nil {
	return nil, errors.Wrap(err, "couldn't create plugin manager")
}
d.layerStore, err = layer.NewStoreFromOptions(layer.StoreOptions{
	StorePath:                 config.Root,
	MetadataStorePathTemplate: filepath.Join(config.Root, "image", "%s", "layerdb"),
	GraphDriver:               driverName,
	GraphDriverOptions:        config.GraphOptions,
	UIDMaps:                   uidMaps,
	GIDMaps:                   gidMaps,
	PluginGetter:              d.PluginStore,
	ExperimentalEnabled:       config.Experimental,
})
if err != nil {
	return nil, err
}
2.10 填充 daemon 結構體
// Plugin system initialization should happen before restore. Do not change order. d.pluginManager, err = plugin.NewManager(plugin.ManagerConfig{ Root: filepath.Join(config.Root, "plugins"), ExecRoot: getPluginExecRoot(config.Root), Store: d.PluginStore, Executor: containerdRemote, RegistryService: registryService, LiveRestoreEnabled: config.LiveRestoreEnabled, LogPluginEvent: d.LogPluginEvent