深入理解SpringCloud之Eureka註冊過程分析
eureka是一種去中心化的服務治理應用,其顯著特點是既可以作為服務端又可以作為服務向自己配置的地址進行註冊。那麽這篇文章就來探討一下eureka的註冊流程。
一、Eureka的服務端
eureka的服務端核心類是EurekaBootStrap,該類實現了ServletContextListener監聽器接口。因此我們可以斷定eureka是基於servlet容器實現的。關鍵代碼如下:
public class EurekaBootStrap implements ServletContextListener {
    // ... (related code omitted)

    /**
     * Initializes Eureka, including syncing up with other Eureka peers and publishing the registry.
     *
     * @see
     * javax.servlet.ServletContextListener#contextInitialized(javax.servlet.ServletContextEvent)
     */
    @Override
    public void contextInitialized(ServletContextEvent event) {
        try {
            // Set up the environment first, then build the server context.
            initEurekaEnvironment();
            initEurekaServerContext();

            // Expose the server context as a ServletContext attribute keyed by its class name.
            ServletContext sc= event.getServletContext();
            sc.setAttribute(EurekaServerContext.class.getName(), serverContext);
        } catch (Throwable e) {
            // Any failure during bootstrap is logged and rethrown wrapped in a RuntimeException.
            logger.error("Cannot bootstrap eureka server :", e);
            throw new RuntimeException("Cannot bootstrap eureka server :", e);
        }
    }

    // ... (related code omitted)
}
我們可以看到在ServletContext初始化完成時,會初始化Eureka環境,然後初始化EurekaServerContext,那麽我們再看一看initEurekaServerContext方法:
/**
 * init hook for server context. Override for custom logic.
 */
protected void initEurekaServerContext() throws Exception {
    // ... (code omitted)

    // When no eurekaClient has been set, build the instance config, the
    // ApplicationInfoManager and a new DiscoveryClient here; otherwise reuse
    // the ApplicationInfoManager of the existing client.
    ApplicationInfoManager applicationInfoManager = null;
    if (eurekaClient == null) {
        EurekaInstanceConfig instanceConfig = isCloud(ConfigurationManager.getDeploymentContext())
                ? new CloudInstanceConfig()
                : new MyDataCenterInstanceConfig();

        applicationInfoManager = new ApplicationInfoManager(
                instanceConfig, new EurekaConfigBasedInstanceInfoProvider(instanceConfig).get());

        EurekaClientConfig eurekaClientConfig = new DefaultEurekaClientConfig();
        eurekaClient = new DiscoveryClient(applicationInfoManager, eurekaClientConfig);
    } else {
        applicationInfoManager = eurekaClient.getApplicationInfoManager();
    }

    // Pick the registry implementation: the AWS variant (plus its binder, which is
    // started immediately) when isAws(...) is true, the default peer-aware one otherwise.
    PeerAwareInstanceRegistry registry;
    if (isAws(applicationInfoManager.getInfo())) {
        registry = new AwsInstanceRegistry(
                eurekaServerConfig,
                eurekaClient.getEurekaClientConfig(),
                serverCodecs,
                eurekaClient
        );
        awsBinder = new AwsBinderDelegate(eurekaServerConfig, eurekaClient.getEurekaClientConfig(), registry, applicationInfoManager);
        awsBinder.start();
    } else {
        registry = new PeerAwareInstanceRegistryImpl(
                eurekaServerConfig,
                eurekaClient.getEurekaClientConfig(),
                serverCodecs,
                eurekaClient
        );
    }

    // ... (part of the code omitted)
}
在這個方法裏會創建許多與eureka服務相關的對象,在這裏我列舉了兩個核心對象,分別是eurekaClient與PeerAwareInstanceRegistry,關於客戶端部分我們等會再說,我們現在來看看PeerAwareInstanceRegistry到底是做什麽用的,這裏我先貼出關於這個類的類圖:
根據類圖我們可以清晰地發現PeerAwareInstanceRegistry的最頂層接口為LeaseManager與LookupService,其中LookupService定義了最基本的發現實例的行為,而LeaseManager定義了處理客戶端註冊、續約、註銷等操作。那麽在這篇文章我們還是重點關注一下LeaseManager的相關接口的實現。回過頭來我們再看PeerAwareInstanceRegistry,其實這個類用於在多個節點之間複製相關信息,比如說一個節點發生註冊、續約或下線時,就會通過這個類將相關操作複製(通知)到其他各個節點。我們來看看它是怎麽處理客戶端註冊的:
/**
 * Registers the given {@link InstanceInfo} locally and then replicates the
 * registration to all peer eureka nodes. A registration that is itself a
 * replication event coming from another replica node is not replicated again.
 *
 * @param info
 *            the {@link InstanceInfo} to be registered and replicated.
 * @param isReplication
 *            true if this is a replication event from other replica nodes,
 *            false otherwise.
 */
@Override
public void register(final InstanceInfo info, final boolean isReplication) {
    // Use the lease duration supplied by the instance only when it is present and positive.
    final boolean hasClientDuration =
            info.getLeaseInfo() != null && info.getLeaseInfo().getDurationInSecs() > 0;
    final int leaseDuration = hasClientDuration
            ? info.getLeaseInfo().getDurationInSecs()
            : Lease.DEFAULT_DURATION_IN_SECS;

    // Register in the local registry first, then propagate the action to the peers.
    super.register(info, leaseDuration, isReplication);
    replicateToPeers(Action.Register, info.getAppName(), info.getId(), info, null, isReplication);
}
我們可以看到它調用了父類的register方法後又通過replicateToPeers復制對應的行為到其他節點,具體如何復制的先不在這裏討論,我們重點來看看註冊方法,我們在父類裏找到register()方法:
/** * Registers a new instance with a given duration. * * @see com.netflix.eureka.lease.LeaseManager#register(java.lang.Object, int, boolean) */ public void register(InstanceInfo registrant, int leaseDuration, boolean isReplication) { try { read.lock(); Map<String, Lease<InstanceInfo>> gMap = registry.get(registrant.getAppName()); REGISTER.increment(isReplication); if (gMap == null) { final ConcurrentHashMap<String, Lease<InstanceInfo>> gNewMap = new ConcurrentHashMap<String, Lease<InstanceInfo>>(); gMap = registry.putIfAbsent(registrant.getAppName(), gNewMap); if (gMap == null) { gMap = gNewMap; } } Lease<InstanceInfo> existingLease = gMap.get(registrant.getId()); // Retain the last dirty timestamp without overwriting it, if there is already a lease if (existingLease != null && (existingLease.getHolder() != null)) { Long existingLastDirtyTimestamp = existingLease.getHolder().getLastDirtyTimestamp(); Long registrationLastDirtyTimestamp = registrant.getLastDirtyTimestamp(); logger.debug("Existing lease found (existing={}, provided={}", existingLastDirtyTimestamp, registrationLastDirtyTimestamp); // this is a > instead of a >= because if the timestamps are equal, we still take the remote transmitted // InstanceInfo instead of the server local copy. 
if (existingLastDirtyTimestamp > registrationLastDirtyTimestamp) { logger.warn("There is an existing lease and the existing lease‘s dirty timestamp {} is greater" + " than the one that is being registered {}", existingLastDirtyTimestamp, registrationLastDirtyTimestamp); logger.warn("Using the existing instanceInfo instead of the new instanceInfo as the registrant"); registrant = existingLease.getHolder(); } } else { // The lease does not exist and hence it is a new registration synchronized (lock) { if (this.expectedNumberOfRenewsPerMin > 0) { // Since the client wants to cancel it, reduce the threshold // (1 // for 30 seconds, 2 for a minute) this.expectedNumberOfRenewsPerMin = this.expectedNumberOfRenewsPerMin + 2; this.numberOfRenewsPerMinThreshold = (int) (this.expectedNumberOfRenewsPerMin * serverConfig.getRenewalPercentThreshold()); } } logger.debug("No previous lease information found; it is new registration"); } Lease<InstanceInfo> lease = new Lease<InstanceInfo>(registrant, leaseDuration); if (existingLease != null) { lease.setServiceUpTimestamp(existingLease.getServiceUpTimestamp()); } gMap.put(registrant.getId(), lease); //。。。省略部分代碼 }
通過源代碼,我們來簡要梳理一下流程:
1)首先根據appName獲取一系列的服務實例對象,如果為null,則新創建一個map並把當前的註冊應用程序信息添加到此Map當中。這裏有一個Lease對象,這個類描述了泛型T的時間屬性,比如說註冊時間、服務啟動時間、最後更新時間等,大家可以關注一下它的實現:
/* * Copyright 2012 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.netflix.eureka.lease; import com.netflix.eureka.registry.AbstractInstanceRegistry; /** * Describes a time-based availability of a {@link T}. Purpose is to avoid * accumulation of instances in {@link AbstractInstanceRegistry} as result of ungraceful * shutdowns that is not uncommon in AWS environments. * * If a lease elapses without renewals, it will eventually expire consequently * marking the associated {@link T} for immediate eviction - this is similar to * an explicit cancellation except that there is no communication between the * {@link T} and {@link LeaseManager}. * * @author Karthik Ranganathan, Greg Kim */ public class Lease<T> { enum Action { Register, Cancel, Renew }; public static final int DEFAULT_DURATION_IN_SECS = 90; private T holder; private long evictionTimestamp; private long registrationTimestamp; private long serviceUpTimestamp; // Make it volatile so that the expiration task would see this quicker private volatile long lastUpdateTimestamp; private long duration; public Lease(T r, int durationInSecs) { holder = r; registrationTimestamp = System.currentTimeMillis(); lastUpdateTimestamp = registrationTimestamp; duration = (durationInSecs * 1000); } /** * Renew the lease, use renewal duration if it was specified by the * associated {@link T} during registration, otherwise default duration is * {@link #DEFAULT_DURATION_IN_SECS}. 
*/ public void renew() { lastUpdateTimestamp = System.currentTimeMillis() + duration; } /** * Cancels the lease by updating the eviction time. */ public void cancel() { if (evictionTimestamp <= 0) { evictionTimestamp = System.currentTimeMillis(); } } /** * Mark the service as up. This will only take affect the first time called, * subsequent calls will be ignored. */ public void serviceUp() { if (serviceUpTimestamp == 0) { serviceUpTimestamp = System.currentTimeMillis(); } } /** * Set the leases service UP timestamp. */ public void setServiceUpTimestamp(long serviceUpTimestamp) { this.serviceUpTimestamp = serviceUpTimestamp; } /** * Checks if the lease of a given {@link com.netflix.appinfo.InstanceInfo} has expired or not. */ public boolean isExpired() { return isExpired(0l); } /** * Checks if the lease of a given {@link com.netflix.appinfo.InstanceInfo} has expired or not. * * Note that due to renew() doing the ‘wrong" thing and setting lastUpdateTimestamp to +duration more than * what it should be, the expiry will actually be 2 * duration. This is a minor bug and should only affect * instances that ungracefully shutdown. Due to possible wide ranging impact to existing usage, this will * not be fixed. * * @param additionalLeaseMs any additional lease time to add to the lease evaluation in ms. */ public boolean isExpired(long additionalLeaseMs) { return (evictionTimestamp > 0 || System.currentTimeMillis() > (lastUpdateTimestamp + duration + additionalLeaseMs)); } /** * Gets the milliseconds since epoch when the lease was registered. * * @return the milliseconds since epoch when the lease was registered. */ public long getRegistrationTimestamp() { return registrationTimestamp; } /** * Gets the milliseconds since epoch when the lease was last renewed. * Note that the value returned here is actually not the last lease renewal time but the renewal + duration. * * @return the milliseconds since epoch when the lease was last renewed. 
*/ public long getLastRenewalTimestamp() { return lastUpdateTimestamp; } /** * Gets the milliseconds since epoch when the lease was evicted. * * @return the milliseconds since epoch when the lease was evicted. */ public long getEvictionTimestamp() { return evictionTimestamp; } /** * Gets the milliseconds since epoch when the service for the lease was marked as up. * * @return the milliseconds since epoch when the service for the lease was marked as up. */ public long getServiceUpTimestamp() { return serviceUpTimestamp; } /** * Returns the holder of the lease. */ public T getHolder() { return holder; } }View Code
2)根據當前註冊的ID,如果能在map中取到則做以下操作:
2.1)根據當前存在節點的觸碰時間和註冊節點的觸碰時間比較,如果前者的時間晚於後者的時間,那麽當前註冊的實例就以已存在的實例為準
2.2)否則(即在map中取不到,屬於新註冊的實例),更新每分鐘期望的續約數量及其閾值
3)將當前的註冊節點存到map當中,至此我們的註冊過程基本告一段落了
二、eureka客戶端
在服務端servletContext初始化完畢時,會創建DiscoveryClient。熟悉eureka的朋友,一定熟悉這兩個屬性:fetchRegistry與registerWithEureka。在springcloud中集成eureka獨立模式運行時,如果這兩個值不為false,那麽啟動會報錯,為什麽會報錯呢?其實答案就在DiscoveryClient的構造函數中:
@Inject
DiscoveryClient(ApplicationInfoManager applicationInfoManager, EurekaClientConfig config, AbstractDiscoveryClientOptionalArgs args,
                Provider<BackupRegistry> backupRegistryProvider) {
    // ... (part of the code omitted)

    // When the client is configured to neither register nor fetch the registry,
    // no executors or transport are created and the constructor returns early.
    if (!config.shouldRegisterWithEureka() && !config.shouldFetchRegistry()) {
        logger.info("Client configured to neither register nor query for data.");
        scheduler = null;
        heartbeatExecutor = null;
        cacheRefreshExecutor = null;
        eurekaTransport = null;
        instanceRegionChecker = new InstanceRegionChecker(new PropertyBasedAzToRegionMapper(config), clientConfig.getRegion());

        // This is a bit of hack to allow for existing code using DiscoveryManager.getInstance()
        // to work with DI'd DiscoveryClient
        DiscoveryManager.getInstance().setDiscoveryClient(this);
        DiscoveryManager.getInstance().setEurekaClientConfig(config);

        initTimestampMs = System.currentTimeMillis();
        logger.info("Discovery Client initialized at timestamp {} with initial instances count: {}",
                initTimestampMs, this.getApplications().size());

        return;  // no need to setup up an network tasks and we are done
    }

    try {
        // default size of 2 - 1 each for heartbeat and cacheRefresh
        scheduler = Executors.newScheduledThreadPool(2,
                new ThreadFactoryBuilder()
                        .setNameFormat("DiscoveryClient-%d")
                        .setDaemon(true)
                        .build());

        heartbeatExecutor = new ThreadPoolExecutor(
                1, clientConfig.getHeartbeatExecutorThreadPoolSize(), 0, TimeUnit.SECONDS,
                new SynchronousQueue<Runnable>(),
                new ThreadFactoryBuilder()
                        .setNameFormat("DiscoveryClient-HeartbeatExecutor-%d")
                        .setDaemon(true)
                        .build()
        );  // use direct handoff

        cacheRefreshExecutor = new ThreadPoolExecutor(
                1, clientConfig.getCacheRefreshExecutorThreadPoolSize(), 0, TimeUnit.SECONDS,
                new SynchronousQueue<Runnable>(),
                new ThreadFactoryBuilder()
                        .setNameFormat("DiscoveryClient-CacheRefreshExecutor-%d")
                        .setDaemon(true)
                        .build()
        );  // use direct handoff

        eurekaTransport = new EurekaTransport();
        scheduleServerEndpointTask(eurekaTransport, args);

        // ... (part of the code omitted)

        // Sets up the scheduled tasks (the heartbeat timer among them).
        initScheduledTasks();

        // ... (rest of the constructor omitted)
}
根據源代碼,我們可以得出以下結論:
1)如果shouldRegisterWithEureka與shouldFetchRegistry都為false,那麽直接return。
2)創建發送心跳與刷新緩存的線程池
3)初始化創建的定時任務
那麽我們再看看initScheduledTasks()方法,裏面有如下代碼:
// Heartbeat timer scheduler.schedule( new TimedSupervisorTask( "heartbeat", scheduler, heartbeatExecutor, renewalIntervalInSecs, TimeUnit.SECONDS, expBackOffBound, new HeartbeatThread() ), renewalIntervalInSecs, TimeUnit.SECONDS);
此處是觸發一個定時執行的線程,以秒為單位,根據renewalIntervalInSecs值定時執行發送心跳,HeartbeatThread線程執行如下:
/**
 * Heartbeat job: tries to renew the lease and, when the renewal succeeds,
 * records the current time as the last successful heartbeat.
 */
private class HeartbeatThread implements Runnable {

    public void run() {
        final boolean renewed = renew();
        if (!renewed) {
            // Nothing to record for a failed heartbeat.
            return;
        }
        lastSuccessfulHeartbeatTimestamp = System.currentTimeMillis();
    }
}
我們可以看到run方法裏很簡單執行renew方法,如果成功記錄一下時間。renew方法:
/**
 * Sends a heartbeat to the eureka service through the registration client.
 * A 404 answer triggers a re-registration of this instance; otherwise the
 * heartbeat counts as successful only for a 200 answer.
 *
 * @return true when the heartbeat (or the follow-up re-registration) succeeded,
 *         false otherwise, including when any Throwable was raised.
 */
boolean renew() {
    try {
        EurekaHttpResponse<InstanceInfo> heartbeatResponse = eurekaTransport.registrationClient.sendHeartBeat(
                instanceInfo.getAppName(), instanceInfo.getId(), instanceInfo, null);
        logger.debug("{} - Heartbeat status: {}", PREFIX + appPathIdentifier, heartbeatResponse.getStatusCode());

        // Common case first: anything other than 404 is judged purely on being a 200.
        if (heartbeatResponse.getStatusCode() != 404) {
            return heartbeatResponse.getStatusCode() == 200;
        }

        // 404: the server answered "not found" for this instance, so register again.
        REREGISTER_COUNTER.increment();
        logger.info("{} - Re-registering apps/{}", PREFIX + appPathIdentifier, instanceInfo.getAppName());
        long dirtyTimestamp = instanceInfo.setIsDirtyWithTime();
        boolean registered = register();
        if (registered) {
            instanceInfo.unsetIsDirty(dirtyTimestamp);
        }
        return registered;
    } catch (Throwable e) {
        logger.error("{} - was unable to send heartbeat!", PREFIX + appPathIdentifier, e);
        return false;
    }
}
在這裏發送心跳如果返回的是404,那麽會執行註冊操作。注意,我們根據返回值httpResponse可以斷定這一切的操作都是基於http請求的,到底是不是呢?我們繼續看一下register方法:
/**
 * Registers this instance with the eureka service through the registration client.
 *
 * @return true when the service answered with status 204, false for any other status.
 * @throws Throwable the exception raised by the registration call, rethrown after logging.
 */
boolean register() throws Throwable {
    logger.info(PREFIX + appPathIdentifier + ": registering service...");

    EurekaHttpResponse<Void> registrationResponse;
    try {
        registrationResponse = eurekaTransport.registrationClient.register(instanceInfo);
    } catch (Exception e) {
        // Log with the full stack trace, then let the caller decide what to do.
        logger.warn("{} - registration failed {}", PREFIX + appPathIdentifier, e.getMessage(), e);
        throw e;
    }

    if (logger.isInfoEnabled()) {
        logger.info("{} - registration status: {}", PREFIX + appPathIdentifier, registrationResponse.getStatusCode());
    }

    boolean accepted = registrationResponse.getStatusCode() == 204;
    return accepted;
}
在這裏又調用了eurekaTransport裏registrationClient的方法:
/**
 * Holder for the transport-level collaborators used by the client: a bootstrap
 * resolver, a transport client factory, and a client/factory pair each for
 * registration and for queries.
 */
private static final class EurekaTransport {
    private ClosableResolver bootstrapResolver;
    private TransportClientFactory transportClientFactory;

    private EurekaHttpClient registrationClient;
    private EurekaHttpClientFactory registrationClientFactory;

    private EurekaHttpClient queryClient;
    private EurekaHttpClientFactory queryClientFactory;

    /**
     * Shuts down every component that has been set, skipping the ones that are
     * still null. Order: the two client factories, then the two clients, then
     * the transport client factory, and finally the bootstrap resolver.
     */
    void shutdown() {
        if (registrationClientFactory != null) {
            registrationClientFactory.shutdown();
        }

        if (queryClientFactory != null) {
            queryClientFactory.shutdown();
        }

        if (registrationClient != null) {
            registrationClient.shutdown();
        }

        if (queryClient != null) {
            queryClient.shutdown();
        }

        if (transportClientFactory != null) {
            transportClientFactory.shutdown();
        }

        if (bootstrapResolver != null) {
            bootstrapResolver.shutdown();
        }
    }
}
在這裏我們可以看到,eureka的客戶端是使用http請求進行註冊服務的,也就是說當我們創建DiscoveryClient就會向服務端進行實例的註冊。
三、服務端提供的rest服務
服務端提供用於處理客戶端註冊請求的代碼我們已經看過了,既然客戶端是通過走HTTP協議進行註冊的,那服務端總要有處理這個http請求的地址吧,其實eureka服務端是採用jax-rs標準提供rest方式進行暴露服務的,我們可以看一下ApplicationResource這個類的addInstance方法:
/** * Registers information about a particular instance for an * {@link com.netflix.discovery.shared.Application}. * * @param info * {@link InstanceInfo} information of the instance. * @param isReplication * a header parameter containing information whether this is * replicated from other nodes. */ @POST @Consumes({"application/json", "application/xml"}) public Response addInstance(InstanceInfo info, @HeaderParam(PeerEurekaNode.HEADER_REPLICATION) String isReplication) { logger.debug("Registering instance {} (replication={})", info.getId(), isReplication); // validate that the instanceinfo contains all the necessary required fields if (isBlank(info.getId())) { return Response.status(400).entity("Missing instanceId").build(); } else if (isBlank(info.getHostName())) { return Response.status(400).entity("Missing hostname").build(); } else if (isBlank(info.getIPAddr())) { return Response.status(400).entity("Missing ip address").build(); } else if (isBlank(info.getAppName())) { return Response.status(400).entity("Missing appName").build(); } else if (!appName.equals(info.getAppName())) { return Response.status(400).entity("Mismatched appName, expecting " + appName + " but was " + info.getAppName()).build(); } else if (info.getDataCenterInfo() == null) { return Response.status(400).entity("Missing dataCenterInfo").build(); } else if (info.getDataCenterInfo().getName() == null) { return Response.status(400).entity("Missing dataCenterInfo Name").build(); } // handle cases where clients may be registering with bad DataCenterInfo with missing data DataCenterInfo dataCenterInfo = info.getDataCenterInfo(); if (dataCenterInfo instanceof UniqueIdentifier) { String dataCenterInfoId = ((UniqueIdentifier) dataCenterInfo).getId(); if (isBlank(dataCenterInfoId)) { boolean experimental = "true".equalsIgnoreCase(serverConfig.getExperimental("registration.validation.dataCenterInfoId")); if (experimental) { String entity = "DataCenterInfo of type " + dataCenterInfo.getClass() + " must 
contain a valid id"; return Response.status(400).entity(entity).build(); } else if (dataCenterInfo instanceof AmazonInfo) { AmazonInfo amazonInfo = (AmazonInfo) dataCenterInfo; String effectiveId = amazonInfo.get(AmazonInfo.MetaDataKey.instanceId); if (effectiveId == null) { amazonInfo.getMetadata().put(AmazonInfo.MetaDataKey.instanceId.getName(), info.getId()); } } else { logger.warn("Registering DataCenterInfo of type {} without an appropriate id", dataCenterInfo.getClass()); } } } registry.register(info, "true".equals(isReplication)); return Response.status(204).build(); // 204 to be backwards compatible }
深入理解SpringCloud之Eureka註冊過程分析