The communication flow between iptables and netfilter
iptables and netfilter communicate through the setsockopt and getsockopt socket calls.
1. Userspace iptables code
In an earlier post, https://blog.csdn.net/haolipengzhanshen/article/details/84888489,
we walked through the main flow of iptables, so the following code should look familiar:
ret = do_command4(argc, argv, &table, &handle, false);
if (ret) {
    ret = iptc_commit(handle);
    iptc_free(handle);
}
iptables uses do_command4 to parse the command-line arguments one by one, then calls iptc_commit to submit the resulting ruleset.
Searching for the implementation of iptc_commit turns up only #define TC_COMMIT iptc_commit.
Following TC_COMMIT leads to the file libiptc.c in the libiptc directory.
Near the middle of TC_COMMIT(struct xtc_handle *handle), setsockopt is called to hand the iptables rules to the kernel module:
ret = setsockopt(handle->sockfd, TC_IPPROTO, SO_SET_REPLACE, repl, sizeof(*repl) + repl->size);
At the end of TC_COMMIT, setsockopt is called again to update the rule counters:
ret = setsockopt(handle->sockfd, TC_IPPROTO, SO_SET_ADD_COUNTERS, newcounters, counterlen);
My initial guess was that, since rules can be added and deleted, there would be at least SO_SET_ADD and SO_SET_DEL commands. In reality there is only SO_SET_REPLACE. Can a single command really stand in for both adding and deleting? It can: libiptc keeps a complete copy of the table in userspace, applies each addition or deletion to that copy, and then atomically replaces the entire kernel table with it.
The difference between the two setsockopt calls is the command: SO_SET_REPLACE installs the new ruleset, while SO_SET_ADD_COUNTERS updates the counters. Since we are studying how rules are added, and
#define SO_SET_REPLACE IPT_SO_SET_REPLACE
let's explore how IPT_SO_SET_REPLACE is handled.
2. Kernel-side netfilter code
Searching the netfilter code for references to IPT_SO_SET_REPLACE
leads to the do_ipt_set_ctl function:
static int
do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
{
    int ret;

    if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
        return -EPERM;

    switch (cmd) {
    case IPT_SO_SET_REPLACE:
        ret = do_replace(sock_net(sk), user, len);
        break;
    case IPT_SO_SET_ADD_COUNTERS:
        ret = do_add_counters(sock_net(sk), user, len, 0);
        break;
    default:
        duprintf("do_ipt_set_ctl: unknown request %i\n", cmd);
        ret = -EINVAL;
    }

    return ret;
}
The handler for the IPT_SO_SET_REPLACE command is do_replace:
static int
do_replace(struct net *net, const void __user *user, unsigned int len)
{
    int ret;
    struct ipt_replace tmp;
    struct xt_table_info *newinfo;
    void *loc_cpu_entry;
    struct ipt_entry *iter;

    if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
        return -EFAULT;

    /* overflow check */
    if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
        return -ENOMEM;
    tmp.name[sizeof(tmp.name)-1] = 0;

    newinfo = xt_alloc_table_info(tmp.size);
    if (!newinfo)
        return -ENOMEM;

    /* choose the copy that is on our node/cpu */
    loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
    if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
                       tmp.size) != 0) {
        ret = -EFAULT;
        goto free_newinfo;
    }

    ret = translate_table(net, newinfo, loc_cpu_entry, &tmp);
    if (ret != 0)
        goto free_newinfo;

    duprintf("Translated table\n");
    ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
                       tmp.num_counters, tmp.counters);
    if (ret)
        goto free_newinfo_untrans;
    return 0;

 free_newinfo_untrans:
    xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
        cleanup_entry(iter, net);
 free_newinfo:
    xt_free_table_info(newinfo);
    return ret;
}
copy_from_user copies data from userspace into the kernel;
copy_to_user copies kernel data back out to userspace.
do_replace then calls __do_replace, passing the newinfo variable: newinfo holds the newly submitted iptables rules, so it must end up replacing the current table.
__do_replace is defined in ip_tables.c under net/ipv4/netfilter:
static int
__do_replace(struct net *net, const char *name, unsigned int valid_hooks,
             struct xt_table_info *newinfo, unsigned int num_counters,
             void __user *counters_ptr)
{
    int ret;
    struct xt_table *t;
    struct xt_table_info *oldinfo;
    struct xt_counters *counters;
    void *loc_cpu_old_entry;
    struct ipt_entry *iter;

    ret = 0;
    counters = vzalloc(num_counters * sizeof(struct xt_counters));
    if (!counters) {
        ret = -ENOMEM;
        goto out;
    }

    t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
                                "iptable_%s", name);
    if (IS_ERR_OR_NULL(t)) {
        ret = t ? PTR_ERR(t) : -ENOENT;
        goto free_newinfo_counters_untrans;
    }

    /* You lied! */
    if (valid_hooks != t->valid_hooks) {
        duprintf("Valid hook crap: %08X vs %08X\n",
                 valid_hooks, t->valid_hooks);
        ret = -EINVAL;
        goto put_module;
    }

    oldinfo = xt_replace_table(t, num_counters, newinfo, &ret);
    if (!oldinfo)
        goto put_module;

    /* Update module usage count based on number of rules */
    duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
             oldinfo->number, oldinfo->initial_entries, newinfo->number);
    if ((oldinfo->number > oldinfo->initial_entries) ||
        (newinfo->number <= oldinfo->initial_entries))
        module_put(t->me);
    if ((oldinfo->number > oldinfo->initial_entries) &&
        (newinfo->number <= oldinfo->initial_entries))
        module_put(t->me);

    /* Get the old counters, and synchronize with replace */
    get_counters(oldinfo, counters);

    /* Decrease module usage counts and free resource */
    loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
    xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size)
        cleanup_entry(iter, net);

    xt_free_table_info(oldinfo);
    if (copy_to_user(counters_ptr, counters,
                     sizeof(struct xt_counters) * num_counters) != 0)
        ret = -EFAULT;
    vfree(counters);
    xt_table_unlock(t);
    return ret;

 put_module:
    module_put(t->me);
    xt_table_unlock(t);
 free_newinfo_counters_untrans:
    vfree(counters);
 out:
    return ret;
}
1. vzalloc allocates zeroed memory for the counters.
2. try_then_request_module checks whether the table's module is already loaded; if not, it loads it with request_module(mod). Having one kernel module pull in another is generally done via request_module(mod).
3. oldinfo = xt_replace_table(t, num_counters, newinfo, &ret);
xt_replace_table swaps the old table contents for the new newinfo structure. This is the heart of the whole operation, so take note!
4. The old counters are handed back for userspace to read:
if (copy_to_user(counters_ptr, counters, sizeof(struct xt_counters) * num_counters) != 0
3. The structure shared between kernel-side netfilter and userspace iptables
How does netfilter receive the rule parameters passed down by iptables?
Earlier, in the netfilter kernel module, we saw the match callback registered as set_match_v0():
.match = set_match_v0
static bool
set_match_v0(const struct sk_buff *skb, struct xt_action_param *par)
{
    const struct xt_set_info_match_v0 *info = par->matchinfo;
    ADT_OPT(opt, par->family, info->match_set.u.compat.dim,
            info->match_set.u.compat.flags, 0, UINT_MAX);

    return match_set(info->match_set.index, skb, par, &opt,
                     info->match_set.u.compat.flags & IPSET_INV_MATCH);
}
Does set_match_v0 really obtain the rules configured by the userspace program from its struct xt_action_param pointer? To verify this guess, we need to find where the match callback is invoked and how its arguments are filled in. After some searching, the call site turns out to be the ipt_do_table function in ip_tables.c under net/ipv4/netfilter.
unsigned int
ipt_do_table(struct sk_buff *skb,
             unsigned int hook,
             const struct net_device *in,
             const struct net_device *out,
             struct xt_table *table)
{
    /* code omitted */

    /* Initialization */
    ip = ip_hdr(skb);
    indev = in ? in->name : nulldevname;
    outdev = out ? out->name : nulldevname;
    acpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
    acpar.thoff = ip_hdrlen(skb);
    acpar.hotdrop = false;
    acpar.in = in;
    acpar.out = out;
    acpar.family = NFPROTO_IPV4;
    acpar.hooknum = hook;

    IP_NF_ASSERT(table->valid_hooks & (1 << hook));
    local_bh_disable();
    addend = xt_write_recseq_begin();
    private = table->private;
    cpu = smp_processor_id();
    table_base = private->entries[cpu];
    jumpstack = (struct ipt_entry **)private->jumpstack[cpu];
    stackptr = per_cpu_ptr(private->stackptr, cpu);
    origptr = *stackptr;

    e = get_entry(table_base, private->hook_entry[hook]);

    do {
        /* these correspond to the target and match parts of an iptables rule */
        const struct xt_entry_target *t;
        const struct xt_entry_match *ematch;

        IP_NF_ASSERT(e);
        if (!ip_packet_match(ip, indev, outdev,
                             &e->ip, acpar.fragoff)) {
 no_match:
            e = ipt_next_entry(e);
            continue;
        }

        xt_ematch_foreach(ematch, e) {
            acpar.match = ematch->u.kernel.match;
            acpar.matchinfo = ematch->data;
            /* this is where the match callback is invoked! */
            if (!acpar.match->match(skb, &acpar))
                goto no_match;
        }
Throughout ipt_do_table, the members of the struct xt_action_param variable acpar are filled in one by one, and acpar is then passed to each match callback. In particular, acpar.matchinfo = ematch->data: matchinfo points at the match data the userspace rule carried.
This confirms the guess: netfilter reads the rules configured in iptables through the xt_action_param structure.
Conclusion: the structure through which the netfilter kernel module and userspace iptables communicate is struct xt_action_param, and the rule content is stored in its par->matchinfo member.
Reference
Usage of the copy_from_user and copy_to_user functions