1. 程式人生 > >mysql分析(二)mysql語法分析

mysql分析(二)mysql語法分析

函數定義 mon sts ngs stop local don 字符 force

一、mysqld中對於SQL語句的分析
客戶端和服務器之間交互的時候,客戶端發送的同樣是字符串形式的查詢和執行命令,服務器返回的則是特定格式的數據庫內容(?)。這個過程就需要在服務器端對指令進行實時翻譯,生成特定的查詢指令。在sqlite的實現中,sqlite定義了一個專用的虛擬機環境,和通常的真正的匯編指令類似,它有自己的指令集,有特定的指令格式和操作數,有自己的寄存器和內存內容,這個虛擬機叫做vdbe(Virtual DataBase Engine,虛擬數據庫引擎)。這一點其實並不奇怪,但是也並不是那麽簡單的,因為這是要設計出一種語言,這種語言要盡量的精簡,但是為了方便,也可以增加一些相對復雜的指令。想一下RISC和CISC體系結構的指令的區別就可以看出,其實設計一種語言並不是那麽簡單的。特別是我們最多使用的C語言:能夠使用一種語言,和能夠設計並實現一種語言,是有天大區別的。
二、詞法分析
和很多的編譯器一樣,其詞法分析並沒有使用lex來生成詞法分析器,而是自己定義了一個詞法分析器,其實現相對簡單一些,可能和SQL語言本身的詞法比較簡單有關。其可以識別的單詞定義在lex.h文件的symbols數組中:
/*
  One entry of the lexer's symbol tables: a keyword or operator spelling
  together with the token it maps to. Entries are written with the SYM()
  helper below, e.g. { "ADD", SYM(ADD)}.
*/
typedef struct st_symbol {
const char *name;             /* spelling of the symbol, e.g. "&&" or "ALTER" */
uint tok;                     /* token value; supplied through SYM_OR_NULL(A) */
uint length;                  /* SYM() initialises this to 0.
                                 NOTE(review): presumably filled in later with
                                 the length of name -- confirm */
struct st_sym_group *group;   /* SYM() points this at sym_group_common */
} SYMBOL;
/*
  SYM(A) expands to the three trailing initialisers of a SYMBOL
  (tok, length, group), so a table row only has to spell out the name.
*/
#define SYM(A) SYM_OR_NULL(A),0,&sym_group_common

/*
Symbols are broken into separated arrays to allow field names with
same name as functions.
These are kept sorted for human lookup (the symbols are hashed).

NOTE! The symbol tables should be the same regardless of what features
are compiled into the server. Don't add ifdef'ed symbols to the
lists
*/

static SYMBOL symbols[] = {
{ "&&", SYM(AND_AND_SYM)},
{ "<", SYM(LT)},
{ "<=", SYM(LE)},
{ "<>", SYM(NE)},
{ "!=", SYM(NE)},
{ "=", SYM(EQ)},
{ ">", SYM(GT_SYM)},
{ ">=", SYM(GE)},
{ "<<", SYM(SHIFT_LEFT)},
{ ">>", SYM(SHIFT_RIGHT)},
{ "<=>", SYM(EQUAL_SYM)},
{ "ACCESSIBLE", SYM(ACCESSIBLE_SYM)},
{ "ACTION", SYM(ACTION)},
{ "ADD", SYM(ADD)},
{ "AFTER", SYM(AFTER_SYM)},
{ "AGAINST", SYM(AGAINST)},
{ "AGGREGATE", SYM(AGGREGATE_SYM)},
{ "ALL", SYM(ALL)},
{ "ALGORITHM", SYM(ALGORITHM_SYM)},
{ "ALTER", SYM(ALTER)},
{ "ANALYZE", SYM(ANALYZE_SYM)},
{ "AND", SYM(AND_SYM)},
{ "ANY", SYM(ANY_SYM)},
……
{ "XML", SYM(XML_SYM)}, /* LOAD XML Arnold/Erik */
{ "YEAR", SYM(YEAR_SYM)},
{ "YEAR_MONTH", SYM(YEAR_MONTH_SYM)},
{ "ZEROFILL", SYM(ZEROFILL)},
{ "||", SYM(OR_OR_SYM)}
};
然後gen_lex_hash.cc會在構建時生成一個文件(從MySQL源碼的構建腳本看,應該是lex_hash.h),只是當時執行make的時候沒有重定向make的輸出,所以並不清楚這個文件具體是如何生成和使用的,好在現在也不是很關心這個東西。生成的文件裏面有一個get_hash_symbol函數以及一些簡單的表格,get_hash_symbol也是在真正的詞法分析中使用的一個函數。
三、語法分析
語法分析始終是一個比較復雜的東東,所以雖然很多人不願意,但是還是使用了yacc來進行語法分析,語法規則是在sql_yacc.yy文件中實現的,該文件相對復雜一些。下面以insert語句為例:
/*
  Top-level rule for an INSERT statement:
    INSERT <lock option> <ignore option> [INTO] table <field spec> <on-duplicate option>
*/
insert:
INSERT
{
/* Mid-rule action, run right after the INSERT token: tag the
   statement as SQLCOM_INSERT, default the duplicate handling to
   DUP_ERROR and call mysql_init_select() on the LEX. */
LEX *lex= Lex;
lex->sql_command= SQLCOM_INSERT;
lex->duplicates= DUP_ERROR;
mysql_init_select(lex);
}
insert_lock_option
opt_ignore insert2
{
/* $3 is insert_lock_option: $1 is INSERT and $2 is the mid-rule
   action above. The lock option is applied to the tables, then
   current_select is pointed back at Lex->select_lex. */
Select->set_lock_for_tables($3);
Lex->current_select= &Lex->select_lex;
}
insert_field_spec opt_insert_update
{}
;
/*
  Target of the INSERT. The INTO keyword after INSERT is optional:
  both alternatives reduce to insert_table, so the statement means the
  same with or without it.
*/
insert2:
          INTO insert_table {}
        | insert_table {}
        ;

/*
  The table being inserted into. Once the table name is parsed, the
  per-statement insert state on the LEX is reset: empty() is called on
  field_list and many_values, and insert_list is set to 0.
*/
insert_table:
table_name
{
LEX *lex=Lex;
lex->field_list.empty();
lex->many_values.empty();
lex->insert_list=0;
};

/*
  What follows the table name in an INSERT:
    - a bare value source,
    - an empty column list "()" followed by a value source,
    - an explicit column list "(a, b, ...)" followed by a value source,
    - or the SET col=expr, ... form.
*/
insert_field_spec:
          insert_values {}
        | '(' ')' insert_values {}
        | '(' fields ')' insert_values {}
        | SET
          {
            /* SET form: allocate a row (List_item) and append it to
               many_values before ident_eq_list is parsed; abort the
               parse if the allocation fails or push_back() returns
               non-zero. */
            LEX *lex=Lex;
            if (!(lex->insert_list = new List_item) ||
                lex->many_values.push_back(lex->insert_list))
              MYSQL_YYABORT;
          }
          ident_eq_list
        ;

/*
  Comma-separated column list of an INSERT (left-recursive). Every
  insert_ident is appended to Lex->field_list as it is reduced.
*/
fields:
          fields ',' insert_ident { Lex->field_list.push_back($3); }
        | insert_ident { Lex->field_list.push_back($1); }
        ;

/*
  Source of the inserted rows: a VALUES / VALUE row list, or a select
  (create_select), optionally wrapped in parentheses.
*/
insert_values:
          VALUES values_list {}
        | VALUE_SYM values_list {}
        | create_select
          { Select->set_braces(0);}   /* select written without parentheses */
          union_clause {}
        | '(' create_select ')'
          { Select->set_braces(1);}   /* select written inside parentheses */
          union_opt {}
        ;
……
/*
  Optional "ON DUPLICATE KEY UPDATE ..." tail of an INSERT. The mid-rule
  action runs once ON DUPLICATE has been seen and switches
  Lex->duplicates to DUP_UPDATE (the insert rule sets it to DUP_ERROR
  at the start of the statement).
*/
opt_insert_update:
/* empty */
| ON DUPLICATE_SYM { Lex->duplicates= DUP_UPDATE; }
KEY_SYM UPDATE_SYM insert_update_list
;
/*
  One or more parenthesised rows separated by commas:
  (..), (..), ...  Left-recursive; each row is a no_braces.
*/
values_list:
          values_list ',' no_braces
        | no_braces
        ;
/*
  A single parenthesised row of a VALUES list.
*/
no_braces:
          '('
          {
            /* Start of a row: allocate a fresh List_item to collect its
               values; abort the parse if the allocation fails. */
            if (!(Lex->insert_list = new List_item))
              MYSQL_YYABORT;
          }
          opt_values ')'
          {
            /* End of the row: append the collected row to many_values;
               a non-zero result from push_back() aborts the parse. */
            LEX *lex=Lex;
            if (lex->many_values.push_back(lex->insert_list))
              MYSQL_YYABORT;
          }
        ;
/*
  Contents of one row: either nothing (an empty "()" row) or a
  non-empty value list.
*/
opt_values:
/* empty */ {}
| values
;
/*
  Comma-separated values of one row (left-recursive). Each value is
  appended to the row currently held in Lex->insert_list; a non-zero
  result from push_back() aborts the parse.
*/
values:
          values ',' expr_or_default
          {
            if (Lex->insert_list->push_back($3))
              MYSQL_YYABORT;
          }
        | expr_or_default
          {
            if (Lex->insert_list->push_back($1))
              MYSQL_YYABORT;
          }
        ;
/*
  A single inserted value: an ordinary expression, or the DEFAULT
  keyword.
*/
expr_or_default:
expr { $$= $1;}
| DEFAULT
{
/* DEFAULT: build an Item_default_value on the THD's mem_root for the
   current context; abort the parse if the allocation returns NULL. */
$$= new (YYTHD->mem_root) Item_default_value(Lex->current_context());
if ($$ == NULL)
MYSQL_YYABORT;
}
;
四、查詢指令開始
/*
  Start rule: one parser invocation handles exactly one statement.
  Three shapes are accepted:
    - empty input,
    - a statement followed by ';' (and possibly more input),
    - a statement followed directly by end of input.
  In every case found_semicolon on the input stream is set: NULL when
  nothing follows, or the current input position when another
  statement follows in a multi-statement packet.
*/
query:
          END_OF_INPUT
          {
            THD *thd= YYTHD;
            /* Empty input is reported as ER_EMPTY_QUERY unless the
               server is bootstrapping or OPTION_FOUND_COMMENT is set
               in the select_lex options. */
            if (!thd->bootstrap &&
                (!(thd->lex->select_lex.options & OPTION_FOUND_COMMENT)))
            {
              my_message(ER_EMPTY_QUERY, ER(ER_EMPTY_QUERY), MYF(0));
              MYSQL_YYABORT;
            }
            thd->lex->sql_command= SQLCOM_EMPTY_QUERY;
            YYLIP->found_semicolon= NULL;
          }
        | verb_clause
          {
            Lex_input_stream *lip = YYLIP;

            if ((YYTHD->client_capabilities & CLIENT_MULTI_QUERIES) &&
                lip->multi_statements &&
                ! lip->eof())
            {
              /*
                We found a well formed query, and multi queries are allowed:
                - force the parser to stop after the ';'
                - mark the start of the next query for the next invocation
                  of the parser.
              */
              lip->next_state= MY_LEX_END;
              lip->found_semicolon= lip->get_ptr();
            }
            else
            {
              /* Single query, terminated. */
              lip->found_semicolon= NULL;
            }
          }
          ';'
          opt_end_of_input
        | verb_clause END_OF_INPUT
          {
            /* Single query, not terminated. */
            YYLIP->found_semicolon= NULL;
          }
        ;

/*
  Optional END_OF_INPUT after the terminating semicolon of a statement:
  the input may end there, or more text may follow.
*/
opt_end_of_input:
/* empty */
| END_OF_INPUT
;
這說明每條查詢命令都是以一個分號或者輸入結束標誌(END_OF_INPUT)結尾的,這也說明這個語法分析的功能相對比較單一,它一次只處理一條語句,對於一個
use mysql; select ;
句型的語句,mysql客戶端是分為兩次發送的(下面的gdb調試記錄可以印證這一點),這樣的好處就是能夠及時發現錯誤。
(gdb) r -e "use mysql;select * from help_keyword limit 1;select * from help_topic limit 2"
The program being debugged has been started already.
Start it from the beginning? (y or n) y

Starting program: /usr/local/mysql/bin/mysql -e "use mysql;select * from help_keyword limit 1;select * from help_topic limit 2"
[Thread debugging using libthread_db enabled]
[New Thread 0xb7fe6b70 (LWP 9826)]
[Thread 0xb7fe6b70 (LWP 9826) exited]

Breakpoint 1, com_go (buffer=0x833c090, line=0x0)
at /home/tsecer/Downloads/mysql-5.5.28/client/mysql.cc:2967
2967 ulong timer, warnings= 0;
(gdb) p *buffer
$3 = {Ptr = 0x83441e8 "select * from help_keyword limit 1", str_length = 34,
Alloced_length = 520, alloced = true, str_charset = 0x80ceee0}
(gdb) c
Continuing.
+-----------------+------+
| help_keyword_id | name |
+-----------------+------+
| 0 | JOIN |
+-----------------+------+

Breakpoint 1, com_go (buffer=0x833c090, line=0x0)
at /home/tsecer/Downloads/mysql-5.5.28/client/mysql.cc:2967
2967 ulong timer, warnings= 0;
(gdb) p *buffer
$4 = {Ptr = 0x83441e8 "select * from help_topic limit 2\n", str_length = 32,
Alloced_length = 520, alloced = true, str_charset = 0x80ceee0}
(gdb) c
五、從開始到write調用鏈
Breakpoint 4, vio_write (vio=0x8344600, buf=0x8348798 "#", size=39)
at /home/tsecer/Downloads/mysql-5.5.28/vio/viosocket.c:127
127 r = write(vio->sd, buf, size);
Current language: auto
The current source language is "auto; currently c".
(gdb) p/50c buf
Item count other than 1 is meaningless in "print" command.
(gdb) x/50c buf
0x8348798: 35 ‘#‘ 0 ‘\000‘ 0 ‘\000‘ 0 ‘\000‘ 3 ‘\003‘ 115 ‘s‘ 101 ‘e‘ 108 ‘l‘
0x83487a0: 101 ‘e‘ 99 ‘c‘ 116 ‘t‘ 32 ‘ ‘ 42 ‘*‘ 32 ‘ ‘ 102 ‘f‘ 114 ‘r‘
0x83487a8: 111 ‘o‘ 109 ‘m‘ 32 ‘ ‘ 104 ‘h‘ 101 ‘e‘ 108 ‘l‘ 112 ‘p‘ 95 ‘_‘
0x83487b0: 107 ‘k‘ 101 ‘e‘ 121 ‘y‘ 119 ‘w‘ 111 ‘o‘ 114 ‘r‘ 100 ‘d‘ 32 ‘ ‘
0x83487b8: 108 ‘l‘ 105 ‘i‘ 109 ‘m‘ 105 ‘i‘ 116 ‘t‘ 32 ‘ ‘ 49 ‘1‘ 0 ‘\000‘
0x83487c0: 0 ‘\000‘ 0 ‘\000‘ 109 ‘m‘ 121 ‘y‘ 115 ‘s‘ 113 ‘q‘ 108 ‘l‘ 95 ‘_‘
0x83487c8: 110 ‘n‘ 97 ‘a‘
(gdb) bt
#0 vio_write (vio=0x8344600, buf=0x8348798 "#", size=39)
at /home/tsecer/Downloads/mysql-5.5.28/vio/viosocket.c:127
#1 0x08061800 in net_real_write (net=0x833bc60, packet=0x8348798 "#", len=39)
at /home/tsecer/Downloads/mysql-5.5.28/sql/net_serv.cc:632
#2 0x08061112 in net_flush (net=0x833bc60)
at /home/tsecer/Downloads/mysql-5.5.28/sql/net_serv.cc:338
#3 0x08061477 in net_write_command (net=0x833bc60, command=3 ‘\003‘, header=0x0,
head_len=0, packet=0x83441e8 "select * from help_keyword limit 1", len=34)
at /home/tsecer/Downloads/mysql-5.5.28/sql/net_serv.cc:478
#4 0x08064bbb in cli_advanced_command (mysql=0x833bc60, command=COM_QUERY,
header=0x0, header_length=0,
arg=0x83441e8 "select * from help_keyword limit 1", arg_length=34,
skip_check=1 ‘\001‘, stmt=0x0)
at /home/tsecer/Downloads/mysql-5.5.28/sql-common/client.c:841
#5 0x08068fa6 in mysql_send_query (mysql=0x833bc60,
query=0x83441e8 "select * from help_keyword limit 1", length=34)
at /home/tsecer/Downloads/mysql-5.5.28/sql-common/client.c:3903
#6 0x08068fd6 in mysql_real_query (mysql=0x833bc60,
query=0x83441e8 "select * from help_keyword limit 1", length=34)
at /home/tsecer/Downloads/mysql-5.5.28/sql-common/client.c:3914
#7 0x080522cd in mysql_real_query_for_lazy (
buf=0x83441e8 "select * from help_keyword limit 1", length=34)
at /home/tsecer/Downloads/mysql-5.5.28/client/mysql.cc:2738
---Type <return> to continue, or q <return> to quit---
#8 0x08052e7f in com_go (buffer=0x833c090, line=0x0)
at /home/tsecer/Downloads/mysql-5.5.28/client/mysql.cc:3008
#9 0x080511cc in add_line (buffer=...,
line=0x8343398 "select * from help_keyword limit 1rd limit 1;select * from help_topic limit 2", in_string=0xbffff03f "", ml_comment=0xbffff03e, truncated=false)
at /home/tsecer/Downloads/mysql-5.5.28/client/mysql.cc:2178
#10 0x08050938 in read_and_execute (interactive=false)
at /home/tsecer/Downloads/mysql-5.5.28/client/mysql.cc:1951
#11 0x0804fa31 in main (argc=6, argv=0x8343220)
at /home/tsecer/Downloads/mysql-5.5.28/client/mysql.cc:1232
(gdb)


vio_write函數定義mysql-5.5.28\vio\viosocket.c
/*
  Write a buffer to the socket behind a Vio.

  vio   connection object; only its socket descriptor vio->sd is used here
  buf   bytes to send
  size  number of bytes to send

  Returns whatever send()/write() returned, converted to size_t. A
  failure therefore comes back as (size_t) -1, and the result may be
  smaller than size; this function does not retry.
*/
size_t vio_write(Vio * vio, const uchar* buf, size_t size)
{
size_t r;
DBUG_ENTER("vio_write");
DBUG_PRINT("enter", ("sd: %d buf: 0x%lx size: %u", vio->sd, (long) buf,
(uint) size));
/* Under __WIN__ the descriptor is written with send(); elsewhere write() is used. */
#ifdef __WIN__
r = send(vio->sd, buf, size,0);
#else
r = write(vio->sd, buf, size);
#endif /* __WIN__ */
/* Debug builds only: trace the socket error when the call failed. */
#ifndef DBUG_OFF
if (r == (size_t) -1)
{
DBUG_PRINT("vio_error", ("Got error on write: %d",socket_errno));
}
#endif /* DBUG_OFF */
DBUG_PRINT("exit", ("%u", (uint) r));
DBUG_RETURN(r);
}
其實都是一些應用層的皮毛內容,和編譯器一樣,真正的語義分析及優化才是關鍵。對於數據庫來說,真正的核心操作同樣在於查詢指令的生成,而這一點可能需要花費大量的時間(和智商),由於兩者都不具備,有時間再慢慢來。

mysql分析(二)mysql語法分析