1. 程式人生 > >ora-600 4000恢復一例

ora-600 4000恢復一例

下午一個同事遇到經典的ora-600 4000錯誤,我遠端幫忙處理了一下,關於該錯誤的處理, 網上已經有不少的例子了,通常情況下,該錯誤通過反覆重啟資料庫,然後可以進行規避 4000錯誤,但是如果反覆重啟N次後,錯誤依舊的話,那麼我們只能使用極端手段了。 網上能找到的例子基本上都是一個思路,通過trace 定位到含未提交檢視的block, 然後用bbed(windows可以使用UE代替)修改flag,將20修改為80即可,如下: *** 2011-08-30 15:57:10.037 ksedmp: internal or fatal error ORA-00600: internal error code, arguments: [4000], [5], [], [], [], [], [], [] Current SQL statement for this session: select ctime, mtime, stime from obj$ where obj# = :1 ----- Call Stack Trace ----- calling              call     entry                argument values in hex     location             type     point                (? means dubious value)    -------------------- -------- -------------------- ---------------------------- ksedst()+27          call     ksedst1()            0 ? 1 ? ksedmp()+557         call     ksedst()             0 ? 9BF6BA9C ? 0 ? 2A ?                                                    955B3FF0 ? 70000 ? ksfdmp()+19          call     ksedmp()             3 ? BFA3EF80 ? AC152B0 ?                                                    CBD2D20 ? 3 ? CB84398 ? kgeriv()+188         call     00000000             CBD2D20 ? 3 ? kgeasi()+113         call     kgeriv()             CBD2D20 ? B7F50020 ? FA0 ?                                                    1 ? BFA3EFBC ? ktudba()+264         call     kgeasi()             CBD2D20 ? B7F50020 ? FA0 ?                                                    2 ? 1 ? 0 ? 5 ? 0 ? ktrgcm()+6207        call     ktudba()             5 ? BFA3F49C ? 0 ? 0 ? ktrgtc()+941         call     ktrgcm()             B7F6A3A0 ? 0 ? B7F9EC60 ?                                                    8EF1A0B4 ? 8EF10CE8 ? 198 ? kdsgrp()+107         call     ktrgtc()             B7F6A3A0 ? B7F6A348 ?                                                    9C22152 ? BFA3F5B8 ? 240 ?                                                    9C24DD4 ? 9C21D8C ? kdsfbrcb()+513       call     kdsgrp()             B7F6A39C ? 0 ? B7F6A39C ? 
qertbFetchByRowID()  call     kdsfbrcb()           B7F6A39C ? B7F9EBF8 ? 0 ? 1 ? +2052                                              0 ? 0 ? opifch2()+5157       call     00000000             8EF10A8C ? A11CDF4 ?                                                    BFA3FBE4 ? 1 ? opifch()+56          call     opifch2()            89 ? 5 ? BFA3FE54 ? opiodr()+2347        call     00000000             5 ? 2 ? BFA40BD0 ? rpidrus()+434        call     opiodr()             5 ? 2 ? BFA40BD0 ? 5 ? skgmstack()+210      call     00000000             BFA4062C ? CBD2E1C ?                                                    CBD2E1C ? BFA40610 ?                                                    BFA40B14 ? BFA4062C ? rpidru()+98          call     skgmstack()          BFA40610 ? CBD2AE0 ? F618 ?                                                    9749536 ? BFA4062C ? rpiswu2()+1061       call     00000000             BFA40B14 ? BFA40C60 ? 2 ? 2 ?                                                    BFA40AD8 ? 5953 ? rpidrv()+1915        call     rpiswu2()            99C70654 ? 0 ? BFA40AD8 ? 2 ?                                                    BFA40B50 ? 0 ? BFA40AD8 ? 0 ?                                                    97497F0 ? 97498CC ?                                                    BFA40B14 ? 8 ? rpifch()+56          call     rpidrv()             5 ? 5 ? BFA40BD0 ? 8 ? kqdpts()+174         call     rpifch()             5 ? 5 ? 5 ? 3 ? 9AB69FDB ?                                                    7 ? kqrlfc()+534         call     kqdpts()             9AB69E4C ? BFA40E10 ? 35953 ?                                                    CBD2E1C ? CBD2D20 ? 8 ? kqlbplc()+107        call     kqrlfc()             0 ? BFA40DF8 ? 4 ? 0 ?                                                    C251F20 ? 47 ? kqlblfc()+477        call     kqlbplc()            0 ? BFA42734 ? 9CCC2088 ?                                                    CBD2E1C ? CBD2D20 ? 7 ? 
adbdrv()+5689        call     kqlblfc()            0 ? BFA45508 ? opiexe()+18301       call     adbdrv()             23288 ? 0 ? 18E19E2E ?                                                    48FAE ? 9AB70BC4 ? 0 ? opiosq0()+3918       call     opiexe()             4 ? 0 ? BFA46978 ? kpooprx()+250        call     opiosq0()            3 ? E ? BFA46B80 ? A4 ? kpoal8()+867         call     kpooprx()            BFA48D58 ? BFA478F0 ? 1D ?                                                    1 ? 0 ? A4 ? opiodr()+2347        call     00000000             5E ? 17 ? BFA48D54 ? ttcpip()+4227        call     00000000             5E ? 17 ? BFA48D54 ? 0 ?                                                    CD51D86 ? 11 ? opitsk()+1991        call     ttcpip()             CBDA520 ? 5E ? BFA48D54 ? 0 ?                                                    BFA48234 ? BFA48E78 ? opiino()+1387        call     opitsk()             0 ? 0 ? opiodr()+2347        call     00000000             3C ? 4 ? BFA49940 ? opidrv()+915         call     opiodr()             3C ? 4 ? BFA49940 ? 0 ? sou2o()+113          call     opidrv()             3C ? 4 ? BFA49940 ? opimai_real()+212    call     sou2o()              BFA49924 ? 3C ? 4 ?                                                    BFA49940 ? main()+111           call     opimai_real()        2 ? BFA49970 ? __libc_start_main()  call     00000000             2 ? BFA49A34 ? BFA49A40 ? +220                                               4FFAC2 ? 0 ? 12D798 ?

從上面錯誤來看,我們知道問題出在通過obj#訪問obj$上,下面繼續看trace。
Object id on Block? Y
 seg/obj: 0x12  csc: 0xb2c.3a7f4d34  itc: 1  flg: -  typ: 1 - DATA
     fsl: 0  fnx: 0x0 ver: 0x01
 Itl           Xid                  Uba         Flag  Lck        Scn/Fsc
0x01   0x0005.01e.000099e3  0x00802689.29dd.09  --U-    1  fsc 0x0000.3a7f4d35
data_block_dump,data header at 0x847ce044
===============
tsiz: 0x1fb8
hsiz: 0xea
pbl: 0x847ce044
bdba: 0x0040007a
     76543210
flag=--------
ntab=1
nrow=108
frre=-1
fsbo=0xea
fseo=0x385
avsp=0x369
tosp=0x369
0xe:pti[0] nrow=108 offs=0
上面的資訊比較關鍵,關於XID,UBA的解釋,我以前也寫過相關文章,這裡不多說。
根據bdba: 0x0040007a,我們可以通過如下查詢得知其為file 1 block 122.
select dbms_utility.data_block_address_file(TO_NUMBER('40007a', 'XXXXXXXX')) file_id,
dbms_utility.data_block_address_block(TO_NUMBER('40007a', 'XXXXXXXX')) block_id from dual;
編譯BBED後,然後看了這個block的ktbbh,如下:
BBED> set file 1 block 122
        FILE#           1
        BLOCK#          122
BBED> p ktbbh
struct ktbbh, 48 bytes                      @20    
   ub1 ktbbhtyp                             @20       0x01 (KDDBTDATA)
   union ktbbhsid, 4 bytes                  @24    
      ub4 ktbbhsg1                          @24       0x00000012
      ub4 ktbbhod1                          @24       0x00000012
   struct ktbbhcsc, 8 bytes                 @28    
      ub4 kscnbas                           @28       0x3a7f4d34
      ub2 kscnwrp                           @32       0x0b2c
   b2 ktbbhict                              @36       1
   ub1 ktbbhflg                             @38       0x02 (NONE)
   ub1 ktbbhfsl                             @39       0x00
   ub4 ktbbhfnx                             @40       0x00000000
   struct ktbbhitl[0], 24 bytes             @44    
      struct ktbitxid, 8 bytes              @44    
         ub2 kxidusn                        @44       0x0005
         ub2 kxidslt                        @46       0x001e
         ub4 kxidsqn                        @48       0x000099e3
      struct ktbituba, 8 bytes              @52    
         ub4 kubadba                        @52       0x00802689
         ub2 kubaseq                        @56       0x29dd
         ub1 kubarec                        @58       0x09
      ub2 ktbitflg                          @60       0x2001 (KTBFUPB)
      union _ktbitun, 2 bytes               @62    
         b2 _ktbitfsc                       @62       0
         ub2 _ktbitwrp                      @62       0x0000
      ub4 ktbitbas                          @64       0x3a7f4d35
BBED>
上面的ktbitxid 即為XID,ktbituba即為UBA,其他的不多說。
這裡主要是要修改 ktbitflg,該欄位型別為ub2,其實佔據了2個位元組(offset 60和61)。
修改的時候需要注意一下平臺的位元組序(嚴格來說取決於CPU是little-endian還是big-endian,而不是os是32位還是64位),在little-endian平臺(如x86)上,其位元組序是反的。
我這裡就直接執行modify /x 8001 offset 60  然後sum apply即可。
然後再重啟資料庫 直接open,發現不再出現4000錯誤了,而是2663,這個好辦,
該錯誤跟2662 類似,直接調整scn即可,如下:
alter session set events '10015 trace name adjust_scn level n'; --mount下
最後再次open,錯誤號即變成了4194,這個就太熟悉不過了,清理undo就行了。
在dbsnake的部落格裡面,他以前模擬了一下ora-00600  4000錯誤,詳見如下連結:
http://dbsnake.com/2010/08/ora-600-4000-example.html
在網上能搜到的最早處理這個問題的個人應該是logzgh,這哥們目前在淘寶。
連結:http://logzgh.itpub.net/post/3185/191423