golang知識圖譜NLP實戰第四節——關係抽取完善邏輯
阿新 • • 發佈:2019-02-06
用 Golang 應用將文字提交給這個 HanLP 服務,服務會返回 JSON 資料格式的依存句法分析結果。
package main

// This program is dedicated to parsing the JSON reply of a dependency-parsing
// HTTP service: it posts one sentence, decodes the returned word list and
// prints "entity1,relation,entity2" triples derived from the parse.

import (
	"encoding/json"
	"fmt"
	"log"
	"net/http"
	"net/url"
	"time"
)

const (
	// hanlpEndpoint is the URL the sentence is posted to.
	hanlpEndpoint = "http://localhost:8888/max"
	// requestTimeout bounds the whole HTTP exchange so a hung service cannot
	// block the program forever.
	requestTimeout = 60 * time.Second
)

// Dict groups the direct dependents of one word by dependency relation.
//
// As populated by buildDicts every slice holds at most one element: when a
// word has several dependents with the same relation, the last one in
// sentence order replaces the earlier ones.
type Dict struct {
	SBV []HanlpJson // subject-verb
	VOB []HanlpJson // verb-object
	IOB []HanlpJson // indirect object
	FOB []HanlpJson // fronted object
	DBL []HanlpJson // pivot ("double") construction
	ATT []HanlpJson // attributive modifier
	ADV []HanlpJson // adverbial
	CMP []HanlpJson // complement
	COO []HanlpJson // coordination
	POB []HanlpJson // preposition-object
	LAD []HanlpJson // left adjunct
	RAD []HanlpJson // right adjunct
	IS  []HanlpJson // independent structure
	HED []HanlpJson // head (root) relation
}

// HanlpResult is the top-level JSON object returned by the service.
type HanlpResult struct {
	Result HanlpWord `json:"result"`
}

// HanlpWord wraps the list of parsed words found under "result".
type HanlpWord struct {
	Word []HanlpJson `json:"word"`
}

// HanlpJson is one parsed word, used for JSON decoding. HEAD is the word this
// one depends on and DEPREL is the label of that dependency.
type HanlpJson struct {
	ID      int64
	LEMMA   string
	CPOSTAG string
	POSTAG  string
	HEAD    Json
	DEPREL  string
	NAME    string
}

// Json is the head word embedded in a HanlpJson; it has the same fields as
// HanlpJson except for a nested HEAD.
type Json struct {
	ID      int64
	LEMMA   string
	CPOSTAG string
	POSTAG  string
	DEPREL  string
	NAME    string
}

// Ltp2 is not used by this program.
type Ltp2 struct {
	Ltptwo []Ltp1
}

// Ltp1 is not used by this program.
type Ltp1 struct {
	// This approach does not work; kept for reference.
	Ltpone []Ltp
}

// Ltp is not used by this program.
// NOTE(review): presumably mirrors one token of LTP-style output; confirm.
type Ltp struct {
	Id        int64  `json:"id"`
	Cont      string `json:"cont"`
	Pos       string `json:"pos"`
	Ne        string `json:"ne"`
	Parent    int64  `json:"parent"`
	Relate    string `json:"relate"`
	Semparent int64  `json:"semparent"`
	Semrelate string `json:"semrelate"`
	Arg       []Arg1 `json:"arg"`
	Sem       []Sem1 `json:"sem"`
}

// Sem1 is an element of Ltp.Sem.
type Sem1 struct {
	Id     int64  `json:"id"`
	Parent int64  `json:"parent"`
	Relate string `json:"relate"`
}

// Arg1 is an element of Ltp.Arg.
type Arg1 struct {
	Id   int64  `json:"id"`
	Type string `json:"type"`
	Beg  int64  `json:"beg"`
	End  int64  `json:"end"`
}

// main parses one hard-coded sentence through the service and prints every
// triple the extraction rules find.
func main() {
	words, err := fetchParse(hanlpEndpoint, "輸水乾線從佛山市順德區杏壇鎮西江干流中央的鯉魚洲島端部取水")
	if err != nil {
		log.Fatal(err)
	}
	// fmt.Println(words)
	dict := buildDicts(words)
	// fmt.Println(dict)
	for i := range dict {
		extract(words, dict, i)
	}
}

// fetchParse POSTs sentence (query-escaped, as the "sentence" query
// parameter) to endpoint and decodes the JSON reply into its word list.
//
// It returns an error when the request cannot be built or sent, when the
// service answers with a non-2xx status, or when the body is not valid JSON
// for HanlpResult.
func fetchParse(endpoint, sentence string) ([]HanlpJson, error) {
	req, err := http.NewRequest(http.MethodPost, endpoint+"?sentence="+url.QueryEscape(sentence), nil)
	if err != nil {
		return nil, fmt.Errorf("building request: %w", err)
	}
	client := &http.Client{Timeout: requestTimeout}
	resp, err := client.Do(req)
	if err != nil {
		return nil, fmt.Errorf("posting sentence: %w", err)
	}
	defer resp.Body.Close()
	// Reject error replies up front instead of feeding them to the decoder.
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		return nil, fmt.Errorf("posting sentence: unexpected status %s", resp.Status)
	}
	// Decode the JSON text into the result struct.
	var result HanlpResult
	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
		return nil, fmt.Errorf("decoding parse result: %w", err)
	}
	return result.Result.Word, nil
}

// buildDicts returns one Dict per word, in the same order as words. Entry i
// collects every word whose HEAD.ID equals words[i].ID, filed by relation.
func buildDicts(words []HanlpJson) []Dict {
	dict := make([]Dict, len(words))
	for i, w := range words {
		for _, x := range words {
			if w.ID == x.HEAD.ID {
				attach(&dict[i], x)
			}
		}
	}
	return dict
}

// attach files child under the Dict field matching its DEPREL label,
// replacing any dependent stored there before. Unknown labels are ignored.
//
// Each label is accepted in both its Traditional and Simplified Chinese
// spelling. NOTE(review): the Simplified forms were added on the assumption
// that the service emits them; confirm against real service output.
func attach(d *Dict, child HanlpJson) {
	one := []HanlpJson{child}
	switch child.DEPREL {
	case "主謂關係", "主谓关系":
		d.SBV = one
	case "動賓關係", "动宾关系":
		d.VOB = one
	case "間賓關係", "间宾关系":
		d.IOB = one
	case "前置賓語", "前置宾语":
		d.FOB = one
	case "兼語", "兼语":
		d.DBL = one
	case "定中關係", "定中关系":
		d.ATT = one
	case "狀中結構", "状中结构":
		d.ADV = one
	case "動補結構", "动补结构":
		d.CMP = one
	case "並列關係", "并列关系":
		d.COO = one
	case "介賓關係", "介宾关系":
		d.POB = one
	case "左附加關係", "左附加关系":
		d.LAD = one
	case "右附加關係", "右附加关系":
		d.RAD = one
	case "獨立結構", "独立结构":
		d.IS = one
	case "核心關係", "核心关系":
		d.HED = one
	}
}

// isAttributive reports whether deprel is the attributive-modifier label, in
// either spelling accepted by attach.
func isAttributive(deprel string) bool {
	return deprel == "定中關係" || deprel == "定中关系"
}

// dictByID returns the Dict of the word with the given ID, relying on the
// convention that a word's ID is its 1-based position in the sentence. An ID
// outside the slice yields an empty Dict instead of a panic.
func dictByID(dict []Dict, id int64) Dict {
	if id < 1 || id > int64(len(dict)) {
		return Dict{}
	}
	return dict[id-1]
}

// wordByID returns the word with the given 1-based ID; ok is false when the
// ID does not address an element of hanlp (for example a head ID of 0).
func wordByID(hanlp []HanlpJson, id int64) (w HanlpJson, ok bool) {
	if id < 1 || id > int64(len(hanlp)) {
		return HanlpJson{}, false
	}
	return hanlp[id-1], true
}

// extract prints, one per line, every "entity1,relation,entity2" triple that
// the rules yield for the word at index i. hanlp is the parsed sentence and
// dict the per-word dependents built from it (dict[k] belongs to hanlp[k]).
// An index outside either slice prints nothing.
func extract(hanlp []HanlpJson, dict []Dict, i int) {
	for _, t := range extractTriples(hanlp, dict, i) {
		fmt.Println(t)
	}
}

// extractTriples applies the extraction rules to the word at index i and
// returns the resulting triples in rule order. The rules are independent, so
// one word may yield several triples. It returns nil when i is out of range.
func extractTriples(hanlp []HanlpJson, dict []Dict, i int) []string {
	if i < 0 || i >= len(hanlp) || i >= len(dict) {
		return nil
	}
	w, ww := hanlp[i], dict[i]

	var out []string
	emit := func(entity1, relation, entity2 string) {
		out = append(out, entity1+","+relation+","+entity2)
	}

	hasSBV := len(ww.SBV) > 0
	hasVOB := len(ww.VOB) > 0
	hasCMP := len(ww.CMP) > 0
	hasADV := len(ww.ADV) > 0

	// Subject-verb-object, e.g. "Liu Xiaoxu was born in Sichuan".
	if hasSBV && hasVOB {
		subject := ww.SBV[0]
		// Special case "Liu Xiaoxu and Li Hua are friends": when the subject
		// has a coordinated word, the object names the relation and the
		// coordinated word is the second entity.
		if coo := dictByID(dict, subject.ID).COO; len(coo) > 0 {
			emit(subject.LEMMA, ww.VOB[0].LEMMA, coo[0].LEMMA)
		} else {
			emit(subject.LEMMA, w.LEMMA, ww.VOB[0].LEMMA)
		}
	}

	// Verb + complement, e.g. "Liu Xiaoxu washed the clothes clean": the
	// relation is verb + complement (+ right adjunct when present).
	if hasSBV && hasVOB && hasCMP {
		relation := w.LEMMA + ww.CMP[0].LEMMA
		if len(ww.RAD) > 0 {
			relation += ww.RAD[0].LEMMA
		}
		emit(ww.SBV[0].LEMMA, relation, ww.VOB[0].LEMMA)
	}

	// Attributive modifier: pair the modifier with the object of every
	// subject-verb-object group whose subject has this word's lemma. This
	// scans all of dict, not just this word's dependents.
	if len(ww.ATT) > 0 {
		for _, d := range dict {
			if len(d.SBV) > 0 && len(d.VOB) > 0 && d.SBV[0].LEMMA == w.LEMMA {
				emit(ww.ATT[0].LEMMA, w.LEMMA, d.VOB[0].LEMMA)
			}
		}
	}

	// Adverbial + verb, e.g. "Father likes running very much": the relation
	// is adverbial + verb.
	if hasSBV && hasVOB && hasADV {
		emit(ww.SBV[0].LEMMA, ww.ADV[0].LEMMA+w.LEMMA, ww.VOB[0].LEMMA)
	}

	// Adverbial + verb + complement.
	if hasSBV && hasVOB && hasADV && hasCMP {
		emit(ww.SBV[0].LEMMA, ww.ADV[0].LEMMA+w.LEMMA+ww.CMP[0].LEMMA, ww.VOB[0].LEMMA)
	}

	// Postposed attributive, e.g. "Father is a student who comes from Kenya":
	// the modified head word is the first entity. Skipped when the head ID
	// does not address a word in the sentence.
	if isAttributive(w.DEPREL) && hasVOB {
		if head, ok := wordByID(hanlp, w.HEAD.ID); ok {
			emit(head.LEMMA, w.LEMMA, ww.VOB[0].LEMMA)
		}
	}

	// Preposition-object, e.g. "Liu Xiaoxu works at the school": the
	// complement is a preposition whose own object is the second entity.
	if hasSBV && hasCMP {
		prep := ww.CMP[0]
		if pob := dictByID(dict, prep.ID).POB; len(pob) > 0 {
			emit(ww.SBV[0].LEMMA, w.LEMMA+prep.LEMMA, pob[0].LEMMA)
		}
	}

	// Fronted object, e.g. "The ocean is composed of water": the object of
	// the adverbial preposition is the first entity.
	if len(ww.FOB) > 0 && hasADV {
		if pob := dictByID(dict, ww.ADV[0].ID).POB; len(pob) > 0 {
			emit(pob[0].LEMMA, w.LEMMA, ww.FOB[0].LEMMA)
		}
	}

	return out
}