1. 程式人生 > >coursera 算法二 week 1 wordnet

coursera 算法二 week 1 wordnet

分享圖片 true 判斷 unit color between list 否則 ++

這周的作業可謂是一波三折,但是收穫不少:熟悉了廣度優先搜索,還有符號圖的建立。此外還知道了Integer.MAX_VALUE。

SAP:

求兩個頂點v和w的length與ancestor的大概思路是:對v和w分別做廣度優先搜索,然後遍歷圖中每一個頂點;如果v和w都可以到達某個頂點,就計算v和w到這一頂點的距離之和。最後得到的最短距離及其對應的頂點,便是所求的length和ancestor。

至於參數為Iterable<Integer> v和Iterable<Integer> w的版本,開始我是求v中每一個頂點和w中每一個頂點的距離,然後取最短距離,但提交後時間測試通不過。參考了其他人的一些博客後發現,可以把v(或w)中的所有頂點同時作為起點,一次遍歷就完成廣度優先搜索,於是自己寫了一個BFS類。然而這次提交出現了OperationCountLimitExceededException,檢查了半天才發現bfs時丟了一句 if(!marked[w])。後來發現官方提供的BreadthFirstDirectedPaths類本身就可以完成以Iterable<Integer>為起點集合的廣度優先搜索,於是乾脆直接調用這個類。

但是提交後還是有問題:對於沒有共同祖先的情況判斷不正確,沒有返回-1。檢查了半天,發現每次求length或ancestor之前都應該先執行 anc = -1; 否則在找不到共同祖先時,這次返回的會是上一次調用留下的anc。

import edu.princeton.cs.algs4.*;
import edu.princeton.cs.algs4.In;

public class SAP {
    private Digraph G;
    private int anc = -1;
   // constructor takes a digraph (not necessarily a DAG)
   public
SAP(Digraph G) { if(G == null) throw new IllegalArgumentException(); this.G = new Digraph(G); } // length of shortest ancestral path between v and w; -1 if no such path public int length(int v, int w) { if(v < 0 || v > G.V() - 1 || w < 0 || w > G.V() - 1)
throw new IllegalArgumentException(); anc = -1; BreadthFirstDirectedPaths bv = new BreadthFirstDirectedPaths(G, v); BreadthFirstDirectedPaths bw = new BreadthFirstDirectedPaths(G, w); int minLength = Integer.MAX_VALUE; for(int i = 0; i < G.V(); i++) { if(bv.hasPathTo(i) && bw.hasPathTo(i)) { int l = bv.distTo(i) + bw.distTo(i); if(l < minLength) { minLength = l; anc = i; } } } if(minLength == Integer.MAX_VALUE) return -1; else return minLength; } // a common ancestor of v and w that participates in a shortest ancestral path; -1 if no such path public int ancestor(int v, int w) { length(v, w); return anc; } // length of shortest ancestral path between any vertex in v and any vertex in w; -1 if no such path public int length(Iterable<Integer> v, Iterable<Integer> w) { if(v == null || w == null) throw new IllegalArgumentException(); anc = -1; for(int i : v) { if(i < 0 || i > G.V() - 1) throw new IllegalArgumentException(); } for(int i : w) { if(i < 0 || i > G.V() - 1) throw new IllegalArgumentException(); } BreadthFirstDirectedPaths bv = new BreadthFirstDirectedPaths(G, v); BreadthFirstDirectedPaths bw = new BreadthFirstDirectedPaths(G, w); int minLength = Integer.MAX_VALUE; for(int i = 0; i < G.V(); i++) { if(bv.hasPathTo(i) && bw.hasPathTo(i)) { int l = bv.distTo(i) + bw.distTo(i); if(l < minLength) { minLength = l; anc = i; } } } if(minLength == Integer.MAX_VALUE) return -1; else return minLength; } // a common ancestor that participates in shortest ancestral path; -1 if no such path public int ancestor(Iterable<Integer> v, Iterable<Integer> w) { length(v, w); return anc; } // do unit testing of this class public static void main(String[] args) { } }

WordNet:

WordNet涉及到符號圖的問題。開始用ST<String, Integer>來完成noun到id的索引,後來發現一個noun可能對應多個id,於是改為ST<String, Bag<Integer>>。

需要檢查有向圖是否合格:1. 不能有環,通過類DirectedCycle完成。2. 只能有一個root。參考別人的博客後發現一個很巧妙的方法:如果一個頂點是根,那麼它不指向其它頂點,所以它不會作為帶有上位詞的行的第一個id出現在hypernyms中(注意:若某一行只有一個id、後面沒有任何上位詞,該id仍然可能是根)。

方法sap需要通過id得到對應的noun(synset)。用數組的話不能提前知道數組大小,於是參考網上的做法,用ArrayList<String>完成id到noun的索引。

import edu.princeton.cs.algs4.*;
import java.util.ArrayList;

/**
 * WordNet semantic lexicon built from a synsets file and a hypernyms file.
 *
 * <p>Each synsets line is read as {@code id,noun1 noun2 ...,gloss}; each
 * hypernyms line is read as {@code id,hypernymId,hypernymId,...}. Synset ids
 * are used directly as vertex indices of the hypernym digraph and as indices
 * into the id-to-synset list, so line k of the synsets file is expected to
 * carry id k.
 */
public class WordNet {
    // noun -> ids of every synset that contains the noun
    private final ST<String, Bag<Integer>> st;
    // synset id -> synset string (second field of the synsets file)
    private final ArrayList<String> idList;
    // Built once: the SAP constructor copies the digraph, so creating a new
    // SAP for every query would repeat that copy on each call.
    private final SAP sapFinder;

    /**
     * Constructor takes the name of the two input files.
     *
     * @param synsets   name of the synsets file
     * @param hypernyms name of the hypernyms file
     * @throws IllegalArgumentException if an argument is null, a line is
     *         malformed, an id is out of range, or the hypernym digraph is not
     *         a rooted DAG (exactly one vertex with no outgoing edge, no cycle)
     */
    public WordNet(String synsets, String hypernyms) {
        if (synsets == null || hypernyms == null) {
            throw new IllegalArgumentException("file name is null");
        }

        st = new ST<String, Bag<Integer>>();
        idList = new ArrayList<String>();
        readSynsets(synsets);

        Digraph graph = readHypernyms(hypernyms, idList.size());

        // A root is a vertex with no outgoing edge. Counting by out-degree also
        // catches ids that start a hypernyms line but list no hypernyms.
        int rootCount = 0;
        for (int v = 0; v < graph.V(); v++) {
            if (graph.outdegree(v) == 0) {
                rootCount++;
            }
        }
        if (rootCount != 1 || new DirectedCycle(graph).hasCycle()) {
            throw new IllegalArgumentException("input is not a rooted DAG");
        }

        sapFinder = new SAP(graph);
    }

    // Fills the noun index and the id-to-synset list from the synsets file.
    private void readSynsets(String synsets) {
        In in = new In(synsets);
        try {
            while (in.hasNextLine()) {
                String[] fields = in.readLine().split(",");
                if (fields.length < 2) {
                    throw new IllegalArgumentException("malformed synsets line");
                }
                int id = Integer.parseInt(fields[0]);
                for (String noun : fields[1].split(" ")) {
                    // One noun may belong to several synsets, hence a bag of ids.
                    if (!st.contains(noun)) {
                        st.put(noun, new Bag<Integer>());
                    }
                    st.get(noun).add(id);
                }
                idList.add(fields[1]);
            }
        } finally {
            in.close();
        }
    }

    // Builds the hypernym digraph: one edge from a synset to each of its hypernyms.
    private static Digraph readHypernyms(String hypernyms, int vertexCount) {
        Digraph graph = new Digraph(vertexCount);
        In in = new In(hypernyms);
        try {
            while (in.hasNextLine()) {
                String[] fields = in.readLine().split(",");
                int id = Integer.parseInt(fields[0]);
                if (id < 0 || id >= vertexCount) {
                    throw new IllegalArgumentException("synset id " + id + " is out of range");
                }
                for (int i = 1; i < fields.length; i++) {
                    graph.addEdge(id, Integer.parseInt(fields[i]));
                }
            }
        } finally {
            in.close();
        }
        return graph;
    }

    /**
     * Returns all WordNet nouns.
     *
     * @return the keys of the noun index
     */
    public Iterable<String> nouns() {
        return st.keys();
    }

    /**
     * Is the word a WordNet noun?
     *
     * @param word the word to look up
     * @return true if the word is a key of the noun index
     * @throws IllegalArgumentException if {@code word} is null
     */
    public boolean isNoun(String word) {
        if (word == null) {
            throw new IllegalArgumentException("word is null");
        }
        return st.contains(word);
    }

    /**
     * Distance between nounA and nounB: the length of the shortest ancestral
     * path between the synsets containing nounA and those containing nounB.
     *
     * @param nounA a WordNet noun
     * @param nounB a WordNet noun
     * @return the shortest ancestral path length
     * @throws IllegalArgumentException if either argument is null or not a WordNet noun
     */
    public int distance(String nounA, String nounB) {
        requireNouns(nounA, nounB);
        return sapFinder.length(st.get(nounA), st.get(nounB));
    }

    /**
     * A synset (second field of the synsets file) that is the common ancestor
     * of nounA and nounB in a shortest ancestral path.
     *
     * @param nounA a WordNet noun
     * @param nounB a WordNet noun
     * @return the synset string of the common ancestor
     * @throws IllegalArgumentException if either argument is null or not a WordNet noun
     */
    public String sap(String nounA, String nounB) {
        requireNouns(nounA, nounB);
        int ancestorId = sapFinder.ancestor(st.get(nounA), st.get(nounB));
        return idList.get(ancestorId);
    }

    // Rejects null arguments and words that are not WordNet nouns.
    private void requireNouns(String nounA, String nounB) {
        if (nounA == null || nounB == null || !isNoun(nounA) || !isNoun(nounB)) {
            throw new IllegalArgumentException("argument is null or not a WordNet noun");
        }
    }

    // do unit testing of this class
    public static void main(String[] args) {
    }
}

Outcast:

技術分享圖片
/**
 * Finds the "outcast" among a group of WordNet nouns: the noun whose summed
 * distance to all the nouns in the group is largest.
 */
public class Outcast {
    private WordNet wordnet;

    /**
     * Constructor takes a WordNet object.
     *
     * @param wordnet the WordNet used for distance queries
     */
    public Outcast(WordNet wordnet) {
        this.wordnet = wordnet;
    }

    /**
     * Given an array of WordNet nouns, return an outcast.
     *
     * @param nouns the candidate nouns
     * @return the noun with the strictly largest distance sum; the first noun
     *         wins when no sum exceeds zero or when several sums tie
     */
    public String outcast(String[] nouns) {
        final int n = nouns.length;

        // Query each unordered pair once (including the pair of a noun with
        // itself) and mirror the value across the diagonal.
        int[][] pairDistance = new int[n][n];
        for (int row = 0; row < n; row++) {
            for (int col = row; col < n; col++) {
                int d = wordnet.distance(nouns[row], nouns[col]);
                pairDistance[row][col] = d;
                pairDistance[col][row] = d;
            }
        }

        int outcastIndex = 0;
        int largestTotal = 0;
        for (int row = 0; row < n; row++) {
            int total = 0;
            for (int d : pairDistance[row]) {
                total += d;
            }
            if (total > largestTotal) {
                largestTotal = total;
                outcastIndex = row;
            }
        }

        return nouns[outcastIndex];
    }

    // see test client below
    public static void main(String[] args) {
    }
}
View Code

coursera 算法二 week 1 wordnet