ivector-extractor-testcc檔案讀書筆記
阿新 • • 發佈:2018-12-20
// ivector/ivector-extractor-test.cc // Copyright 2013 Daniel Povey // See ../../COPYING for clarification regarding multiple authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, // MERCHANTABLITY OR NON-INFRINGEMENT. // See the Apache 2 License for the specific language governing permissions and // limitations under the License. #include "gmm/model-test-common.h" #include "gmm/full-gmm-normal.h" #include "ivector/ivector-extractor.h" #include "util/kaldi-io.h" namespace kaldi { void TestIvectorExtractorIO(const IvectorExtractor &extractor) { std::ostringstream ostr; bool binary = (Rand() % 2 == 0); extractor.Write(ostr, binary); std::istringstream istr(ostr.str()); IvectorExtractor extractor2; extractor2.Read(istr, binary); std::ostringstream ostr2; extractor2.Write(ostr2, binary); KALDI_ASSERT(ostr.str() == ostr2.str()); } void TestIvectorExtractorStatsIO(IvectorExtractorStats &stats) { std::ostringstream ostr; bool binary = (Rand() % 2 == 0); stats.Write(ostr, binary); std::istringstream istr(ostr.str()); IvectorExtractorStats stats2; stats2.Read(istr, binary); std::ostringstream ostr2; stats2.Write(ostr2, binary); if (binary) { // this was failing in text mode, due to differences like // 8.2244e+06 vs 8.22440e+06 KALDI_ASSERT(ostr.str() == ostr2.str()); } { // Test I/O of IvectorExtractorStats and that it works identically with the "add" // mechanism. We only test this with binary == true; otherwise it's not // identical due to limited precision. std::ostringstream ostr; bool binary = true; stats.Write(ostr, binary); IvectorExtractorStats stats2; { std::istringstream istr(ostr.str()); stats2.Read(istr, binary); } { std::istringstream istr(ostr.str()); stats2.Read(istr, binary, true); // add to existing. } IvectorExtractorStats stats3(stats); stats3.Add(stats); std::ostringstream ostr2; stats2.Write(ostr2, false); std::ostringstream ostr3; stats3.Write(ostr3, false); //if (binary) { // KALDI_ASSERT(ostr2.str() == ostr3.str()); //} } } void TestIvectorExtraction(const IvectorExtractor &extractor, const MatrixBase<BaseFloat> &feats, const FullGmm &fgmm) { if (extractor.IvectorDependentWeights()) return; // Nothing to do as online iVector estimator does not work in this // case. int32 num_frames = feats.NumRows(), feat_dim = feats.NumCols(), num_gauss = extractor.NumGauss(), ivector_dim = extractor.IvectorDim(); Posterior post(num_frames); double tot_log_like = 0.0; for (int32 t = 0; t < num_frames; t++) { SubVector<BaseFloat> frame(feats, t); Vector<BaseFloat> posterior(fgmm.NumGauss(), kUndefined); tot_log_like += fgmm.ComponentPosteriors(frame, &posterior); for (int32 i = 0; i < posterior.Dim(); i++) post[t].push_back(std::make_pair(i, posterior(i))); } // The zeroth and 1st-order stats are in "utt_stats". IvectorExtractorUtteranceStats utt_stats(num_gauss, feat_dim, false); utt_stats.AccStats(feats, post); OnlineIvectorEstimationStats online_stats(extractor.IvectorDim(), extractor.PriorOffset(), 0.0); for (int32 t = 0; t < num_frames; t++) { online_stats.AccStats(extractor, feats.Row(t), post[t]); } Vector<double> ivector1(ivector_dim), ivector2(ivector_dim); extractor.GetIvectorDistribution(utt_stats, &ivector1, NULL); int32 num_cg_iters = -1; // for testing purposes, compute it exactly. online_stats.GetIvector(num_cg_iters, &ivector2); KALDI_LOG << "ivector1 = " << ivector1; KALDI_LOG << "ivector2 = " << ivector2; // objf change vs. default iVector. note, here I'm using objf // and auxf pretty much interchangeably :-( double objf_change2 = online_stats.ObjfChange(ivector2) * utt_stats.NumFrames(); Vector<double> ivector_baseline(ivector_dim); ivector_baseline(0) = extractor.PriorOffset(); double objf_change1 = extractor.GetAuxf(utt_stats, ivector1) - extractor.GetAuxf(utt_stats, ivector_baseline); KALDI_LOG << "objf_change1 = " << objf_change1 << ", objf_change2 = " << objf_change2; KALDI_ASSERT(ivector1.ApproxEqual(ivector2)); } void UnitTestIvectorExtractor() { FullGmm fgmm; int32 dim = 5 + Rand() % 5, num_comp = 1 + Rand() % 5; KALDI_LOG << "Num Gauss = " << num_comp; unittest::InitRandFullGmm(dim, num_comp, &fgmm); FullGmmNormal fgmm_normal(fgmm); IvectorExtractorOptions ivector_opts; ivector_opts.ivector_dim = dim + 5; ivector_opts.use_weights = (Rand() % 2 == 0); KALDI_LOG << "Feature dim is " << dim << ", ivector dim is " << ivector_opts.ivector_dim; IvectorExtractor extractor(ivector_opts, fgmm); TestIvectorExtractorIO(extractor); IvectorExtractorStatsOptions stats_opts; if (Rand() % 2 == 0) stats_opts.update_variances = false; stats_opts.num_samples_for_weights = 100; // Improve accuracy // of estimation, since we do it with relatively few utterances, // and we're testing the convergence. int32 num_utts = 1 + Rand() % 5; std::vector<Matrix<BaseFloat> > all_feats(num_utts); for (int32 utt = 0; utt < num_utts; utt++) { int32 num_frames = 100 + Rand() % 200; if (Rand() % 2 == 0) num_frames *= 10; if (Rand() % 2 == 0) num_frames /= 1.0; Matrix<BaseFloat> feats(num_frames, dim); fgmm_normal.Rand(&feats); feats.Swap(&all_feats[utt]); } int32 num_iters = 4; double last_auxf_impr = 0.0, last_auxf = 0.0; for (int32 iter = 0; iter < num_iters; iter++) { IvectorExtractorStats stats(extractor, stats_opts); for (int32 utt = 0; utt < num_utts; utt++) { Matrix<BaseFloat> &feats = all_feats[utt]; stats.AccStatsForUtterance(extractor, feats, fgmm); TestIvectorExtraction(extractor, feats, fgmm); } TestIvectorExtractorStatsIO(stats); IvectorExtractorEstimationOptions estimation_opts; estimation_opts.gaussian_min_count = dim + 5; double auxf = stats.AuxfPerFrame(), auxf_impr = stats.Update(estimation_opts, &extractor); KALDI_LOG << "Iter " << iter << ", auxf per frame was " << auxf << ", improvement in this update " << "phase was " << auxf_impr; if (iter > 0) { double auxf_change = auxf - last_auxf; KALDI_LOG << "Predicted auxf change from last update phase was " << last_auxf_impr << " versus observed change " << auxf_change; double wiggle_room = (ivector_opts.use_weights ? 5.0e-05 : 1.0e-08); // The weight update is (a) not exact, and (b) relies on sampling, [two // separate issues], so it might not always improve. But with // a large number of "weight samples", it's OK. KALDI_ASSERT(auxf_change >= last_auxf_impr - wiggle_room); } last_auxf_impr = auxf_impr; last_auxf = auxf; } std::cout << "********************************************************************************************\n"; } } int main() { using namespace kaldi; SetVerboseLevel(5); for (int i = 0; i < 10; i++) UnitTestIvectorExtractor(); std::cout << "Test OK.\n"; return 0; }