1. 程式人生 > >特徵提取I-VECTOR演算法原始碼01

特徵提取I-VECTOR演算法原始碼01

#include <algorithm>
 #include "ivector/agglomerative-clustering.h"
 
 namespace kaldi {
 
 void AgglomerativeClusterer::Cluster() {
   KALDI_VLOG(2) << "Initializing cluster assignments.";
   Initialize();
 
   KALDI_VLOG(2) << "Clustering...";
   // This is the main algorithm loop. It moves through the queue merging
   // clusters until a stopping criterion has been reached.
   while (num_clusters_ > min_clust_ && !queue_.empty()) {
     std::pair<BaseFloat, std::pair<uint16, uint16> > pr = queue_.top();
     int32 i = (int32) pr.second.first, j = (int32) pr.second.second;
     queue_.pop();
     // check to make sure clusters have not already been merged
     if ((active_clusters_.find(i) != active_clusters_.end()) &&
         (active_clusters_.find(j) != active_clusters_.end()))
       MergeClusters(i, j);
   }
 
   std::vector<int32> new_assignments(num_points_);
   int32 label_id = 0;
   std::set<int32>::iterator it;
   // Iterate through the clusters and assign all utterances within the cluster
   // an ID label unique to the cluster. This is the final output and frees up
   // the cluster memory accordingly.
   for (it = active_clusters_.begin(); it != active_clusters_.end(); ++it) {
     ++label_id;
     AhcCluster *cluster = clusters_map_[*it];
     std::vector<int32>::iterator utt_it;
     for (utt_it = cluster->utt_ids.begin();
          utt_it != cluster->utt_ids.end(); ++utt_it)
       new_assignments[*utt_it] = label_id;
     delete cluster;
   }
   assignments_->swap(new_assignments);
 }
 
 BaseFloat AgglomerativeClusterer::GetCost(int32 i, int32 j) {
   if (i < j)
     return cluster_cost_map_[std::make_pair(i, j)];
   else
     return cluster_cost_map_[std::make_pair(j, i)];
 }
 
 void AgglomerativeClusterer::Initialize() {
   KALDI_ASSERT(num_clusters_ != 0);
   for (int32 i = 0; i < num_points_; i++) {
     // create an initial cluster of size 1 for each point
     std::vector<int32> ids;
     ids.push_back(i);
     AhcCluster *c = new AhcCluster(++count_, -1, -1, ids);
     clusters_map_[count_] = c;
     active_clusters_.insert(count_);
 
     // propagate the queue with all pairs from the cost matrix
     for (int32 j = i+1; j < num_clusters_; j++) {
       BaseFloat cost = costs_(i,j);
       cluster_cost_map_[std::make_pair(i+1, j+1)] = cost;
       if (cost <= thresh_)
         queue_.push(std::make_pair(cost,
             std::make_pair(static_cast<uint16>(i+1),
                            static_cast<uint16>(j+1))));
     }
   }
 }
 
 void AgglomerativeClusterer::MergeClusters(int32 i, int32 j) {
   AhcCluster *clust1 = clusters_map_[i];
   AhcCluster *clust2 = clusters_map_[j];
   // For memory efficiency, the first cluster is updated to contain the new
   // merged cluster information, and the second cluster is later deleted.
   clust1->id = ++count_;
   clust1->parent1 = i;
   clust1->parent2 = j;
   clust1->size += clust2->size;
   clust1->utt_ids.insert(clust1->utt_ids.end(), clust2->utt_ids.begin(),
                          clust2->utt_ids.end());
   // Remove the merged clusters from the list of active clusters.
   active_clusters_.erase(i);
   active_clusters_.erase(j);
   // Update the queue with all the new costs involving the new cluster
   std::set<int32>::iterator it;
   for (it = active_clusters_.begin(); it != active_clusters_.end(); ++it) {
     // The new cost is the sum of the costs of the new cluster's parents
     BaseFloat new_cost = GetCost(*it, i) + GetCost(*it, j);
     cluster_cost_map_[std::make_pair(*it, count_)] = new_cost;
     BaseFloat norm = clust1->size * (clusters_map_[*it])->size;
     if (new_cost / norm <= thresh_)
       queue_.push(std::make_pair(new_cost / norm,
           std::make_pair(static_cast<uint16>(*it),
                          static_cast<uint16>(count_))));
   }
   active_clusters_.insert(count_);
   clusters_map_[count_] = clust1;
   delete clust2;
   num_clusters_--;
 }
 
 void AgglomerativeCluster(
     const Matrix<BaseFloat> &costs,
     BaseFloat thresh,
     int32 min_clust,
     std::vector<int32> *assignments_out) {
   KALDI_ASSERT(min_clust >= 0);
   AgglomerativeClusterer ac(costs, thresh, min_clust, assignments_out);
   ac.Cluster();
 }
 

ivector-extractor.cc File Reference

#include <vector>
#include "ivector/ivector-extractor.h"
#include "util/kaldi-thread.h"

Include dependency graph for ivector-extractor.cc:

#ifndef KALDI_IVECTOR_IVECTOR_EXTRACTOR_H_
 #define KALDI_IVECTOR_IVECTOR_EXTRACTOR_H_
 
 #include <vector>
 #include <mutex>
 #include "base/kaldi-common.h"
 #include "matrix/matrix-lib.h"
 #include "gmm/model-common.h"
 #include "gmm/diag-gmm.h"
 #include "gmm/full-gmm.h"
 #include "itf/options-itf.h"
 #include "util/common-utils.h"
 #include "hmm/posterior.h"
 
 namespace kaldi {
 
 // Note, throughout this file we use SGMM-type notation because
 // that's what I'm comfortable with.
 // Dimensions:
 //  D is the feature dim (e.g. D = 60)
 //  I is the number of Gaussians (e.g. I = 2048)
 //  S is the ivector dim (e.g. S = 400)
 
 
 
 // Options for estimating iVectors, during both training and test.  Note: the
 // "acoustic_weight" is not read by any class declared in this header; it has to
 // be applied by calling IvectorExtractorUtteranceStats::Scale() before
 // obtaining the iVector.
 // The same is true of max_count: it has to be applied by programs themselves
 // e.g. see ../ivectorbin/ivector-extract.cc.
 struct IvectorEstimationOptions {
   double acoustic_weight;
   double max_count;
   IvectorEstimationOptions(): acoustic_weight(1.0), max_count(0.0) {}
   void Register(OptionsItf *opts) {
     opts->Register("acoustic-weight", &acoustic_weight,
                    "Weight on part of auxf that involves the data (e.g. 0.2); "
                    "if this weight is small, the prior will have more effect.");
     opts->Register("max-count", &max_count,
                    "Maximum frame count (affects prior scaling): if >0, the prior "
                    "term will be scaled up after the frame count exceeds this "
                    "value.  Note that this count is considered after posterior "
                    "scaling (e.g. --acoustic-weight option, or scale argument to "
                    "scale-post), so you would normally use a cutoff 10 times "
                    "smaller than the corresponding number of frames.");
   }
 };
 
 
 class IvectorExtractor;
 class IvectorExtractorComputeDerivedVarsClass;
 
 class IvectorExtractorUtteranceStats {
  public:
   IvectorExtractorUtteranceStats(int32 num_gauss, int32 feat_dim,
                                  bool need_2nd_order_stats):
       gamma_(num_gauss), X_(num_gauss, feat_dim) {
     if (need_2nd_order_stats) {
       S_.resize(num_gauss);
       for (int32 i = 0; i < num_gauss; i++)
         S_[i].Resize(feat_dim);
     }
   }
 
   void AccStats(const MatrixBase<BaseFloat> &feats,
                 const Posterior &post);
 
   void Scale(double scale); // Used to apply acoustic scale.
 
   double NumFrames() { return gamma_.Sum(); }
 
  protected:
   friend class IvectorExtractor;
   friend class IvectorExtractorStats;
   Vector<double> gamma_; // zeroth-order stats (summed posteriors), dimension [I]
   Matrix<double> X_; // first-order stats, dimension [I][D]
   std::vector<SpMatrix<double> > S_; // 2nd-order stats, dimension [I][D][D], if
                                      // required.
 };
 
 
 struct IvectorExtractorOptions {
   int ivector_dim;
   int num_iters;
   bool use_weights;
   IvectorExtractorOptions(): ivector_dim(400), num_iters(2),
                              use_weights(true) { }
   void Register(OptionsItf *opts) {
     opts->Register("num-iters", &num_iters, "Number of iterations in "
                    "iVector estimation (>1 needed due to weights)");
     opts->Register("ivector-dim", &ivector_dim, "Dimension of iVector");
     opts->Register("use-weights", &use_weights, "If true, regress the "
                    "log-weights on the iVector");
   }
 };
 
 
 // Forward declaration.  This class is used together with IvectorExtractor to
 // compute iVectors in an online way, so we can update the estimate efficiently
 // as we add frames.
 class OnlineIvectorEstimationStats;
 
 // Caution: the IvectorExtractor is not the only thing required to get an
 // ivector.  We also need to get posteriors from a GMM, typically a FullGmm.
 // Typically these will be obtained in a process that involves using a DiagGmm
 // for Gaussian selection, followed by getting posteriors from the FullGmm.  To
 // keep track of these, we keep them all in the same directory,
 // e.g. final.{ubm,dubm,ie}.
 
 class IvectorExtractor {
  public:
   friend class IvectorExtractorStats;
   friend class OnlineIvectorEstimationStats;
 
   IvectorExtractor(): prior_offset_(0.0) { }
 
   IvectorExtractor(
       const IvectorExtractorOptions &opts,
       const FullGmm &fgmm);
 
   void GetIvectorDistribution(
       const IvectorExtractorUtteranceStats &utt_stats,
       VectorBase<double> *mean,
       SpMatrix<double> *var) const;
 
   double PriorOffset() const { return prior_offset_; }
 
   double GetAuxf(const IvectorExtractorUtteranceStats &utt_stats,
                  const VectorBase<double> &mean,
                  const SpMatrix<double> *var = NULL) const;
 
   double GetAcousticAuxf(const IvectorExtractorUtteranceStats &utt_stats,
                          const VectorBase<double> &mean,
                          const SpMatrix<double> *var = NULL) const;
 
   double GetPriorAuxf(const VectorBase<double> &mean,
                          const SpMatrix<double> *var = NULL) const;
 
   double GetAcousticAuxfVariance(
       const IvectorExtractorUtteranceStats &utt_stats) const;
 
   double GetAcousticAuxfMean(
       const IvectorExtractorUtteranceStats &utt_stats,
       const VectorBase<double> &mean,
       const SpMatrix<double> *var = NULL) const;
 
   double GetAcousticAuxfGconst(
       const IvectorExtractorUtteranceStats &utt_stats) const;
 
   double GetAcousticAuxfWeight(
       const IvectorExtractorUtteranceStats &utt_stats,
       const VectorBase<double> &mean,
       const SpMatrix<double> *var = NULL) const;
 
 
   void GetIvectorDistMean(
       const IvectorExtractorUtteranceStats &utt_stats,
       VectorBase<double> *linear,
       SpMatrix<double> *quadratic) const;
 
   void GetIvectorDistPrior(
       const IvectorExtractorUtteranceStats &utt_stats,
       VectorBase<double> *linear,
       SpMatrix<double> *quadratic) const;
 
   void GetIvectorDistWeight(
       const IvectorExtractorUtteranceStats &utt_stats,
       const VectorBase<double> &mean,
       VectorBase<double> *linear,
       SpMatrix<double> *quadratic) const;
 
   // Note: the function GetStats no longer exists due to code refactoring.
   // Instead of this->GetStats(feats, posterior, &utt_stats), call
   // utt_stats.AccStats(feats, posterior).
 
   int32 FeatDim() const;
   int32 IvectorDim() const;
   int32 NumGauss() const;
   bool IvectorDependentWeights() const { return w_.NumRows() != 0; }
   void Write(std::ostream &os, bool binary) const;
   void Read(std::istream &is, bool binary);
 
   // Note: we allow the default assignment and copy operators
   // because they do what we want.
  protected:
   void ComputeDerivedVars();
   void ComputeDerivedVars(int32 i);
   friend class IvectorExtractorComputeDerivedVarsClass;
 
   // Imagine we'll project the iVectors with transformation T, so apply T^{-1}
   // where necessary to keep the model equivalent.  Used to keep unit variance
   // (like prior re-estimation).
   void TransformIvectors(const MatrixBase<double> &T,
                          double new_prior_offset);
 
 
   Matrix<double> w_;
 
   Vector<double> w_vec_;
 
   std::vector<Matrix<double> > M_;
 
   std::vector<SpMatrix<double> > Sigma_inv_;
 
   double prior_offset_;
 
   // Below are *derived variables* that can be computed from the
   // variables above.
 
   Vector<double> gconsts_;
 
   Matrix<double> U_;
 
   std::vector<Matrix<double> > Sigma_inv_M_;
  private:
   // var <-- quadratic_term^{-1}, but done carefully, first flooring eigenvalues
   // of quadratic_term to 1.0, which mathematically is the least they can be,
   // due to the prior term.
   static void InvertWithFlooring(const SpMatrix<double> &quadratic_term,
                                  SpMatrix<double> *var);
 };
 
 class OnlineIvectorEstimationStats {
  public:
   // Search above for max_count to see an explanation; if nonzero, it will
   // put a higher weight on the prior (vs. the stats) once the count passes
   // that value.
   OnlineIvectorEstimationStats(int32 ivector_dim,
                                BaseFloat prior_offset,
                                BaseFloat max_count);
 
   OnlineIvectorEstimationStats(const OnlineIvectorEstimationStats &other);
 
 
   // Accumulate stats for one frame.
   void AccStats(const IvectorExtractor &extractor,
                 const VectorBase<BaseFloat> &feature,
                 const std::vector<std::pair<int32, BaseFloat> > &gauss_post);
 
   // Accumulate stats for a sequence (or collection) of frames.
   void AccStats(const IvectorExtractor &extractor,
                 const MatrixBase<BaseFloat> &features,
                 const std::vector<std::vector<std::pair<int32, BaseFloat> > > &gauss_post);
 
 
   int32 IvectorDim() const { return linear_term_.Dim(); }
 
   void GetIvector(int32 num_cg_iters,
                   VectorBase<double> *ivector) const;
 
   double NumFrames() const { return num_frames_; }
 
   double PriorOffset() const { return prior_offset_; }
 
   double ObjfChange(const VectorBase<double> &ivector) const;
 
   double Count() const { return num_frames_; }
 
   void Scale(double scale);
 
   void Write(std::ostream &os, bool binary) const;
   void Read(std::istream &is, bool binary);
 
   // Override the default assignment operator
   inline OnlineIvectorEstimationStats &operator=(const OnlineIvectorEstimationStats &other) {
     this->prior_offset_ = other.prior_offset_;
     this->max_count_ = other.max_count_;
     this->num_frames_ = other.num_frames_;
     this->quadratic_term_=other.quadratic_term_;
     this->linear_term_=other.linear_term_;
     return *this;
   }
 
  protected:
   double Objf(const VectorBase<double> &ivector) const;
 
   double DefaultObjf() const;
 
   friend class IvectorExtractor;
   double prior_offset_;
   double max_count_;
   double num_frames_;  // num frames (weighted, if applicable).
   SpMatrix<double> quadratic_term_;
   Vector<double> linear_term_;
 };
 
 
 // This code obtains periodically (for each "ivector_period" frames, e.g. 10
 // frames), an estimate of the iVector including all frames up to that point.
 // This emulates what you could do in an online/streaming algorithm; its use is
 // for neural network training in a way that's matched to online decoding.
 // [note: I don't believe we are currently using the program,
 // ivector-extract-online.cc, that calls this function, in any of the scripts.].
 // Caution: this program outputs the raw iVectors, where the first component
 // will generally be very positive.  You probably want to subtract PriorOffset()
 // from the first element of each row of the output before writing it out.
 // For num_cg_iters, we suggest 15.  It can be a positive number (more -> more
 // exact, less -> faster), or if it's negative it will do the optimization
 // exactly each time which is slower.
 // It returns the objective function improvement per frame from the "default" iVector to
 // the last iVector estimated.
 double EstimateIvectorsOnline(
     const Matrix<BaseFloat> &feats,
     const Posterior &post,
     const IvectorExtractor &extractor,
     int32 ivector_period,
     int32 num_cg_iters,
     BaseFloat max_count,
     Matrix<BaseFloat> *ivectors);
 
 
 struct IvectorExtractorStatsOptions {
   bool update_variances;
   bool compute_auxf;
   int32 num_samples_for_weights;
   int cache_size;
 
   IvectorExtractorStatsOptions(): update_variances(true),
                          compute_auxf(true),
                          num_samples_for_weights(10),
                          cache_size(100) { }
   void Register(OptionsItf *opts) {
     opts->Register("update-variances", &update_variances, "If true, update the "
                    "Gaussian variances");
     opts->Register("compute-auxf", &compute_auxf, "If true, compute the "
                    "auxiliary functions on training data; can be used to "
                    "debug and check convergence.");
     opts->Register("num-samples-for-weights", &num_samples_for_weights,
                    "Number of samples from iVector distribution to use "
                    "for accumulating stats for weight update.  Must be >1");
     opts->Register("cache-size", &cache_size, "Size of an internal "
                    "cache (not critical, only affects speed/memory)");
   }
 };
 
 
 struct IvectorExtractorEstimationOptions {
   double variance_floor_factor;
   double gaussian_min_count;
   int32 num_threads;
   bool diagonalize;
   IvectorExtractorEstimationOptions(): variance_floor_factor(0.1),
                                        gaussian_min_count(100.0),
                                        diagonalize(true) { }
   void Register(OptionsItf *opts) {
     opts->Register("variance-floor-factor", &variance_floor_factor,
                    "Factor that determines variance flooring (we floor each covar "
                    "to this times global average covariance");
     opts->Register("gaussian-min-count", &gaussian_min_count,
                    "Minimum total count per Gaussian, below which we refuse to "
                    "update any associated parameters.");
     opts->Register("diagonalize", &diagonalize,
                    "If true, diagonalize the quadratic term in the "
                    "objective function. This reorders the ivector dimensions"
                    "from most to least important.");
   }
 };
 
 class IvectorExtractorUpdateProjectionClass;
 class IvectorExtractorUpdateWeightClass;
 
 class IvectorExtractorStats {
  public:
   friend class IvectorExtractor;
 
   IvectorExtractorStats(): tot_auxf_(0.0), R_num_cached_(0), num_ivectors_(0) { }
 
   IvectorExtractorStats(const IvectorExtractor &extractor,
                         const IvectorExtractorStatsOptions &stats_opts);
 
   void Add(const IvectorExtractorStats &other);
 
   void AccStatsForUtterance(const IvectorExtractor &extractor,
                             const MatrixBase<BaseFloat> &feats,
                             const Posterior &post);
 
   // This version (intended mainly for testing) works out the Gaussian
   // posteriors from the model.  Returns total log-like for feats, given
   // unadapted fgmm.  You'd want to add Gaussian pruning and preselection using
   // the diagonal, GMM, for speed, if you used this outside testing code.
   double AccStatsForUtterance(const IvectorExtractor &extractor,
                               const MatrixBase<BaseFloat> &feats,
                               const FullGmm &fgmm);
 
   void Read(std::istream &is, bool binary, bool add = false);
 
   void Write(std::ostream &os, bool binary); // non-const version; relates to cache.
 
   // const version of Write; may use extra memory if we have stuff cached
   void Write(std::ostream &os, bool binary) const;
 
   double Update(const IvectorExtractorEstimationOptions &opts,
                 IvectorExtractor *extractor) const;
 
   double AuxfPerFrame() { return tot_auxf_ / gamma_.Sum(); }
 
   void IvectorVarianceDiagnostic(const IvectorExtractor &extractor);
 
   // Copy constructor.
   explicit IvectorExtractorStats (const IvectorExtractorStats &other);
 
  protected:
   friend class IvectorExtractorUpdateProjectionClass;
   friend class IvectorExtractorUpdateWeightClass;
 
 
   // This is called by AccStatsForUtterance
   void CommitStatsForUtterance(const IvectorExtractor &extractor,
                                const IvectorExtractorUtteranceStats &utt_stats);
 
   void CommitStatsForM(const IvectorExtractor &extractor,
                        const IvectorExtractorUtteranceStats &utt_stats,
                        const VectorBase<double> &ivec_mean,
                        const SpMatrix<double> &ivec_var);
 
   void FlushCache();
 
   void CommitStatsForSigma(const IvectorExtractor &extractor,
                            const IvectorExtractorUtteranceStats &utt_stats);
 
   void CommitStatsForWPoint(const IvectorExtractor &extractor,
                             const IvectorExtractorUtteranceStats &utt_stats,
                             const VectorBase<double> &ivector,
                             double weight);
 
 
   void CommitStatsForW(const IvectorExtractor &extractor,
                        const IvectorExtractorUtteranceStats &utt_stats,
                        const VectorBase<double> &ivec_mean,
                        const SpMatrix<double> &ivec_var);
 
   void CommitStatsForPrior(const VectorBase<double> &ivec_mean,
                            const SpMatrix<double> &ivec_var);
 
   // Updates M.  Returns the objf improvement per frame.
   double UpdateProjections(const IvectorExtractorEstimationOptions &opts,
                            IvectorExtractor *extractor) const;
 
   // This internally called function returns the objf improvement
   // for this Gaussian index.  Updates one M.
   double UpdateProjection(const IvectorExtractorEstimationOptions &opts,
                           int32 gaussian,
                           IvectorExtractor *extractor) const;
 
   // Updates the weight projections.  Returns the objf improvement per
   // frame.
   double UpdateWeights(const IvectorExtractorEstimationOptions &opts,
                        IvectorExtractor *extractor) const;
 
   // Updates the weight projection for one Gaussian index.  Returns the objf
   // improvement for this index.
   double UpdateWeight(const IvectorExtractorEstimationOptions &opts,
                       int32 gaussian,
                       IvectorExtractor *extractor) const;
 
   // Returns the objf improvement per frame.
   double UpdateVariances(const IvectorExtractorEstimationOptions &opts,
                          IvectorExtractor *extractor) const;
 
 
 
   // Updates the prior; returns obj improvement per frame.
   double UpdatePrior(const IvectorExtractorEstimationOptions &opts,
                      IvectorExtractor *extractor) const;
 
   // Called from UpdatePrior, separating out some code that
   // computes likelihood changes.
   double PriorDiagnostics(double old_prior_offset) const;
 
 
   void CheckDims(const IvectorExtractor &extractor) const;
 
   IvectorExtractorStatsOptions config_; 
 
   double tot_auxf_;
 
   std::mutex gamma_Y_lock_;
 
   Vector<double> gamma_;
 
   std::vector<Matrix<double> > Y_;
 
   std::mutex R_lock_;
 
   Matrix<double> R_;
 
   std::mutex R_cache_lock_;
 
   int32 R_num_cached_;
   Matrix<double> R_gamma_cache_;
   Matrix<double> R_ivec_scatter_cache_;
 
   std::mutex weight_stats_lock_;
 
   Matrix<double> Q_;
 
   Matrix<double> G_;
 
   std::mutex variance_stats_lock_;
 
   std::vector< SpMatrix<double> > S_;
 
 
   std::mutex prior_stats_lock_;
 
   double num_ivectors_;
 
   Vector<double> ivector_sum_;
 
   SpMatrix<double> ivector_scatter_;
 
  private:
   void GetOrthogonalIvectorTransform(const SubMatrix<double> &T,
                                      IvectorExtractor *extractor,
                                      Matrix<double> *A) const;
 
   IvectorExtractorStats &operator = (const IvectorExtractorStats &other);  // Disallow.
 };
 
 
 
 }  // namespace kaldi
 
 
 #endif