optimizer/optimizer.cpp - GCC Code Coverage Report

Directory:	src/
Coverage:	low: ≥ 0% medium: ≥ 75.0% high: ≥ 90.0%
	Coverage	Exec / Excl / Total
Lines:	48.0%	217 / 0 / 452
Functions:	56.2%	9 / 0 / 16
Branches:	29.6%	214 / 0 / 724
    optimizer/optimizer.cpp
    
        Line
        Branch
        Exec
        Source
      
        #include "optimizer.h"
      
        #include "ps/local_shm/local_shm_stage_report.h"
      
        #include <algorithm>
      
        #include <cstring>
      
        #include <limits>
      
        namespace {
      
        ✗
        std::vector<uint64_t> CollectReaderKeys(const ParameterCompressReader* reader) {
      
        ✗
          const int size = reader->item_size();
      
        ✗
          std::vector<uint64_t> keys;
      
        ✗
          keys.reserve(size);
      
        ✗
          for (int i = 0; i < size; ++i) {
      
        ✗
            keys.push_back(reader->item(i)->key);
      
          }
      
        ✗
          return keys;
      
        ✗
        }
      
        14
        // Sanity-checks the raw arguments handed to the UpdateFlat methods.
        //
        // Throws std::runtime_error when:
        //   * `grads` is null,
        //   * `num_rows` is negative or `embedding_dim` is not strictly positive,
        //   * `keys` does not contain exactly `num_rows` entries.
        void ValidateFlatUpdateArgs(const base::ConstArray<uint64_t>& keys,
                                    const float* grads,
                                    int64_t num_rows,
                                    int64_t embedding_dim) {
          if (!grads) {
            throw std::runtime_error("UpdateFlat grads pointer is null");
          }

          const bool shape_is_valid = num_rows >= 0 && embedding_dim > 0;
          if (!shape_is_valid) {
            throw std::runtime_error("UpdateFlat invalid rows/dim");
          }

          // num_rows is known to be non-negative here, so the cast cannot wrap.
          const size_t expected_key_count = static_cast<size_t>(num_rows);
          if (keys.Size() != expected_key_count) {
            throw std::runtime_error("UpdateFlat keys size mismatch");
          }
        }
      
        } // namespace
      
        36
        std::unique_ptr<Optimizer> CreateOptimizer(const json& config) {
      
          1/2✗ Branch 1 not taken.
✓ Branch 2 taken 36 times.

        36
          if (!config.is_object()) {
      
        ✗
            throw std::invalid_argument("cache_ps.optimizer must be an object");
      
          }
      
          1/2✓ Branch 1 taken 36 times.
✗ Branch 2 not taken.

        36
          const std::string type    = config.value("type", "SGD");
      
          1/2✓ Branch 1 taken 36 times.
✗ Branch 2 not taken.

        36
          const float learning_rate = config.value("learning_rate", 0.01f);
      
          3/6✓ Branch 1 taken 36 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 36 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 36 times.

        36
          if (!std::isfinite(learning_rate) || learning_rate < 0.0f) {
      
        ✗
            throw std::invalid_argument(
      
        ✗
                "cache_ps.optimizer.learning_rate must be finite and non-negative");
      
          }
      
          2/2✓ Branch 1 taken 30 times.
✓ Branch 2 taken 6 times.

        36
          if (type == "SGD") {
      
          3/6✓ Branch 1 taken 30 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 30 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 30 times.
✗ Branch 8 not taken.

        60
            LOG(INFO) << "Configured sparse optimizer: type=SGD learning_rate="
      
          1/2✓ Branch 1 taken 30 times.
✗ Branch 2 not taken.

        30
                      << learning_rate;
      
          1/2✓ Branch 1 taken 30 times.
✗ Branch 2 not taken.

        30
            return std::make_unique<SGD>(learning_rate);
      
          }
      
          2/2✓ Branch 1 taken 2 times.
✓ Branch 2 taken 4 times.

        6
          if (type == "RowWiseAdagrad") {
      
          1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.

        2
            const float epsilon = config.value("epsilon", 1e-10f);
      
          3/6✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.

        2
            if (!std::isfinite(epsilon) || epsilon < 0.0f) {
      
        ✗
              throw std::invalid_argument(
      
        ✗
                  "cache_ps.optimizer.epsilon must be finite and non-negative");
      
            }
      
          2/4✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.

        4
            LOG(INFO) << "Configured sparse optimizer: type=RowWiseAdagrad "
      
          5/10✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 2 times.
✗ Branch 8 not taken.
✓ Branch 10 taken 2 times.
✗ Branch 11 not taken.
✓ Branch 13 taken 2 times.
✗ Branch 14 not taken.

        2
                      << "learning_rate=" << learning_rate << " epsilon=" << epsilon;
      
          1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.

        2
            return std::make_unique<RowWiseAdaGrad>(learning_rate, epsilon);
      
          }
      
          2/2✓ Branch 1 taken 2 times.
✓ Branch 2 taken 2 times.

        4
          if (type == "AdamW") {
      
          1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.

        2
            const float beta1        = config.value("beta1", 0.9f);
      
          1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.

        2
            const float beta2        = config.value("beta2", 0.98f);
      
          1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.

        2
            const float epsilon      = config.value("epsilon", 1e-8f);
      
          1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.

        2
            const float weight_decay = config.value("weight_decay", 0.0f);
      
          2/4✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✗ Branch 4 not taken.

        4
            if (!std::isfinite(beta1) || beta1 < 0.0f || beta1 >= 1.0f ||
      
          5/10✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 3 taken 2 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✓ Branch 10 taken 2 times.

        4
                !std::isfinite(beta2) || beta2 < 0.0f || beta2 >= 1.0f) {
      
        ✗
              throw std::invalid_argument(
      
        ✗
                  "cache_ps.optimizer AdamW beta1/beta2 must be finite in [0, 1)");
      
            }
      
          1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.

        4
            if (!std::isfinite(epsilon) || epsilon < 0.0f ||
      
          4/8✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 3 taken 2 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.

        4
                !std::isfinite(weight_decay) || weight_decay < 0.0f) {
      
        ✗
              throw std::invalid_argument(
      
                  "cache_ps.optimizer AdamW epsilon/weight_decay must be finite and "
      
        ✗
                  "non-negative");
      
            }
      
          3/6✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 2 times.
✗ Branch 8 not taken.

        4
            LOG(INFO) << "Configured sparse optimizer: type=AdamW learning_rate="
      
          5/10✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 2 times.
✗ Branch 8 not taken.
✓ Branch 10 taken 2 times.
✗ Branch 11 not taken.
✓ Branch 13 taken 2 times.
✗ Branch 14 not taken.

        2
                      << learning_rate << " beta1=" << beta1 << " beta2=" << beta2
      
          4/8✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 2 times.
✗ Branch 8 not taken.
✓ Branch 10 taken 2 times.
✗ Branch 11 not taken.

        2
                      << " epsilon=" << epsilon << " weight_decay=" << weight_decay;
      
          1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.

        4
            return std::make_unique<AdamW>(
      
        2
                learning_rate, beta1, beta2, epsilon, weight_decay);
      
          }
      
          2/4✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 2 times.
✗ Branch 6 not taken.

        2
          throw std::invalid_argument("Unsupported cache_ps.optimizer.type: " + type);
      
        36
        }
      
        24
        void SGD::Init(const std::vector<std::string> table_name,
      
                       const EmbeddingTableConfig& config,
      
                       BaseKV* base_kv) {
      
          4/8✓ Branch 2 taken 24 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 24 times.
✗ Branch 6 not taken.
✓ Branch 9 taken 24 times.
✗ Branch 10 not taken.
✓ Branch 12 taken 24 times.
✗ Branch 13 not taken.

        24
          LOG(INFO) << "SGD::Init called with " << table_name.size() << " table(s)";
      
          2/2✓ Branch 5 taken 24 times.
✓ Branch 6 taken 24 times.

        48
          for (const auto& name : table_name) {
      
          5/10✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 24 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 24 times.
✗ Branch 8 not taken.
✓ Branch 10 taken 24 times.
✗ Branch 11 not taken.
✓ Branch 13 taken 24 times.
✗ Branch 14 not taken.

        48
            LOG(INFO) << "  Initializing table: '" << name << "' with shape ["
      
          4/8✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 24 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 24 times.
✗ Branch 8 not taken.
✓ Branch 10 taken 24 times.
✗ Branch 11 not taken.

        24
                      << config.num_embeddings << ", " << config.embedding_dim << "]";
      
          1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.

        24
            SparseTensor* param_tensor  = new SparseTensor();
      
          1/2✓ Branch 2 taken 24 times.
✗ Branch 3 not taken.

        24
            std::vector<uint64_t> shape = {config.num_embeddings, config.embedding_dim};
      
        24
            TAG_TYPE tag                = 0; // PARAMETER tag
      
          1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.

        24
            param_tensor->init(
      
                const_cast<std::string&>(name), PARAMETER, tag, shape, base_kv);
      
          1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.

        24
            tensor_map_[name] = param_tensor;
      
        24
          }
      
          3/6✓ Branch 2 taken 24 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 24 times.
✗ Branch 6 not taken.
✓ Branch 9 taken 24 times.
✗ Branch 10 not taken.

        48
          LOG(INFO) << "SGD::Init completed. tensor_map_ now has " << tensor_map_.size()
      
          1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.

        24
                    << " entries";
      
        24
        }
      
        ✗
        void SGD::Update(
      
            std::string table, const ParameterCompressReader* reader, unsigned tid) {
      
        ✗
          auto it = tensor_map_.find(table);
      
        ✗
          if (it == tensor_map_.end()) {
      
        ✗
            LOG(ERROR) << "Table not found in SGD optimizer: '" << table << "'";
      
        ✗
            throw std::runtime_error("Table not found: " + table);
      
          }
      
        ✗
          int size                   = reader->item_size();
      
        ✗
          std::vector<uint64_t> keys = CollectReaderKeys(reader);
      
        ✗
          std::vector<base::ConstArray<float>> current_values;
      
        ✗
          it->second->BatchGet(keys, &current_values, tid);
      
        ✗
          for (int i = 0; i < size; ++i) {
      
        ✗
            const auto* item = reader->item(i);
      
        ✗
            if (current_values[i].Size() == 0) {
      
              // If key not found, we fallback to Put to initialize it
      
        ✗
              std::vector<float> zero_init(item->dim, 0.0f);
      
        ✗
              for (int j = 0; j < item->dim; ++j) {
      
        ✗
                zero_init[j] = -learning_rate_ * item->data()[j];
      
              }
      
              std::string val_str(
      
        ✗
                  (char*)zero_init.data(), zero_init.size() * sizeof(float));
      
        ✗
              it->second->Put(item->key, val_str, tid);
      
        ✗
              continue;
      
        ✗
            }
      
        ✗
            float* data = const_cast<float*>(current_values[i].Data());
      
        ✗
            int dim     = std::min(current_values[i].Size(), item->dim);
      
        ✗
        #pragma omp simd
      
            for (int j = 0; j < dim; ++j) {
      
        ✗
              data[j] -= learning_rate_ * item->data()[j];
      
            }
      
          }
      
        ✗
        }
      
        6
        // Applies one SGD step to `num_rows` rows of table `table`.
        //
        // `grads` is read as a dense row-major [num_rows, embedding_dim] buffer; row
        // r uses the gradient starting at grads + r * embedding_dim and the key
        // keys[r].
        //
        // The tensor's ApplySgdUpdateFlat is tried first; when it returns true the
        // update is complete. Otherwise the rows are fetched with BatchGet and
        // updated here: rows returned empty are created through Put with
        // -learning_rate_ * grad, the others are modified in place.
        //
        // Stage timings and the number of missing rows are reported through
        // recstore::ReportLocalShmStageMetric.
        //
        // Throws std::runtime_error on invalid arguments, unknown table, an
        // embedding_dim mismatch, or a BatchGet result shorter than `num_rows`.
        void SGD::UpdateFlat(
            std::string table,
            const base::ConstArray<uint64_t>& keys,
            const float* grads,
            int64_t num_rows,
            int64_t embedding_dim,
            unsigned tid) {
          ValidateFlatUpdateArgs(keys, grads, num_rows, embedding_dim);

          auto it = tensor_map_.find(table);
          if (it == tensor_map_.end()) {
            LOG(ERROR) << "Table not found in SGD optimizer: '" << table << "'";
            throw std::runtime_error("Table not found: " + table);
          }
          if (it->second->EmbeddingDim() != embedding_dim) {
            throw std::runtime_error(
                "SGD::UpdateFlat embedding_dim mismatch for table " + table);
          }

          // Fast path: let the tensor apply the whole batch itself.
          const auto direct_update_start = std::chrono::steady_clock::now();
          if (it->second->ApplySgdUpdateFlat(
                  keys, grads, num_rows, embedding_dim, learning_rate_, tid)) {
            recstore::ReportLocalShmStageMetric(
                "sgd_update_direct_us",
                recstore::LocalShmElapsedUs(direct_update_start));
            return;
          }

          // Fallback path: fetch every row, then update row by row.
          std::vector<uint64_t> key_vec(keys.Data(), keys.Data() + keys.Size());
          const auto batch_get_start = std::chrono::steady_clock::now();
          std::vector<base::ConstArray<float>> current_values;
          it->second->BatchGet(key_vec, &current_values, tid);
          recstore::ReportLocalShmStageMetric(
              "sgd_update_batch_get_us", recstore::LocalShmElapsedUs(batch_get_start));

          // The loop below indexes current_values by row; num_rows is already known
          // to be non-negative, so the cast cannot wrap.
          if (current_values.size() < static_cast<size_t>(num_rows)) {
            throw std::runtime_error(
                "SGD::UpdateFlat BatchGet returned fewer values than keys for table " +
                table);
          }

          const auto apply_start = std::chrono::steady_clock::now();
          int64_t missing_rows = 0;
          for (int64_t row = 0; row < num_rows; ++row) {
            const float* row_grad = grads + row * embedding_dim;
            const auto& current = current_values[static_cast<size_t>(row)];

            if (current.Size() == 0) {
              // Missing row: create it with the result of one SGD step taken from an
              // all-zero row.
              ++missing_rows;
              std::vector<float> initial_value(
                  static_cast<size_t>(embedding_dim), 0.0f);
              for (int64_t col = 0; col < embedding_dim; ++col) {
                initial_value[static_cast<size_t>(col)] =
                    -learning_rate_ * row_grad[col];
              }
              std::string val_str(
                  reinterpret_cast<const char*>(initial_value.data()),
                  initial_value.size() * sizeof(float));
              it->second->Put(keys[static_cast<size_t>(row)], val_str, tid);
              continue;
            }

            if (static_cast<int64_t>(current.Size()) != embedding_dim) {
              throw std::runtime_error(
                  "SGD::UpdateFlat embedding_dim mismatch for table " + table);
            }

            float* data = const_cast<float*>(current.Data());
        #pragma omp simd
            for (int64_t col = 0; col < embedding_dim; ++col) {
              data[col] -= learning_rate_ * row_grad[col];
            }
          }

          recstore::ReportLocalShmStageMetric(
              "sgd_update_apply_us", recstore::LocalShmElapsedUs(apply_start));
          recstore::ReportLocalShmStageMetric(
              "sgd_update_missing_rows", static_cast<double>(missing_rows));
        }
      
        ✗
        void AdaGrad::Init(const std::vector<std::string> table_name,
      
                           const EmbeddingTableConfig& config,
      
                           BaseKV* base_kv) {
      
        ✗
          for (const auto& name : table_name) {
      
        ✗
            SparseTensor* param_tensor  = new SparseTensor();
      
        ✗
            std::vector<uint64_t> shape = {config.num_embeddings, config.embedding_dim};
      
        ✗
            TAG_TYPE tag                = 0;
      
        ✗
            param_tensor->init(
      
                const_cast<std::string&>(name), PARAMETER, tag, shape, base_kv);
      
        ✗
            tensor_map_[name] = param_tensor;
      
        ✗
            std::string acc_table_name = name + "_accumulated_grad";
      
        ✗
            SparseTensor* acc_tensor   = new SparseTensor();
      
        ✗
            acc_tensor->init(
      
                const_cast<std::string&>(acc_table_name),
      
                MOMENT_1,
      
                tag,
      
                shape,
      
                base_kv);
      
        ✗
            tensor_map_[acc_table_name] = acc_tensor;
      
        ✗
          }
      
        ✗
        }
      
        ✗
        void AdaGrad::Update(
      
            std::string table, const ParameterCompressReader* reader, unsigned tid) {
      
        ✗
          auto param_it = tensor_map_.find(table);
      
        ✗
          if (param_it == tensor_map_.end()) {
      
        ✗
            throw std::runtime_error("Table not found: " + table);
      
          }
      
        ✗
          std::string acc_table = table + "_accumulated_grad";
      
        ✗
          auto acc_it           = tensor_map_.find(acc_table);
      
        ✗
          if (acc_it == tensor_map_.end()) {
      
        ✗
            throw std::runtime_error(
      
        ✗
                "Accumulated gradient table not found: " + acc_table);
      
          }
      
        ✗
          int size                   = reader->item_size();
      
        ✗
          std::vector<uint64_t> keys = CollectReaderKeys(reader);
      
        ✗
          std::vector<base::ConstArray<float>> current_values;
      
        ✗
          std::vector<base::ConstArray<float>> acc_values;
      
        ✗
          param_it->second->BatchGet(keys, &current_values, tid);
      
        ✗
          acc_it->second->BatchGet(keys, &acc_values, tid);
      
        ✗
          for (int i = 0; i < size; ++i) {
      
        ✗
            const auto* item = reader->item(i);
      
        ✗
            if (current_values[i].Size() == 0 || acc_values[i].Size() == 0) {
      
              // Fallback to sequential initialization if not found
      
              // (This is rare in training but kept for robustness)
      
        ✗
              continue;
      
            }
      
        ✗
            float* param_data = const_cast<float*>(current_values[i].Data());
      
        ✗
            float* acc_data   = const_cast<float*>(acc_values[i].Data());
      
        ✗
            int dim           = std::min(current_values[i].Size(), item->dim);
      
        ✗
        #pragma omp simd
      
            for (int j = 0; j < dim; ++j) {
      
        ✗
              acc_data[j] += item->data()[j] * item->data()[j];
      
        ✗
              float adaptive_lr = learning_rate_ / (std::sqrt(acc_data[j]) + epsilon_);
      
        ✗
              param_data[j] -= adaptive_lr * item->data()[j];
      
            }
      
          }
      
        ✗
        }
      
        ✗
        // Applies one element-wise AdaGrad step to `num_rows` rows of table `table`:
        //   acc   += grad * grad
        //   param -= learning_rate_ / (sqrt(acc) + epsilon_) * grad
        //
        // `grads` is read as a dense row-major [num_rows, embedding_dim] buffer; row
        // r uses the gradient starting at grads + r * embedding_dim and the key
        // keys[r]. Accumulators live in "<table>_accumulated_grad". Both tensors are
        // modified in place through the pointers returned by Data().
        //
        // Rows whose parameter or accumulator comes back empty from BatchGet are
        // skipped: no row is created and the gradient for that key is dropped.
        //
        // Throws std::runtime_error on invalid arguments, unknown tables, a stored
        // row whose size differs from `embedding_dim`, or a BatchGet result shorter
        // than `num_rows`.
        void AdaGrad::UpdateFlat(
            std::string table,
            const base::ConstArray<uint64_t>& keys,
            const float* grads,
            int64_t num_rows,
            int64_t embedding_dim,
            unsigned tid) {
          ValidateFlatUpdateArgs(keys, grads, num_rows, embedding_dim);

          auto param_it = tensor_map_.find(table);
          if (param_it == tensor_map_.end()) {
            throw std::runtime_error("Table not found: " + table);
          }

          std::string acc_table = table + "_accumulated_grad";
          auto acc_it = tensor_map_.find(acc_table);
          if (acc_it == tensor_map_.end()) {
            throw std::runtime_error(
                "Accumulated gradient table not found: " + acc_table);
          }

          std::vector<uint64_t> key_vec(keys.Data(), keys.Data() + keys.Size());
          std::vector<base::ConstArray<float>> current_values;
          std::vector<base::ConstArray<float>> acc_values;
          param_it->second->BatchGet(key_vec, &current_values, tid);
          acc_it->second->BatchGet(key_vec, &acc_values, tid);

          // Both result vectors are indexed by row below; num_rows is already known
          // to be non-negative, so the cast cannot wrap.
          const size_t row_count = static_cast<size_t>(num_rows);
          if (current_values.size() < row_count || acc_values.size() < row_count) {
            throw std::runtime_error(
                "AdaGrad::UpdateFlat BatchGet returned fewer values than keys for "
                "table " +
                table);
          }

          for (int64_t row = 0; row < num_rows; ++row) {
            const auto& current = current_values[static_cast<size_t>(row)];
            const auto& acc = acc_values[static_cast<size_t>(row)];

            if (current.Size() == 0 || acc.Size() == 0) {
              // Row or accumulator missing: this row is skipped entirely.
              continue;
            }
            if (static_cast<int64_t>(current.Size()) != embedding_dim ||
                static_cast<int64_t>(acc.Size()) != embedding_dim) {
              throw std::runtime_error(
                  "AdaGrad::UpdateFlat embedding_dim mismatch for table " + table);
            }

            const float* row_grad = grads + row * embedding_dim;
            float* param_data = const_cast<float*>(current.Data());
            float* acc_data = const_cast<float*>(acc.Data());
        #pragma omp simd
            for (int64_t col = 0; col < embedding_dim; ++col) {
              acc_data[col] += row_grad[col] * row_grad[col];
              float adaptive_lr =
                  learning_rate_ / (std::sqrt(acc_data[col]) + epsilon_);
              param_data[col] -= adaptive_lr * row_grad[col];
            }
          }
        }
      
        2
        void RowWiseAdaGrad::Init(const std::vector<std::string> table_name,
      
                                  const EmbeddingTableConfig& config,
      
                                  BaseKV* base_kv) {
      
          2/2✓ Branch 5 taken 2 times.
✓ Branch 6 taken 2 times.

        4
          for (const auto& name : table_name) {
      
          1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.

        2
            SparseTensor* param_tensor  = new SparseTensor();
      
          1/2✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.

        2
            std::vector<uint64_t> shape = {config.num_embeddings, config.embedding_dim};
      
        2
            TAG_TYPE tag                = 0;
      
          1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.

        2
            param_tensor->init(
      
                const_cast<std::string&>(name), PARAMETER, tag, shape, base_kv);
      
          1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.

        2
            tensor_map_[name] = param_tensor;
      
          1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.

        2
            std::string acc_table_name      = name + "_rowwise_accumulated_grad";
      
          1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.

        2
            SparseTensor* acc_tensor        = new SparseTensor();
      
            std::vector<uint64_t> acc_shape = {
      
          1/2✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.

        2
                config.num_embeddings, 1}; // One value per row
      
        2
            TAG_TYPE acc_tag = static_cast<TAG_TYPE>(MOMENT_1);
      
          1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.

        2
            acc_tensor->init(
      
                const_cast<std::string&>(acc_table_name),
      
                MOMENT_1,
      
                acc_tag,
      
                acc_shape,
      
                base_kv);
      
          1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.

        2
            tensor_map_[acc_table_name] = acc_tensor;
      
        2
          }
      
        2
        }
      
        ✗
        void RowWiseAdaGrad::Update(
      
            std::string table, const ParameterCompressReader* reader, unsigned tid) {
      
        ✗
          auto param_it = tensor_map_.find(table);
      
        ✗
          if (param_it == tensor_map_.end()) {
      
        ✗
            throw std::runtime_error("Table not found: " + table);
      
          }
      
        ✗
          std::string acc_table = table + "_rowwise_accumulated_grad";
      
        ✗
          auto acc_it           = tensor_map_.find(acc_table);
      
        ✗
          if (acc_it == tensor_map_.end()) {
      
        ✗
            throw std::runtime_error(
      
        ✗
                "Row-wise accumulated gradient table not found: " + acc_table);
      
          }
      
        ✗
          int size                   = reader->item_size();
      
        ✗
          std::vector<uint64_t> keys = CollectReaderKeys(reader);
      
        ✗
          std::vector<base::ConstArray<float>> current_values;
      
        ✗
          std::vector<base::ConstArray<float>> acc_values;
      
        ✗
          param_it->second->BatchGet(keys, &current_values, tid);
      
        ✗
          acc_it->second->BatchGet(keys, &acc_values, tid);
      
        ✗
          for (int i = 0; i < size; ++i) {
      
        ✗
            const auto* item           = reader->item(i);
      
        ✗
            const auto& current        = current_values[static_cast<size_t>(i)];
      
        ✗
            const auto& acc            = acc_values[static_cast<size_t>(i)];
      
        ✗
            const int64_t expected_dim = param_it->second->EmbeddingDim();
      
        ✗
            if (item->dim != expected_dim ||
      
        ✗
                (current.Size() != 0 && current.Size() != expected_dim) ||
      
        ✗
                (acc.Size() != 0 && acc.Size() != 1)) {
      
        ✗
              throw std::runtime_error(
      
        ✗
                  "RowWiseAdaGrad::Update embedding_dim mismatch for table " + table);
      
            }
      
        ✗
            const int dim = item->dim;
      
        ✗
            float grad_square_mean = 0.0;
      
        ✗
        #pragma omp simd reduction(+ : grad_square_mean)
      
            for (int j = 0; j < dim; ++j) {
      
        ✗
              grad_square_mean += item->data()[j] * item->data()[j];
      
            }
      
        ✗
            grad_square_mean /= dim;
      
        ✗
            float accumulated_grad = acc.Size() == 0 ? 0.0f : acc.Data()[0];
      
        ✗
            accumulated_grad += grad_square_mean;
      
            const float adaptive_lr =
      
        ✗
                learning_rate_ / (std::sqrt(accumulated_grad) + epsilon_);
      
        ✗
            if (current.Size() == 0) {
      
        ✗
              std::vector<float> initial_value(static_cast<size_t>(dim), 0.0f);
      
        ✗
              for (int j = 0; j < dim; ++j) {
      
        ✗
                initial_value[static_cast<size_t>(j)] = -adaptive_lr * item->data()[j];
      
              }
      
              const std::string value(
      
        ✗
                  reinterpret_cast<const char*>(initial_value.data()),
      
        ✗
                  initial_value.size() * sizeof(float));
      
        ✗
              param_it->second->Put(item->key, value, tid);
      
        ✗
            } else {
      
        ✗
              float* param_data = const_cast<float*>(current.Data());
      
        ✗
        #pragma omp simd
      
              for (int j = 0; j < dim; ++j) {
      
        ✗
                param_data[j] -= adaptive_lr * item->data()[j];
      
              }
      
            }
      
        ✗
            if (acc.Size() == 0) {
      
              const std::string value(
      
        ✗
                  reinterpret_cast<const char*>(&accumulated_grad), sizeof(float));
      
        ✗
              acc_it->second->Put(item->key, value, tid);
      
        ✗
            } else {
      
        ✗
              const_cast<float*>(acc.Data())[0] = accumulated_grad;
      
            }
      
          }
      
        ✗
        }
      
        4
        // Applies one row-wise AdaGrad step to `table` from a dense gradient buffer.
        //
        // `grads` is read as `num_rows` consecutive rows of `embedding_dim` floats;
        // row i is applied to keys[i].  Each key owns one accumulator scalar in the
        // "<table>_rowwise_accumulated_grad" table holding the running sum of the
        // per-row mean squared gradient.  The row's step size is
        //   learning_rate_ / (sqrt(accumulator) + epsilon_).
        //
        // A key whose BatchGet view is empty is treated as absent: the row is created
        // with Put() as `-step * grad` (i.e. starting from zero).  A key that is
        // present is updated through the pointer of the view returned by BatchGet.
        //
        // Throws std::runtime_error when the arguments are rejected by
        // ValidateFlatUpdateArgs, when either table is not registered, when BatchGet
        // returns fewer entries than rows, or when a stored row / accumulator has an
        // unexpected size.
        void RowWiseAdaGrad::UpdateFlat(
            std::string table,
            const base::ConstArray<uint64_t>& keys,
            const float* grads,
            int64_t num_rows,
            int64_t embedding_dim,
            unsigned tid) {
          ValidateFlatUpdateArgs(keys, grads, num_rows, embedding_dim);

          auto param_it = tensor_map_.find(table);
          if (param_it == tensor_map_.end()) {
            throw std::runtime_error("Table not found: " + table);
          }
          std::string acc_table = table + "_rowwise_accumulated_grad";
          auto acc_it           = tensor_map_.find(acc_table);
          if (acc_it == tensor_map_.end()) {
            throw std::runtime_error(
                "Row-wise accumulated gradient table not found: " + acc_table);
          }

          std::vector<uint64_t> key_vec(keys.Data(), keys.Data() + keys.Size());
          std::vector<base::ConstArray<float>> current_values;
          std::vector<base::ConstArray<float>> acc_values;
          param_it->second->BatchGet(key_vec, &current_values, tid);
          acc_it->second->BatchGet(key_vec, &acc_values, tid);

          // Both result vectors are indexed by row below; a short result would be
          // an out-of-bounds read, so reject it up front.
          const size_t row_count = static_cast<size_t>(num_rows);
          if (current_values.size() < row_count || acc_values.size() < row_count) {
            throw std::runtime_error(
                "RowWiseAdaGrad::UpdateFlat BatchGet returned too few rows for table " +
                table);
          }

          // Scratch buffer for rows that have to be created; sized on first use and
          // reused so a batch of new keys does not allocate once per row.
          std::vector<float> initial_value;

          for (int64_t row = 0; row < num_rows; ++row) {
            const auto& current = current_values[static_cast<size_t>(row)];
            const auto& acc     = acc_values[static_cast<size_t>(row)];
            if (current.Size() != 0 &&
                static_cast<int64_t>(current.Size()) != embedding_dim) {
              throw std::runtime_error(
                  "RowWiseAdaGrad::UpdateFlat embedding_dim mismatch for table " +
                  table);
            }
            // The accumulator is a single scalar per key: empty (absent) or size 1.
            if (acc.Size() != 0 && acc.Size() != 1) {
              throw std::runtime_error(
                  "RowWiseAdaGrad::UpdateFlat accumulator size mismatch for table " +
                  table);
            }

            const float* row_grad  = grads + row * embedding_dim;
            float grad_square_mean = 0.0f;
        #pragma omp simd reduction(+ : grad_square_mean)
            for (int64_t col = 0; col < embedding_dim; ++col) {
              grad_square_mean += row_grad[col] * row_grad[col];
            }
            grad_square_mean /= static_cast<float>(embedding_dim);

            float accumulated_grad = acc.Size() == 0 ? 0.0f : acc.Data()[0];
            accumulated_grad += grad_square_mean;
            const float adaptive_lr =
                learning_rate_ / (std::sqrt(accumulated_grad) + epsilon_);

            if (current.Size() == 0) {
              // Absent row: it starts at zero, so the result is just the step.
              initial_value.resize(static_cast<size_t>(embedding_dim));
              for (int64_t col = 0; col < embedding_dim; ++col) {
                initial_value[static_cast<size_t>(col)] = -adaptive_lr * row_grad[col];
              }
              const std::string value(
                  reinterpret_cast<const char*>(initial_value.data()),
                  initial_value.size() * sizeof(float));
              param_it->second->Put(keys[static_cast<size_t>(row)], value, tid);
            } else {
              // NOTE(review): writes through the BatchGet view; this presumes the
              // view points at the table's own storage -- confirm.
              float* param_data = const_cast<float*>(current.Data());
        #pragma omp simd
              for (int64_t col = 0; col < embedding_dim; ++col) {
                param_data[col] -= adaptive_lr * row_grad[col];
              }
            }

            if (acc.Size() == 0) {
              const std::string value(
                  reinterpret_cast<const char*>(&accumulated_grad), sizeof(float));
              acc_it->second->Put(keys[static_cast<size_t>(row)], value, tid);
            } else {
              const_cast<float*>(acc.Data())[0] = accumulated_grad;
            }
          }
        }
      
        namespace {

        // Key under which AdamW stores its step counter (a tagged scalar kept in a
        // table of its own).  The top 8 bits of a key are reserved for TensorType,
        // so the sentinel is the largest value that leaves those bits clear, well
        // away from normal embedding ids.
        constexpr uint64_t kAdamWStepKey = ~uint64_t{0} >> 8;

        }  // namespace
      
        4
        void AdamW::Init(const std::vector<std::string> table_name,
      
                         const EmbeddingTableConfig& config,
      
                         BaseKV* base_kv) {
      
          2/2✓ Branch 5 taken 4 times.
✓ Branch 6 taken 4 times.

        8
          for (const auto& name : table_name) {
      
            const std::vector<uint64_t> shape = {
      
          1/2✓ Branch 2 taken 4 times.
✗ Branch 3 not taken.

        4
                config.num_embeddings, config.embedding_dim};
      
          1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.

        4
            auto* param_tensor = new SparseTensor();
      
          1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.

        4
            auto mutable_name  = name;
      
          1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.

        4
            auto mutable_shape = shape;
      
          1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.

        4
            param_tensor->init(
      
                mutable_name, PARAMETER, PARAMETER, mutable_shape, base_kv);
      
          1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.

        4
            tensor_map_[name] = param_tensor;
      
          1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.

        4
            auto* first_moment           = new SparseTensor();
      
          1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.

        4
            const std::string first_name = name + "_adamw_m";
      
          1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.

        4
            auto mutable_first_name      = first_name;
      
          1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.

        4
            first_moment->init(
      
                mutable_first_name, MOMENT_1, MOMENT_1, mutable_shape, base_kv);
      
          1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.

        4
            tensor_map_[first_name] = first_moment;
      
          1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.

        4
            auto* second_moment           = new SparseTensor();
      
          1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.

        4
            const std::string second_name = name + "_adamw_v";
      
          1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.

        4
            auto mutable_second_name      = second_name;
      
          1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.

        4
            second_moment->init(
      
                mutable_second_name, MOMENT_2, MOMENT_2, mutable_shape, base_kv);
      
          1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.

        4
            tensor_map_[second_name] = second_moment;
      
          1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.

        4
            auto* step_tensor                = new SparseTensor();
      
          1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.

        4
            const std::string step_name      = name + "_adamw_step";
      
          1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.

        4
            auto mutable_step_name           = step_name;
      
          1/2✓ Branch 2 taken 4 times.
✗ Branch 3 not taken.

        4
            std::vector<uint64_t> step_shape = {1, 1};
      
          1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.

        4
            step_tensor->init(
      
                mutable_step_name, MOMENT_1, MOMENT_1, step_shape, base_kv);
      
          1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.

        4
            tensor_map_[step_name] = step_tensor;
      
        4
          }
      
        4
        }
      
        ✗
        void AdamW::Update(
      
            std::string table, const ParameterCompressReader* reader, unsigned tid) {
      
        ✗
          auto param_it = tensor_map_.find(table);
      
        ✗
          if (param_it == tensor_map_.end()) {
      
        ✗
            throw std::runtime_error("Table not found: " + table);
      
          }
      
        ✗
          const int size    = reader->item_size();
      
        ✗
          const int64_t dim = param_it->second->EmbeddingDim();
      
        ✗
          std::vector<uint64_t> keys;
      
        ✗
          std::vector<float> grads;
      
        ✗
          keys.reserve(size);
      
        ✗
          grads.reserve(static_cast<size_t>(size) * static_cast<size_t>(dim));
      
        ✗
          for (int i = 0; i < size; ++i) {
      
        ✗
            const auto* item = reader->item(i);
      
        ✗
            if (item->dim != dim) {
      
        ✗
              throw std::runtime_error(
      
        ✗
                  "AdamW::Update embedding_dim mismatch for table " + table);
      
            }
      
        ✗
            keys.push_back(item->key);
      
        ✗
            grads.insert(grads.end(), item->data(), item->data() + dim);
      
          }
      
        ✗
          UpdateRows(table, keys.data(), grads.data(), size, dim, tid);
      
        ✗
        }
      
        4
        // Flat-buffer entry point: checks the arguments and the table's embedding
        // width, then forwards the caller's buffers to UpdateRows() without copying.
        //
        // Throws std::runtime_error if ValidateFlatUpdateArgs rejects the arguments,
        // if `table` is not registered, or if `embedding_dim` differs from the
        // table's EmbeddingDim().
        void AdamW::UpdateFlat(
            std::string table,
            const base::ConstArray<uint64_t>& keys,
            const float* grads,
            int64_t num_rows,
            int64_t embedding_dim,
            unsigned tid) {
          ValidateFlatUpdateArgs(keys, grads, num_rows, embedding_dim);

          const auto table_entry = tensor_map_.find(table);
          const bool table_known = table_entry != tensor_map_.end();
          if (!table_known) {
            throw std::runtime_error("Table not found: " + table);
          }

          const bool width_matches =
              table_entry->second->EmbeddingDim() == embedding_dim;
          if (!width_matches) {
            throw std::runtime_error(
                "AdamW::UpdateFlat embedding_dim mismatch for table " + table);
          }

          UpdateRows(table, keys.Data(), grads, num_rows, embedding_dim, tid);
        }
      
        4
        void AdamW::UpdateRows(
      
            const std::string& table,
      
            const uint64_t* keys,
      
            const float* grads,
      
            int64_t num_rows,
      
            int64_t embedding_dim,
      
            unsigned tid) {
      
          1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.

        4
          auto param_it  = tensor_map_.find(table);
      
          2/4✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 4 times.
✗ Branch 5 not taken.

        4
          auto first_it  = tensor_map_.find(table + "_adamw_m");
      
          2/4✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 4 times.
✗ Branch 5 not taken.

        4
          auto second_it = tensor_map_.find(table + "_adamw_v");
      
          2/4✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 4 times.
✗ Branch 5 not taken.

        4
          auto step_it   = tensor_map_.find(table + "_adamw_step");
      
          2/4✓ Branch 4 taken 4 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 4 times.
✗ Branch 7 not taken.

        12
          if (param_it == tensor_map_.end() || first_it == tensor_map_.end() ||
      
          3/6✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 4 times.
✗ Branch 8 not taken.
✓ Branch 9 taken 4 times.

        12
              second_it == tensor_map_.end() || step_it == tensor_map_.end()) {
      
        ✗
            throw std::runtime_error("AdamW state table not found for table " + table);
      
          }
      
        4
          std::string step_value;
      
          1/2✓ Branch 2 taken 4 times.
✗ Branch 3 not taken.

        4
          step_it->second->Get(kAdamWStepKey, step_value, tid);
      
          float step =
      
          2/2✓ Branch 1 taken 2 times.
✓ Branch 2 taken 2 times.

        4
              step_value.empty() ? 0.0f : base::ConstArray<float>(step_value).Data()[0];
      
        4
          step += 1.0f;
      
        4
          const float bias1 = 1.0f - std::pow(beta1_, step);
      
        4
          const float bias2 = 1.0f - std::pow(beta2_, step);
      
          2/4✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 4 times.

        4
          if (!(bias1 > 0.0f) || !(bias2 > 0.0f)) {
      
        ✗
            throw std::runtime_error("AdamW bias correction underflow");
      
          }
      
          1/2✓ Branch 2 taken 4 times.
✗ Branch 3 not taken.

        4
          std::vector<uint64_t> key_vec(keys, keys + num_rows);
      
        4
          std::vector<base::ConstArray<float>> params;
      
        4
          std::vector<base::ConstArray<float>> first;
      
        4
          std::vector<base::ConstArray<float>> second;
      
          1/2✓ Branch 2 taken 4 times.
✗ Branch 3 not taken.

        4
          param_it->second->BatchGet(key_vec, &params, tid);
      
          1/2✓ Branch 2 taken 4 times.
✗ Branch 3 not taken.

        4
          first_it->second->BatchGet(key_vec, &first, tid);
      
          1/2✓ Branch 2 taken 4 times.
✗ Branch 3 not taken.

        4
          second_it->second->BatchGet(key_vec, &second, tid);
      
        4
          const float decay       = 1.0f - learning_rate_ * weight_decay_;
      
        4
          const float correction1 = 1.0f / bias1;
      
        4
          const float correction2 = 1.0f / bias2;
      
          2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.

        8
          for (int64_t row = 0; row < num_rows; ++row) {
      
        4
            const auto index      = static_cast<size_t>(row);
      
        4
            const float* row_grad = grads + row * embedding_dim;
      
          1/2✓ Branch 2 taken 4 times.
✗ Branch 3 not taken.

        4
            std::vector<float> param(static_cast<size_t>(embedding_dim), 0.0f);
      
          1/2✓ Branch 2 taken 4 times.
✗ Branch 3 not taken.

        4
            std::vector<float> moment1(static_cast<size_t>(embedding_dim), 0.0f);
      
          1/2✓ Branch 2 taken 4 times.
✗ Branch 3 not taken.

        4
            std::vector<float> moment2(static_cast<size_t>(embedding_dim), 0.0f);
      
          1/2✓ Branch 2 taken 4 times.
✗ Branch 3 not taken.

        4
            if (params[index].Size() != 0) {
      
          1/2✗ Branch 2 not taken.
✓ Branch 3 taken 4 times.

        4
              if (params[index].Size() != embedding_dim) {
      
        ✗
                throw std::runtime_error(
      
        ✗
                    "AdamW parameter dimension mismatch for table " + table);
      
              }
      
          1/2✓ Branch 3 taken 4 times.
✗ Branch 4 not taken.

        8
              std::copy(params[index].Data(),
      
        4
                        params[index].Data() + embedding_dim,
      
                        param.begin());
      
            }
      
          2/2✓ Branch 2 taken 2 times.
✓ Branch 3 taken 2 times.

        4
            if (first[index].Size() != 0) {
      
          1/2✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.

        2
              if (first[index].Size() != embedding_dim) {
      
        ✗
                throw std::runtime_error(
      
        ✗
                    "AdamW first moment dimension mismatch for table " + table);
      
              }
      
          1/2✓ Branch 3 taken 2 times.
✗ Branch 4 not taken.

        4
              std::copy(first[index].Data(),
      
        2
                        first[index].Data() + embedding_dim,
      
                        moment1.begin());
      
            }
      
          2/2✓ Branch 2 taken 2 times.
✓ Branch 3 taken 2 times.

        4
            if (second[index].Size() != 0) {
      
          1/2✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.

        2
              if (second[index].Size() != embedding_dim) {
      
        ✗
                throw std::runtime_error(
      
        ✗
                    "AdamW second moment dimension mismatch for table " + table);
      
              }
      
          1/2✓ Branch 3 taken 2 times.
✗ Branch 4 not taken.

        4
              std::copy(second[index].Data(),
      
        2
                        second[index].Data() + embedding_dim,
      
                        moment2.begin());
      
            }
      
          2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 4 times.

        12
            for (int64_t col = 0; col < embedding_dim; ++col) {
      
        8
              const float grad = row_grad[col];
      
        16
              moment1[static_cast<size_t>(col)] =
      
        8
                  beta1_ * moment1[static_cast<size_t>(col)] + (1.0f - beta1_) * grad;
      
        16
              moment2[static_cast<size_t>(col)] =
      
        8
                  beta2_ * moment2[static_cast<size_t>(col)] +
      
        8
                  (1.0f - beta2_) * grad * grad;
      
        8
              const float m_hat = moment1[static_cast<size_t>(col)] * correction1;
      
        8
              const float v_hat = moment2[static_cast<size_t>(col)] * correction2;
      
        8
              param[static_cast<size_t>(col)] =
      
        8
                  decay * param[static_cast<size_t>(col)] -
      
        8
                  learning_rate_ * m_hat / (std::sqrt(v_hat) + epsilon_);
      
            }
      
        4
            const std::string param_value(reinterpret_cast<const char*>(param.data()),
      
          1/2✓ Branch 4 taken 4 times.
✗ Branch 5 not taken.

        4
                                          param.size() * sizeof(float));
      
        4
            const std::string first_value(reinterpret_cast<const char*>(moment1.data()),
      
          1/2✓ Branch 4 taken 4 times.
✗ Branch 5 not taken.

        4
                                          moment1.size() * sizeof(float));
      
            const std::string second_value(
      
        4
                reinterpret_cast<const char*>(moment2.data()),
      
          1/2✓ Branch 4 taken 4 times.
✗ Branch 5 not taken.

        4
                moment2.size() * sizeof(float));
      
          1/2✓ Branch 3 taken 4 times.
✗ Branch 4 not taken.

        4
            param_it->second->Put(keys[index], param_value, tid);
      
          1/2✓ Branch 3 taken 4 times.
✗ Branch 4 not taken.

        4
            first_it->second->Put(keys[index], first_value, tid);
      
          1/2✓ Branch 3 taken 4 times.
✗ Branch 4 not taken.

        4
            second_it->second->Put(keys[index], second_value, tid);
      
        4
          }
      
          const std::string next_step(
      
          1/2✓ Branch 2 taken 4 times.
✗ Branch 3 not taken.

        4
              reinterpret_cast<const char*>(&step), sizeof(float));
      
          1/2✓ Branch 3 taken 4 times.
✗ Branch 4 not taken.

        4
          step_it->second->Put(kAdamWStepKey, next_step, tid);
      
        4
        }