// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #pragma once #include #include #include /** Refer to https://stackoverflow.com/questions/9983239/how-to-generate-zipf-distributed-numbers-efficiently * Zipf-like random distribution. * * "Rejection-inversion to generate variates from monotone discrete * distributions", Wolfgang Hörmann and Gerhard Derflinger * ACM TOMACS 6.3 (1996): 169-184 */ template class zipf_distribution { public: typedef RealType input_type; typedef IntType result_type; static_assert(std::numeric_limits::is_integer, ""); static_assert(!std::numeric_limits::is_integer, ""); zipf_distribution(const IntType n = std::numeric_limits::max(), const RealType q = 1.0) : n(n), q(q), H_x1(H(1.5) - 1.0), H_n(H(n + 0.5)), dist(H_x1, H_n) {} IntType operator()(std::mt19937& rng) { while (true) { const RealType u = dist(rng); const RealType x = H_inv(u); const IntType k = clamp(std::round(x), 1, n); if (u >= H(k + 0.5) - h(k)) { return k; } } } private: /** Clamp x to [min, max]. */ template static constexpr T clamp(const T x, const T min, const T max) { return std::max(min, std::min(max, x)); } /** exp(x) - 1 / x */ static double expxm1bx(const double x) { return (std::abs(x) > epsilon) ? std::expm1(x) / x : (1.0 + x / 2.0 * (1.0 + x / 3.0 * (1.0 + x / 4.0))); } /** H(x) = log(x) if q == 1, (x^(1-q) - 1)/(1 - q) otherwise. * H(x) is an integral of h(x). * * Note the numerator is one less than in the paper order to work with all * positive q. */ const RealType H(const RealType x) { const RealType log_x = std::log(x); return expxm1bx((1.0 - q) * log_x) * log_x; } /** log(1 + x) / x */ static RealType log1pxbx(const RealType x) { return (std::abs(x) > epsilon) ? std::log1p(x) / x : 1.0 - x * ((1 / 2.0) - x * ((1 / 3.0) - x * (1 / 4.0))); } /** The inverse function of H(x) */ const RealType H_inv(const RealType x) { const RealType t = std::max(-1.0, x * (1.0 - q)); return std::exp(log1pxbx(t) * x); } /** That hat function h(x) = 1 / (x ^ q) */ const RealType h(const RealType x) { return std::exp(-q * std::log(x)); } static constexpr RealType epsilon = 1e-8; IntType n; ///< Number of elements RealType q; ///< Exponent RealType H_x1; ///< H(x_1) RealType H_n; ///< H(n) std::uniform_real_distribution dist; ///< [H(x_1), H(n)] };