From 1788acbc473d042d3b6cb00714fcb60f96ec1d31 Mon Sep 17 00:00:00 2001
From: Alexey Kopytov
Date: Wed, 21 Jun 2017 22:21:07 +0300
Subject: [PATCH] Merge ConcurrencyKit up to commit b87563b.

Fix busybox compilation (gh#148) by merging the upstream fix.
---
 .../concurrency_kit/ck/build/ck.build.s390x   |   1 +
 third_party/concurrency_kit/ck/configure      |  27 +-
 .../concurrency_kit/ck/include/ck_epoch.h     |  95 ++++-
 .../concurrency_kit/ck/include/ck_pr.h        |   2 +
 .../ck/include/gcc/s390x/ck_f_pr.h            |  97 +++++
 .../ck/include/gcc/s390x/ck_pr.h              | 373 ++++++++++++++++++
 .../concurrency_kit/ck/include/spinlock/dec.h |   3 +-
 .../ck_epoch/validate/ck_epoch_call.c         |  16 +-
 .../ck_epoch/validate/ck_epoch_poll.c         |   6 +-
 .../ck_epoch/validate/ck_epoch_section.c      |   9 +-
 .../ck_epoch/validate/ck_epoch_section_2.c    |   4 +-
 .../ck_epoch/validate/ck_epoch_synchronize.c  |   6 +-
 .../regressions/ck_epoch/validate/ck_stack.c  |   4 +-
 .../regressions/ck_epoch/validate/torture.c   |  28 +-
 .../ck_hs/benchmark/parallel_bytestring.c     |  12 +-
 .../ck_ht/benchmark/parallel_bytestring.c     |  12 +-
 .../ck_ht/benchmark/parallel_direct.c         |  12 +-
 .../ck_rhs/benchmark/parallel_bytestring.c    |  12 +-
 third_party/concurrency_kit/ck/src/ck_epoch.c | 122 ++++--
 .../concurrency_kit/ck/src/ck_ht_hash.h       |   8 +
 20 files changed, 736 insertions(+), 113 deletions(-)
 create mode 100644 third_party/concurrency_kit/ck/build/ck.build.s390x
 create mode 100644 third_party/concurrency_kit/ck/include/gcc/s390x/ck_f_pr.h
 create mode 100644 third_party/concurrency_kit/ck/include/gcc/s390x/ck_pr.h

diff --git a/third_party/concurrency_kit/ck/build/ck.build.s390x b/third_party/concurrency_kit/ck/build/ck.build.s390x
new file mode 100644
index 0000000..2a91187
--- /dev/null
+++ b/third_party/concurrency_kit/ck/build/ck.build.s390x
@@ -0,0 +1 @@
+CFLAGS+=-O2 -D__s390x__
diff --git a/third_party/concurrency_kit/ck/configure b/third_party/concurrency_kit/ck/configure
index e840d41..9d2fe83 100755
--- a/third_party/concurrency_kit/ck/configure
+++ b/third_party/concurrency_kit/ck/configure
@@ -34,7 +34,7 @@ WANT_PIC=yes
 P_PWD=`pwd`
 MAINTAINER='sbahra@repnop.org'
-VERSION=${VERSION:-'0.6.0'}
+VERSION=${VERSION:-'1.0.0'}
 VERSION_MAJOR='0'
 BUILD="$PWD/build/ck.build"
 PREFIX=${PREFIX:-"/usr/local"}
@@ -169,7 +169,8 @@ generate_stdout()
 	for option; do
 		case "$option" in
 		*=?*)
-			value=`expr -- "$option" : '[^=]*=\(.*\)'`
+			optname=`echo $option|cut -c 3-`
+			value=`expr "$optname" : '[^=]*=\(.*\)'`
 			;;
 		*=)
 			value=
@@ -294,7 +295,8 @@ for option; do
 		fi
 		;;
 	*=*)
-		NAME=`expr -- "$option" : '\([^=]*\)='`
+		optname=`echo $option|cut -c 3-`
+		NAME=`expr "$optname" : '\([^=]*\)='`
 		eval "$NAME='$value'"
 		export $NAME
 		;;
@@ -347,7 +349,7 @@ case "$SYSTEM" in
 		DCORES=`sysctl -n hw.ncpu`
 		SYSTEM=darwin
 		;;
-	MINGW32*)
+	MINGW32*|MSYS_NT*)
 		SYSTEM=mingw32
 		LDFLAGS="-mthreads $LDFLAGS"
 		;;
@@ -482,6 +484,13 @@ case $PLATFORM in
 		PLATFORM=aarch64
 		ENVIRONMENT=64
 		;;
+	"s390x")
+		RTM_ENABLE="CK_MD_RTM_DISABLE"
+		LSE_ENABLE="CK_MD_LSE_DISABLE"
+		MM="${MM:-"CK_MD_RMO"}"
+		PLATFORM=s390x
+		ENVIRONMENT=64
+		;;
 	*)
 		RTM_ENABLE="CK_MD_RTM_DISABLE"
 		LSE_ENABLE="CK_MD_LSE_DISABLE"
@@ -561,9 +570,11 @@ else
 fi

 printf "Finding suitable compiler........"
-CC=`pathsearch "${CC:-cc}"`
-if test -z "$CC" -o ! -x "$CC"; then
-	CC=`pathsearch "${CC:-gcc}"`
+if test ! -x "${CC}"; then
+	CC=`pathsearch "${CC:-cc}"`
+	if test -z "$CC" -o ! -x "$CC"; then
+		CC=`pathsearch "${CC:-gcc}"`
+	fi
 fi
 assert "$CC" "not found"
@@ -596,7 +607,7 @@ int main(void) {
 EOF
 $CC -o .1 .1.c
-COMPILER=`./.1`
+COMPILER=`./.1 2> /dev/null`
 r=$?
 rm -f .1.c .1
diff --git a/third_party/concurrency_kit/ck/include/ck_epoch.h b/third_party/concurrency_kit/ck/include/ck_epoch.h
index e7ce5bc..9e166e5 100644
--- a/third_party/concurrency_kit/ck/include/ck_epoch.h
+++ b/third_party/concurrency_kit/ck/include/ck_epoch.h
@@ -83,6 +83,7 @@ struct ck_epoch_ref {
 };

 struct ck_epoch_record {
+	ck_stack_entry_t record_next;
 	struct ck_epoch *global;
 	unsigned int state;
 	unsigned int epoch;
@@ -92,17 +93,16 @@ struct ck_epoch_record {
 	} local CK_CC_CACHELINE;
 	unsigned int n_pending;
 	unsigned int n_peak;
-	unsigned long n_dispatch;
+	unsigned int n_dispatch;
+	void *ct;
 	ck_stack_t pending[CK_EPOCH_LENGTH];
-	ck_stack_entry_t record_next;
 } CK_CC_CACHELINE;
 typedef struct ck_epoch_record ck_epoch_record_t;

 struct ck_epoch {
 	unsigned int epoch;
-	char pad[CK_MD_CACHELINE - sizeof(unsigned int)];
-	ck_stack_t records;
 	unsigned int n_free;
+	ck_stack_t records;
 };
 typedef struct ck_epoch ck_epoch_t;

@@ -110,7 +110,14 @@ typedef struct ck_epoch ck_epoch_t;
  * Internal functions.
  */
 void _ck_epoch_addref(ck_epoch_record_t *, ck_epoch_section_t *);
-void _ck_epoch_delref(ck_epoch_record_t *, ck_epoch_section_t *);
+bool _ck_epoch_delref(ck_epoch_record_t *, ck_epoch_section_t *);
+
+CK_CC_FORCE_INLINE static void *
+ck_epoch_record_ct(const ck_epoch_record_t *record)
+{
+
+	return ck_pr_load_ptr(&record->ct);
+}

 /*
  * Marks the beginning of an epoch-protected section.
@@ -160,9 +167,10 @@ ck_epoch_begin(ck_epoch_record_t *record, ck_epoch_section_t *section)
 }

 /*
- * Marks the end of an epoch-protected section.
+ * Marks the end of an epoch-protected section. Returns true if no more
+ * sections exist for the caller.
  */
-CK_CC_FORCE_INLINE static void
+CK_CC_FORCE_INLINE static bool
 ck_epoch_end(ck_epoch_record_t *record, ck_epoch_section_t *section)
 {

@@ -170,15 +178,19 @@ ck_epoch_end(ck_epoch_record_t *record, ck_epoch_section_t *section)
 	ck_pr_store_uint(&record->active, record->active - 1);

 	if (section != NULL)
-		_ck_epoch_delref(record, section);
+		return _ck_epoch_delref(record, section);

-	return;
+	return record->active == 0;
 }

 /*
  * Defers the execution of the function pointed to by the "cb"
  * argument until an epoch counter loop. This allows for a
  * non-blocking deferral.
+ *
+ * We can get away without a fence here due to the monotonic nature
+ * of the epoch counter. Worst case, this will result in some delays
+ * before object destruction.
  */
 CK_CC_FORCE_INLINE static void
 ck_epoch_call(ck_epoch_record_t *record,
@@ -195,13 +207,74 @@ ck_epoch_call(ck_epoch_record_t *record,
 	return;
 }

+/*
+ * Same as ck_epoch_call, but allows for records to be shared and is reentrant.
+ */
+CK_CC_FORCE_INLINE static void
+ck_epoch_call_strict(ck_epoch_record_t *record,
+    ck_epoch_entry_t *entry,
+    ck_epoch_cb_t *function)
+{
+	struct ck_epoch *epoch = record->global;
+	unsigned int e = ck_pr_load_uint(&epoch->epoch);
+	unsigned int offset = e & (CK_EPOCH_LENGTH - 1);
+
+	ck_pr_inc_uint(&record->n_pending);
+	entry->function = function;
+
+	/* Store fence is implied by push operation. */
+	ck_stack_push_upmc(&record->pending[offset], &entry->stack_entry);
+	return;
+}
+
+/*
+ * This callback is used for synchronize_wait to allow for custom blocking
+ * behavior.
+ */
+typedef void ck_epoch_wait_cb_t(ck_epoch_t *, ck_epoch_record_t *,
+    void *);
+
+/*
+ * Return latest epoch value. This operation provides load ordering.
+ */
+CK_CC_FORCE_INLINE static unsigned int
+ck_epoch_value(const ck_epoch_t *ep)
+{
+
+	ck_pr_fence_load();
+	return ck_pr_load_uint(&ep->epoch);
+}
+
 void ck_epoch_init(ck_epoch_t *);
-ck_epoch_record_t *ck_epoch_recycle(ck_epoch_t *);
-void ck_epoch_register(ck_epoch_t *, ck_epoch_record_t *);
+
+/*
+ * Attempts to recycle an unused epoch record. If one is successfully
+ * allocated, the record context pointer is also updated.
+ */
+ck_epoch_record_t *ck_epoch_recycle(ck_epoch_t *, void *);
+
+/*
+ * Registers an epoch record. An optional context pointer may be passed that
+ * is retrievable with ck_epoch_record_ct.
+ */
+void ck_epoch_register(ck_epoch_t *, ck_epoch_record_t *, void *);
+
+/*
+ * Marks a record as available for re-use by a subsequent recycle operation.
+ * Note that the record cannot be physically destroyed.
+ */
 void ck_epoch_unregister(ck_epoch_record_t *);
+
 bool ck_epoch_poll(ck_epoch_record_t *);
 void ck_epoch_synchronize(ck_epoch_record_t *);
+void ck_epoch_synchronize_wait(ck_epoch_t *, ck_epoch_wait_cb_t *, void *);
 void ck_epoch_barrier(ck_epoch_record_t *);
+void ck_epoch_barrier_wait(ck_epoch_record_t *, ck_epoch_wait_cb_t *, void *);
+
+/*
+ * Reclaim entries associated with a record. This is safe to call only on
+ * the caller's record or records that are using call_strict.
+ */
 void ck_epoch_reclaim(ck_epoch_record_t *);

 #endif /* CK_EPOCH_H */
diff --git a/third_party/concurrency_kit/ck/include/ck_pr.h b/third_party/concurrency_kit/ck/include/ck_pr.h
index 9b7fc42..4fdbdff 100644
--- a/third_party/concurrency_kit/ck/include/ck_pr.h
+++ b/third_party/concurrency_kit/ck/include/ck_pr.h
@@ -43,6 +43,8 @@
 #include "gcc/sparcv9/ck_pr.h"
 #elif defined(__ppc64__)
 #include "gcc/ppc64/ck_pr.h"
+#elif defined(__s390x__)
+#include "gcc/s390x/ck_pr.h"
 #elif defined(__ppc__)
 #include "gcc/ppc/ck_pr.h"
 #elif defined(__arm__)
diff --git a/third_party/concurrency_kit/ck/include/gcc/s390x/ck_f_pr.h b/third_party/concurrency_kit/ck/include/gcc/s390x/ck_f_pr.h
new file mode 100644
index 0000000..cd54a28
--- /dev/null
+++ b/third_party/concurrency_kit/ck/include/gcc/s390x/ck_f_pr.h
@@ -0,0 +1,97 @@
+/* DO NOT EDIT. This is auto-generated from feature.sh */
+#define CK_F_PR_ADD_32
+#define CK_F_PR_ADD_64
+#define CK_F_PR_ADD_INT
+#define CK_F_PR_ADD_PTR
+#define CK_F_PR_ADD_UINT
+#define CK_F_PR_AND_32
+#define CK_F_PR_AND_64
+#define CK_F_PR_AND_INT
+#define CK_F_PR_AND_PTR
+#define CK_F_PR_AND_UINT
+#define CK_F_PR_CAS_32
+#define CK_F_PR_CAS_32_VALUE
+#define CK_F_PR_CAS_64
+#define CK_F_PR_CAS_64_VALUE
+#define CK_F_PR_CAS_INT
+#define CK_F_PR_CAS_INT_VALUE
+#define CK_F_PR_CAS_PTR
+#define CK_F_PR_CAS_PTR_VALUE
+#define CK_F_PR_CAS_UINT
+#define CK_F_PR_CAS_UINT_VALUE
+#define CK_F_PR_DEC_32
+#define CK_F_PR_DEC_64
+#define CK_F_PR_DEC_INT
+#define CK_F_PR_DEC_PTR
+#define CK_F_PR_DEC_UINT
+#define CK_F_PR_FAA_32
+#define CK_F_PR_FAA_64
+#define CK_F_PR_FAA_INT
+#define CK_F_PR_FAA_PTR
+#define CK_F_PR_FAA_UINT
+#define CK_F_PR_FAS_32
+#define CK_F_PR_FAS_64
+#define CK_F_PR_FAS_INT
+#define CK_F_PR_FAS_PTR
+#define CK_F_PR_FAS_UINT
+#define CK_F_PR_FAS_DOUBLE
+#define CK_F_PR_FENCE_LOAD
+#define CK_F_PR_FENCE_LOAD_DEPENDS
+#define CK_F_PR_FENCE_MEMORY
+#define CK_F_PR_FENCE_STORE
+#define CK_F_PR_FENCE_STRICT_LOAD
+#define CK_F_PR_FENCE_STRICT_LOAD_DEPENDS
+#define CK_F_PR_FENCE_STRICT_MEMORY
+#define CK_F_PR_FENCE_STRICT_STORE
+#define CK_F_PR_INC_32
+#define CK_F_PR_INC_64
+#define CK_F_PR_INC_INT
+#define CK_F_PR_INC_PTR
+#define CK_F_PR_INC_UINT
+#define CK_F_PR_LOAD_16
+#define CK_F_PR_LOAD_32
+#define CK_F_PR_LOAD_64
+#define CK_F_PR_LOAD_8
+#define CK_F_PR_LOAD_CHAR
+#define CK_F_PR_LOAD_DOUBLE
+#define CK_F_PR_LOAD_INT
+#define CK_F_PR_LOAD_PTR
+#define CK_F_PR_LOAD_SHORT
+#define CK_F_PR_LOAD_UINT
+#define CK_F_PR_NEG_32
+#define CK_F_PR_NEG_64
+#define CK_F_PR_NEG_INT
+#define CK_F_PR_NEG_PTR
+#define CK_F_PR_NEG_UINT
+#define CK_F_PR_NOT_32
+#define CK_F_PR_NOT_64
+#define CK_F_PR_NOT_INT
+#define CK_F_PR_NOT_PTR
+#define CK_F_PR_NOT_UINT
+#define CK_F_PR_OR_32
+#define CK_F_PR_OR_64
+#define CK_F_PR_OR_INT
+#define CK_F_PR_OR_PTR
+#define CK_F_PR_OR_UINT
+#define CK_F_PR_STALL
+#define CK_F_PR_STORE_16
+#define CK_F_PR_STORE_32
+#define CK_F_PR_STORE_64
+#define CK_F_PR_STORE_8
+#define CK_F_PR_STORE_CHAR
+#define CK_F_PR_STORE_DOUBLE
+#define CK_F_PR_STORE_INT
+#define CK_F_PR_STORE_PTR
+#define CK_F_PR_STORE_SHORT
+#define CK_F_PR_STORE_UINT
+#define CK_F_PR_SUB_32
+#define CK_F_PR_SUB_64
+#define CK_F_PR_SUB_INT
+#define CK_F_PR_SUB_PTR
+#define CK_F_PR_SUB_UINT
+#define CK_F_PR_XOR_32
+#define CK_F_PR_XOR_64
+#define CK_F_PR_XOR_INT
+#define CK_F_PR_XOR_PTR
+#define CK_F_PR_XOR_UINT
+
diff --git a/third_party/concurrency_kit/ck/include/gcc/s390x/ck_pr.h b/third_party/concurrency_kit/ck/include/gcc/s390x/ck_pr.h
new file mode 100644
index 0000000..8ad22b2
--- /dev/null
+++ b/third_party/concurrency_kit/ck/include/gcc/s390x/ck_pr.h
@@ -0,0 +1,373 @@
+/*
+ * Copyright 2009-2015 Samy Al Bahra.
+ * Copyright 2017 Neale Ferguson
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef CK_PR_S390X_H
+#define CK_PR_S390X_H
+
+#ifndef CK_PR_H
+#error Do not include this file directly, use ck_pr.h
+#endif
+
+#include <ck_cc.h>
+#include <ck_md.h>
+
+/*
+ * The following represent supported atomic operations.
+ * These operations may be emulated.
+ */
+#include "ck_f_pr.h"
+
+/*
+ * Minimum interface requirement met.
+ */
+#define CK_F_PR
+
+/*
+ * This bounces the hardware thread from low to medium
+ * priority. I am unsure of the benefits of this approach
+ * but it is used by the Linux kernel.
+ */
+CK_CC_INLINE static void
+ck_pr_stall(void)
+{
+	__sync_synchronize();
+	return;
+}
+
+#define CK_PR_FENCE(T)				\
+	CK_CC_INLINE static void		\
+	ck_pr_fence_strict_##T(void)		\
+	{					\
+		__sync_synchronize();		\
+	}
+
+/*
+ * These are derived from:
+ *     http://www.ibm.com/developerworks/systems/articles/powerpc.html
+ */
+CK_PR_FENCE(atomic)
+CK_PR_FENCE(atomic_store)
+CK_PR_FENCE(atomic_load)
+CK_PR_FENCE(store_atomic)
+CK_PR_FENCE(load_atomic)
+CK_PR_FENCE(store)
+CK_PR_FENCE(store_load)
+CK_PR_FENCE(load)
+CK_PR_FENCE(load_store)
+CK_PR_FENCE(memory)
+CK_PR_FENCE(acquire)
+CK_PR_FENCE(release)
+CK_PR_FENCE(acqrel)
+CK_PR_FENCE(lock)
+CK_PR_FENCE(unlock)
+
+#undef CK_PR_FENCE
+
+#define CK_PR_LOAD(S, M, T, C, I)			\
+	CK_CC_INLINE static T				\
+	ck_pr_md_load_##S(const M *target)		\
+	{						\
+		T r;					\
+		__asm__ __volatile__(I "\t%0, %1\n"	\
+		    : "=r" (r)				\
+		    : "Q" (*(const C *)target)		\
+		    : "memory");			\
+		return (r);				\
+	}
+
+CK_PR_LOAD(ptr, void, void *, uint64_t, "lg")
+
+#define CK_PR_LOAD_S(S, T, I) CK_PR_LOAD(S, T, T, T, I)
+
+CK_PR_LOAD_S(64, uint64_t, "lg")
+CK_PR_LOAD_S(32, uint32_t, "llgf")
+CK_PR_LOAD_S(16, uint16_t, "llgh")
+CK_PR_LOAD_S(8, uint8_t, "llgc")
+CK_PR_LOAD_S(uint, unsigned int, "llgf")
+CK_PR_LOAD_S(int, int, "llgf")
+CK_PR_LOAD_S(short, short, "lgh")
+CK_PR_LOAD_S(char, char, "lgb")
+#ifndef CK_PR_DISABLE_DOUBLE
+CK_CC_INLINE static double
+ck_pr_md_load_double(const double *target)
+{
+	double r;
+	__asm__ __volatile__("ld %0, %1\n"
+	    : "=f" (r)
+	    : "Q" (*(const double *)target)
+	    : "memory");
+	return (r);
+}
+#endif
+
+#undef CK_PR_LOAD_S
+#undef CK_PR_LOAD
+
+#define CK_PR_STORE(S, M, T, C, I)			\
+	CK_CC_INLINE static void			\
+	ck_pr_md_store_##S(M *target, T v)		\
+	{						\
+		__asm__ __volatile__(I "\t%1, %0\n"	\
+		    : "=Q" (*(C *)target)		\
+		    : "r" (v)				\
+		    : "memory");			\
+		return;					\
+	}
+
+CK_PR_STORE(ptr, void, const void *, uint64_t, "stg")
+
+#define CK_PR_STORE_S(S, T, I) CK_PR_STORE(S, T, T, T, I)
+
+CK_PR_STORE_S(64, uint64_t, "stg")
+CK_PR_STORE_S(32, uint32_t, "st")
+CK_PR_STORE_S(16, uint16_t, "sth")
+CK_PR_STORE_S(8, uint8_t, "stc")
+CK_PR_STORE_S(uint, unsigned int, "st")
+CK_PR_STORE_S(int, int, "st")
+CK_PR_STORE_S(short, short, "sth")
+CK_PR_STORE_S(char, char, "stc")
+#ifndef CK_PR_DISABLE_DOUBLE
+CK_CC_INLINE static void
+ck_pr_md_store_double(double *target, double v)
+{
+	__asm__ __volatile__("	std %1, %0\n"
+	    : "=Q" (*(double *)target)
+	    : "f" (v)
+	    : "0", "memory");
+}
+#endif
+
+#undef CK_PR_STORE_S
+#undef CK_PR_STORE
+
+CK_CC_INLINE static bool
+ck_pr_cas_64_value(uint64_t *target, uint64_t compare, uint64_t set, uint64_t *value)
+{
+	*value = __sync_val_compare_and_swap(target,compare,set);
+	return (*value == compare);
+}
+
+CK_CC_INLINE static bool
+ck_pr_cas_ptr_value(void *target, void *compare, void *set, void *value)
+{
+	uintptr_t previous;
+
+	previous = __sync_val_compare_and_swap((uintptr_t *) target,
+	    (uintptr_t) compare,
+	    (uintptr_t) set);
+	*((uintptr_t *) value) = previous;
+	return (previous == (uintptr_t) compare);
+}
+
+CK_CC_INLINE static bool
+ck_pr_cas_64(uint64_t *target, uint64_t compare, uint64_t set)
+{
+	return(__sync_bool_compare_and_swap(target,compare,set));
+}
+
+CK_CC_INLINE static bool
+ck_pr_cas_ptr(void *target, void *compare, void *set)
+{
+	return(__sync_bool_compare_and_swap((uintptr_t *) target,
+	    (uintptr_t) compare,
+	    (uintptr_t) set));
+}
+
+#define CK_PR_CAS(N, T)							\
+	CK_CC_INLINE static bool					\
+	ck_pr_cas_##N##_value(T *target, T compare, T set, T *value)	\
+	{								\
+		*value = __sync_val_compare_and_swap(target,		\
+		    compare,						\
+		    set);						\
+		return(*value == compare);				\
+	}								\
+	CK_CC_INLINE static bool					\
+	ck_pr_cas_##N(T *target, T compare, T set)			\
+	{								\
+		return(__sync_bool_compare_and_swap(target,		\
+		    compare,						\
+		    set));						\
+	}

+CK_PR_CAS(32, uint32_t)
+CK_PR_CAS(uint, unsigned int)
+CK_PR_CAS(int, int)
+
+#undef CK_PR_CAS
+
+CK_CC_INLINE static void *
+ck_pr_fas_ptr(void *target, void *v)
+{
+	return((void *)__atomic_exchange_n((uintptr_t *) target, (uintptr_t) v, __ATOMIC_ACQUIRE));
+}
+
+#define CK_PR_FAS(N, M, T)						\
+	CK_CC_INLINE static T						\
+	ck_pr_fas_##N(M *target, T v)					\
+	{								\
+		return(__atomic_exchange_n(target, v, __ATOMIC_ACQUIRE)); \
+	}
+
+CK_PR_FAS(64, uint64_t, uint64_t)
+CK_PR_FAS(32, uint32_t, uint32_t)
+CK_PR_FAS(int, int, int)
+CK_PR_FAS(uint, unsigned int, unsigned int)
+
+#ifndef CK_PR_DISABLE_DOUBLE
+CK_CC_INLINE static double
+ck_pr_fas_double(double *target, double *v)
+{
+	double previous;
+
+	__asm__ __volatile__ ("	lg	1,%2\n"
+	    "0:	lg	0,%1\n"
+	    "	csg	0,1,%1\n"
+	    "	jnz	0b\n"
+	    "	ldgr	%0,0\n"
+	    : "=f" (previous)
+	    : "Q" (target), "Q" (v)
+	    : "0", "1", "cc", "memory");
+	return (previous);
+}
+#endif
+
+#undef CK_PR_FAS
+
+/*
+ * Atomic store-only binary operations.
+ */
+#define CK_PR_BINARY(K, S, M, T)			\
+	CK_CC_INLINE static void			\
+	ck_pr_##K##_##S(M *target, T d)			\
+	{						\
+		d = __sync_fetch_and_##K((T *)target, d); \
+		return;					\
+	}
+
+#define CK_PR_BINARY_S(K, S, T) CK_PR_BINARY(K, S, T, T)
+
+#define CK_PR_GENERATE(K)				\
+	CK_PR_BINARY(K, ptr, void, void *)		\
+	CK_PR_BINARY_S(K, char, char)			\
+	CK_PR_BINARY_S(K, int, int)			\
+	CK_PR_BINARY_S(K, uint, unsigned int)		\
+	CK_PR_BINARY_S(K, 64, uint64_t)			\
+	CK_PR_BINARY_S(K, 32, uint32_t)			\
+	CK_PR_BINARY_S(K, 16, uint16_t)			\
+	CK_PR_BINARY_S(K, 8, uint8_t)
+
+CK_PR_GENERATE(add)
+CK_PR_GENERATE(sub)
+CK_PR_GENERATE(and)
+CK_PR_GENERATE(or)
+CK_PR_GENERATE(xor)
+
+#undef CK_PR_GENERATE
+#undef CK_PR_BINARY_S
+#undef CK_PR_BINARY
+
+#define CK_PR_UNARY(S, M, T)				\
+	CK_CC_INLINE static void			\
+	ck_pr_inc_##S(M *target)			\
+	{						\
+		ck_pr_add_##S(target, (T)1);		\
+		return;					\
+	}						\
+	CK_CC_INLINE static void			\
+	ck_pr_dec_##S(M *target)			\
+	{						\
+		ck_pr_sub_##S(target, (T)1);		\
+		return;					\
+	}
+
+#define CK_PR_UNARY_X(S, M)				\
+	CK_CC_INLINE static void			\
+	ck_pr_not_##S(M *target)			\
+	{						\
+		M newval;				\
+		do {					\
+			newval = ~(*target);		\
+		} while (!__sync_bool_compare_and_swap(target,	\
+		    *target,				\
+		    newval));				\
+	}						\
+	CK_CC_INLINE static void			\
+	ck_pr_neg_##S(M *target)			\
+	{						\
+		M newval;				\
+		do {					\
+			newval = -(*target);		\
+		} while (!__sync_bool_compare_and_swap(target,	\
+		    *target,				\
+		    newval));				\
+	}
+
+#define CK_PR_UNARY_S(S, M) CK_PR_UNARY(S, M, M)	\
+			    CK_PR_UNARY_X(S, M)
+
+CK_PR_UNARY(ptr, void, void *)
+CK_PR_UNARY_S(char, char)
+CK_PR_UNARY_S(int, int)
+CK_PR_UNARY_S(uint, unsigned int)
+CK_PR_UNARY_S(64, uint64_t)
+CK_PR_UNARY_S(32, uint32_t)
+CK_PR_UNARY_S(16, uint16_t)
+CK_PR_UNARY_S(8, uint8_t)
+
+#undef CK_PR_UNARY_S
+#undef CK_PR_UNARY
+
+CK_CC_INLINE static void *
+ck_pr_faa_ptr(void *target, uintptr_t delta)
+{
+	uintptr_t previous;
+
+	previous = __sync_fetch_and_add((uintptr_t *) target, delta);
+
+	return (void *)(previous);
+}
+
+#define CK_PR_FAA(S, T)						\
+	CK_CC_INLINE static T					\
+	ck_pr_faa_##S(T *target, T delta)			\
+	{							\
+		T previous;					\
+								\
+		previous = __sync_fetch_and_add(target, delta);	\
+								\
+		return (previous);				\
+	}
+
+CK_PR_FAA(64, uint64_t)
+CK_PR_FAA(32, uint32_t)
+CK_PR_FAA(uint, unsigned int)
+CK_PR_FAA(int, int)
+
+#undef CK_PR_FAA
+
+#endif /* CK_PR_S390X_H */
diff --git a/third_party/concurrency_kit/ck/include/spinlock/dec.h b/third_party/concurrency_kit/ck/include/spinlock/dec.h
index 11d36dd..3e36bf7 100644
--- a/third_party/concurrency_kit/ck/include/spinlock/dec.h
+++ b/third_party/concurrency_kit/ck/include/spinlock/dec.h
@@ -111,7 +111,8 @@ ck_spinlock_dec_lock_eb(struct ck_spinlock_dec *lock)
 		if (r == true)
 			break;

-		ck_backoff_eb(&backoff);
+		while (ck_pr_load_uint(&lock->value) != 1)
+			ck_backoff_eb(&backoff);
 	}

 	ck_pr_fence_lock();
diff --git a/third_party/concurrency_kit/ck/regressions/ck_epoch/validate/ck_epoch_call.c b/third_party/concurrency_kit/ck/regressions/ck_epoch/validate/ck_epoch_call.c
index 29e0df8..1c274e0 100644
--- a/third_party/concurrency_kit/ck/regressions/ck_epoch/validate/ck_epoch_call.c
+++ b/third_party/concurrency_kit/ck/regressions/ck_epoch/validate/ck_epoch_call.c
@@ -37,6 +37,7 @@ static void
 cb(ck_epoch_entry_t *p)
 {
+	/* Test that we can reregister the callback. */
 	if (counter == 0)
 		ck_epoch_call(&record[1], p, cb);
@@ -50,15 +51,22 @@ int
 main(void)
 {
 	ck_epoch_entry_t entry;
+	ck_epoch_entry_t another;

-	ck_epoch_register(&epoch, &record[0]);
-	ck_epoch_register(&epoch, &record[1]);
+	ck_epoch_register(&epoch, &record[0], NULL);
+	ck_epoch_register(&epoch, &record[1], NULL);

 	ck_epoch_call(&record[1], &entry, cb);
 	ck_epoch_barrier(&record[1]);
 	ck_epoch_barrier(&record[1]);
-	if (counter != 2)
-		ck_error("Expected counter value 2, read %u.\n", counter);
+
+	/* Make sure that strict works. */
+	ck_epoch_call_strict(&record[1], &entry, cb);
+	ck_epoch_call_strict(&record[1], &another, cb);
+	ck_epoch_barrier(&record[1]);
+
+	if (counter != 4)
+		ck_error("Expected counter value 4, read %u.\n", counter);

 	return 0;
 }
diff --git a/third_party/concurrency_kit/ck/regressions/ck_epoch/validate/ck_epoch_poll.c b/third_party/concurrency_kit/ck/regressions/ck_epoch/validate/ck_epoch_poll.c
index aec6dd0..4e8769b 100644
--- a/third_party/concurrency_kit/ck/regressions/ck_epoch/validate/ck_epoch_poll.c
+++ b/third_party/concurrency_kit/ck/regressions/ck_epoch/validate/ck_epoch_poll.c
@@ -89,7 +89,7 @@ read_thread(void *unused CK_CC_UNUSED)
 	ck_epoch_record_t record CK_CC_CACHELINE;
 	ck_stack_entry_t *cursor, *n;

-	ck_epoch_register(&stack_epoch, &record);
+	ck_epoch_register(&stack_epoch, &record, NULL);

 	if (aff_iterate(&a)) {
 		perror("ERROR: failed to affine thread");
@@ -141,7 +141,7 @@ write_thread(void *unused CK_CC_UNUSED)
 	ck_epoch_record_t record;
 	ck_stack_entry_t *s;

-	ck_epoch_register(&stack_epoch, &record);
+	ck_epoch_register(&stack_epoch, &record, NULL);

 	if (aff_iterate(&a)) {
 		perror("ERROR: failed to affine thread");
@@ -191,7 +191,7 @@ write_thread(void *unused CK_CC_UNUSED)
 	ck_epoch_barrier(&record);

 	if (tid == 0) {
-		fprintf(stderr, "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[W] Peak: %u (%2.2f%%)\n Reclamations: %lu\n\n",
+		fprintf(stderr, "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[W] Peak: %u (%2.2f%%)\n Reclamations: %u\n\n",
 		    record.n_peak,
 		    (double)record.n_peak / ((double)PAIRS_S * ITERATE_S) * 100,
 		    record.n_dispatch);
diff --git a/third_party/concurrency_kit/ck/regressions/ck_epoch/validate/ck_epoch_section.c b/third_party/concurrency_kit/ck/regressions/ck_epoch/validate/ck_epoch_section.c
index 12bcca1..7b76d1c 100644
--- a/third_party/concurrency_kit/ck/regressions/ck_epoch/validate/ck_epoch_section.c
+++ b/third_party/concurrency_kit/ck/regressions/ck_epoch/validate/ck_epoch_section.c
@@ -46,8 +46,8 @@ setup_test(void)
 {
 	ck_epoch_init(&epc);
-	ck_epoch_register(&epc, &record);
-	ck_epoch_register(&epc, &record2);
+	ck_epoch_register(&epc, &record, NULL);
+	ck_epoch_register(&epc, &record2, NULL);
 	cleanup_calls = 0;

 	return;
@@ -88,7 +88,8 @@ test_simple_read_section(void)
 	ck_epoch_begin(&record, &section);
 	ck_epoch_call(&record, &entry, cleanup);
 	assert(cleanup_calls == 0);
-	ck_epoch_end(&record, &section);
+	if (ck_epoch_end(&record, &section) == false)
+		ck_error("expected no more sections");
 	ck_epoch_barrier(&record);
 	assert(cleanup_calls == 1);

@@ -157,7 +158,7 @@ reader_work(void *arg)
 	ck_epoch_section_t section;
 	struct obj *o;

-	ck_epoch_register(&epc, &local_record);
+	ck_epoch_register(&epc, &local_record, NULL);

 	o = (struct obj *)arg;
diff --git a/third_party/concurrency_kit/ck/regressions/ck_epoch/validate/ck_epoch_section_2.c b/third_party/concurrency_kit/ck/regressions/ck_epoch/validate/ck_epoch_section_2.c
index aed3661..daf6738 100644
--- a/third_party/concurrency_kit/ck/regressions/ck_epoch/validate/ck_epoch_section_2.c
+++ b/third_party/concurrency_kit/ck/regressions/ck_epoch/validate/ck_epoch_section_2.c
@@ -64,7 +64,7 @@ read_thread(void *unused CK_CC_UNUSED)
 	record = malloc(sizeof *record);
 	assert(record != NULL);

-	ck_epoch_register(&epoch, record);
+	ck_epoch_register(&epoch, record, NULL);

 	if (aff_iterate(&a)) {
 		perror("ERROR: failed to affine thread");
@@ -133,7 +133,7 @@ write_thread(void *unused CK_CC_UNUSED)
 	ck_epoch_record_t record;
 	unsigned long iterations = 0;

-	ck_epoch_register(&epoch, &record);
+	ck_epoch_register(&epoch, &record, NULL);

 	if (aff_iterate(&a)) {
 		perror("ERROR: failed to affine thread");
diff --git a/third_party/concurrency_kit/ck/regressions/ck_epoch/validate/ck_epoch_synchronize.c b/third_party/concurrency_kit/ck/regressions/ck_epoch/validate/ck_epoch_synchronize.c
index a03a4f7..c278334 100644
--- a/third_party/concurrency_kit/ck/regressions/ck_epoch/validate/ck_epoch_synchronize.c
+++ b/third_party/concurrency_kit/ck/regressions/ck_epoch/validate/ck_epoch_synchronize.c
@@ -91,7 +91,7 @@ read_thread(void *unused CK_CC_UNUSED)
 	ck_stack_entry_t *n;
 	unsigned int i;

-	ck_epoch_register(&stack_epoch, &record);
+	ck_epoch_register(&stack_epoch, &record, NULL);

 	if (aff_iterate(&a)) {
 		perror("ERROR: failed to affine thread");
@@ -148,7 +148,7 @@ write_thread(void *unused CK_CC_UNUSED)
 	ck_epoch_record_t record;
 	ck_stack_entry_t *s;

-	ck_epoch_register(&stack_epoch, &record);
+	ck_epoch_register(&stack_epoch, &record, NULL);

 	if (aff_iterate(&a)) {
 		perror("ERROR: failed to affine thread");
@@ -204,7 +204,7 @@ write_thread(void *unused CK_CC_UNUSED)
 	ck_epoch_synchronize(&record);

 	if (tid == 0) {
-		fprintf(stderr, "[W] Peak: %u (%2.2f%%)\n Reclamations: %lu\n\n",
+		fprintf(stderr, "[W] Peak: %u (%2.2f%%)\n Reclamations: %u\n\n",
 		    record.n_peak,
 		    (double)record.n_peak / ((double)PAIRS_S * ITERATE_S) * 100,
 		    record.n_dispatch);
diff --git a/third_party/concurrency_kit/ck/regressions/ck_epoch/validate/ck_stack.c b/third_party/concurrency_kit/ck/regressions/ck_epoch/validate/ck_stack.c
index fc50228..6d493e1 100644
--- a/third_party/concurrency_kit/ck/regressions/ck_epoch/validate/ck_stack.c
+++ b/third_party/concurrency_kit/ck/regressions/ck_epoch/validate/ck_stack.c
@@ -81,7 +81,7 @@ thread(void *unused CK_CC_UNUSED)
 	unsigned long smr = 0;
 	unsigned int i;

-	ck_epoch_register(&stack_epoch, &record);
+	ck_epoch_register(&stack_epoch, &record, NULL);

 	if (aff_iterate(&a)) {
 		perror("ERROR: failed to affine thread");
@@ -118,7 +118,7 @@ thread(void *unused CK_CC_UNUSED)
 	while (ck_pr_load_uint(&e_barrier) < n_threads);

 	fprintf(stderr, "Deferrals: %lu (%2.2f)\n", smr, (double)smr / PAIRS);
-	fprintf(stderr, "Peak: %u (%2.2f%%), %u pending\nReclamations: %lu\n\n",
+	fprintf(stderr, "Peak: %u (%2.2f%%), %u pending\nReclamations: %u\n\n",
 	    record.n_peak,
 	    (double)record.n_peak / PAIRS * 100,
 	    record.n_pending,
diff --git a/third_party/concurrency_kit/ck/regressions/ck_epoch/validate/torture.c b/third_party/concurrency_kit/ck/regressions/ck_epoch/validate/torture.c
index ce3c049..f49d412 100644
--- a/third_party/concurrency_kit/ck/regressions/ck_epoch/validate/torture.c
+++ b/third_party/concurrency_kit/ck/regressions/ck_epoch/validate/torture.c
@@ -31,8 +31,8 @@
 #include 
 #include 
 #include 
+#include 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -119,7 +119,7 @@ read_thread(void *unused CK_CC_UNUSED)
 	record = malloc(sizeof *record);
 	assert(record != NULL);

-	ck_epoch_register(&epoch, record);
+	ck_epoch_register(&epoch, record, NULL);

 	if (aff_iterate(&a)) {
 		perror("ERROR: failed to affine thread");
@@ -147,10 +147,11 @@ write_thread(void *unused CK_CC_UNUSED)
 	ck_epoch_record_t *record;
 	unsigned long iterations = 0;
 	bool c = ck_pr_faa_uint(&first, 1);
+	uint64_t ac = 0;

 	record = malloc(sizeof *record);
 	assert(record != NULL);
-	ck_epoch_register(&epoch, record);
+	ck_epoch_register(&epoch, record, NULL);

 	if (aff_iterate(&a)) {
 		perror("ERROR: failed to affine thread");
@@ -160,6 +161,12 @@ write_thread(void *unused CK_CC_UNUSED)
 	ck_pr_inc_uint(&barrier);
 	while (ck_pr_load_uint(&barrier) < n_threads);

+#define CK_EPOCH_S do {				\
+	uint64_t _s = rdtsc();			\
+	ck_epoch_synchronize(record);		\
+	ac += rdtsc() - _s;			\
+} while (0)
+
 	do {
 		/*
 		 * A thread should never observe invalid.value > valid.value.
@@ -167,33 +174,34 @@ write_thread(void *unused CK_CC_UNUSED)
 		 * invalid.value <= valid.value is valid.
 		 */
 		if (!c) ck_pr_store_uint(&valid.value, 1);
-		ck_epoch_synchronize(record);
+		CK_EPOCH_S;
 		if (!c) ck_pr_store_uint(&invalid.value, 1);

 		ck_pr_fence_store();
 		if (!c) ck_pr_store_uint(&valid.value, 2);
-		ck_epoch_synchronize(record);
+		CK_EPOCH_S;
 		if (!c) ck_pr_store_uint(&invalid.value, 2);

 		ck_pr_fence_store();
 		if (!c) ck_pr_store_uint(&valid.value, 3);
-		ck_epoch_synchronize(record);
+		CK_EPOCH_S;
 		if (!c) ck_pr_store_uint(&invalid.value, 3);

 		ck_pr_fence_store();
 		if (!c) ck_pr_store_uint(&valid.value, 4);
-		ck_epoch_synchronize(record);
+		CK_EPOCH_S;
 		if (!c) ck_pr_store_uint(&invalid.value, 4);

-		ck_epoch_synchronize(record);
+		CK_EPOCH_S;
 		if (!c) ck_pr_store_uint(&invalid.value, 0);
-		ck_epoch_synchronize(record);
+		CK_EPOCH_S;

-		iterations += 4;
+		iterations += 6;
 	} while (ck_pr_load_uint(&leave) == 0 &&
 	    ck_pr_load_uint(&n_rd) > 0);

 	fprintf(stderr, "%lu iterations\n", iterations);
+	fprintf(stderr, "%" PRIu64 " average latency\n", ac / iterations);
 	return NULL;
 }
diff --git a/third_party/concurrency_kit/ck/regressions/ck_hs/benchmark/parallel_bytestring.c b/third_party/concurrency_kit/ck/regressions/ck_hs/benchmark/parallel_bytestring.c
index 6d38379..3c36851 100644
--- a/third_party/concurrency_kit/ck/regressions/ck_hs/benchmark/parallel_bytestring.c
+++ b/third_party/concurrency_kit/ck/regressions/ck_hs/benchmark/parallel_bytestring.c
@@ -147,7 +147,7 @@ set_init(void)
 #endif

 	ck_epoch_init(&epoch_hs);
-	ck_epoch_register(&epoch_hs, &epoch_wr);
+	ck_epoch_register(&epoch_hs, &epoch_wr, NULL);
 	common_srand48((long int)time(NULL));
 	if (ck_hs_init(&hs, mode, hs_hash, hs_compare,
 	    &my_allocator, 65536, common_lrand48()) == false) {
 		perror("ck_hs_init");
@@ -234,7 +234,7 @@ reader(void *unused)
 		perror("WARNING: Failed to affine thread");

 	s = j = a = 0;
-	ck_epoch_register(&epoch_hs, &epoch_record);
+	ck_epoch_register(&epoch_hs, &epoch_record, NULL);
 	for (;;) {
 		j++;
 		ck_epoch_begin(&epoch_record, NULL);
@@ -454,8 +454,8 @@ main(int argc, char *argv[])
 	ck_epoch_record_t epoch_temporary = epoch_wr;
 	ck_epoch_synchronize(&epoch_wr);

-	fprintf(stderr, " '- Summary: %u pending, %u peak, %lu reclamations -> "
-	    "%u pending, %u peak, %lu reclamations\n\n",
+	fprintf(stderr, " '- Summary: %u pending, %u peak, %u reclamations -> "
+	    "%u pending, %u peak, %u reclamations\n\n",
 	    epoch_temporary.n_pending, epoch_temporary.n_peak, epoch_temporary.n_dispatch,
 	    epoch_wr.n_pending, epoch_wr.n_peak, epoch_wr.n_dispatch);

@@ -593,8 +593,8 @@ main(int argc, char *argv[])
 	epoch_temporary = epoch_wr;
 	ck_epoch_synchronize(&epoch_wr);

-	fprintf(stderr, " '- Summary: %u pending, %u peak, %lu reclamations -> "
-	    "%u pending, %u peak, %lu reclamations\n\n",
+	fprintf(stderr, " '- Summary: %u pending, %u peak, %u reclamations -> "
+	    "%u pending, %u peak, %u reclamations\n\n",
 	    epoch_temporary.n_pending, epoch_temporary.n_peak, epoch_temporary.n_dispatch,
 	    epoch_wr.n_pending, epoch_wr.n_peak, epoch_wr.n_dispatch);
 	return 0;
diff --git a/third_party/concurrency_kit/ck/regressions/ck_ht/benchmark/parallel_bytestring.c b/third_party/concurrency_kit/ck/regressions/ck_ht/benchmark/parallel_bytestring.c
index f3d3854..bb8f462 100644
--- a/third_party/concurrency_kit/ck/regressions/ck_ht/benchmark/parallel_bytestring.c
+++ b/third_party/concurrency_kit/ck/regressions/ck_ht/benchmark/parallel_bytestring.c
@@ -132,7 +132,7 @@ table_init(void)
 #endif

 	ck_epoch_init(&epoch_ht);
-	ck_epoch_register(&epoch_ht, &epoch_wr);
+	ck_epoch_register(&epoch_ht, &epoch_wr, NULL);
 	common_srand48((long int)time(NULL));
 	if (ck_ht_init(&ht, mode, NULL, &my_allocator, 8, common_lrand48()) == false) {
 		perror("ck_ht_init");
@@ -221,7 +221,7 @@ reader(void *unused)
 		perror("WARNING: Failed to affine thread");

 	s = j = a = 0;
-	ck_epoch_register(&epoch_ht, &epoch_record);
+	ck_epoch_register(&epoch_ht, &epoch_record, NULL);
 	for (;;) {
 		j++;
 		ck_epoch_begin(&epoch_record, NULL);
@@ -426,8 +426,8 @@ main(int argc, char *argv[])
 	ck_epoch_record_t epoch_temporary = epoch_wr;
 	ck_epoch_synchronize(&epoch_wr);

-	fprintf(stderr, " '- Summary: %u pending, %u peak, %lu reclamations -> "
-	    "%u pending, %u peak, %lu reclamations\n\n",
+	fprintf(stderr, " '- Summary: %u pending, %u peak, %u reclamations -> "
+	    "%u pending, %u peak, %u reclamations\n\n",
 	    epoch_temporary.n_pending, epoch_temporary.n_peak, epoch_temporary.n_dispatch,
 	    epoch_wr.n_pending, epoch_wr.n_peak, epoch_wr.n_dispatch);

@@ -551,8 +551,8 @@ main(int argc, char *argv[])
 	epoch_temporary = epoch_wr;
 	ck_epoch_synchronize(&epoch_wr);

-	fprintf(stderr, " '- Summary: %u pending, %u peak, %lu reclamations -> "
-	    "%u pending, %u peak, %lu reclamations\n\n",
+	fprintf(stderr, " '- Summary: %u pending, %u peak, %u reclamations -> "
+	    "%u pending, %u peak, %u reclamations\n\n",
 	    epoch_temporary.n_pending, epoch_temporary.n_peak, epoch_temporary.n_dispatch,
 	    epoch_wr.n_pending, epoch_wr.n_peak, epoch_wr.n_dispatch);
 	return 0;
diff --git a/third_party/concurrency_kit/ck/regressions/ck_ht/benchmark/parallel_direct.c b/third_party/concurrency_kit/ck/regressions/ck_ht/benchmark/parallel_direct.c
index 195bb25..de1d12e 100644
--- a/third_party/concurrency_kit/ck/regressions/ck_ht/benchmark/parallel_direct.c
+++ b/third_party/concurrency_kit/ck/regressions/ck_ht/benchmark/parallel_direct.c
@@ -136,7 +136,7 @@ table_init(void)
 {

 	ck_epoch_init(&epoch_ht);
-	ck_epoch_register(&epoch_ht, &epoch_wr);
+	ck_epoch_register(&epoch_ht, &epoch_wr, NULL);
 	common_srand48((long int)time(NULL));
 	if (ck_ht_init(&ht, CK_HT_MODE_DIRECT, hash_function, &my_allocator, 8, common_lrand48()) == false) {
 		perror("ck_ht_init");
@@ -221,7 +221,7 @@ ht_reader(void *unused)
 		perror("WARNING: Failed to affine thread");

 	s = j = a = 0;
-	ck_epoch_register(&epoch_ht, &epoch_record);
+	ck_epoch_register(&epoch_ht, &epoch_record, NULL);
 	for (;;) {
 		j++;
 		ck_epoch_begin(&epoch_record, NULL);
@@ -412,8 +412,8 @@ main(int argc, char *argv[])
 	ck_epoch_record_t epoch_temporary = epoch_wr;
 	ck_epoch_synchronize(&epoch_wr);

-	fprintf(stderr, " '- Summary: %u pending, %u peak, %lu reclamations -> "
-	    "%u pending, %u peak, %lu reclamations\n\n",
+	fprintf(stderr, " '- Summary: %u pending, %u peak, %u reclamations -> "
+	    "%u pending, %u peak, %u reclamations\n\n",
 	    epoch_temporary.n_pending, epoch_temporary.n_peak, epoch_temporary.n_dispatch,
 	    epoch_wr.n_pending, epoch_wr.n_peak, epoch_wr.n_dispatch);
@@ -537,8 +537,8 @@ main(int argc, char *argv[])
 	epoch_temporary = epoch_wr;
 	ck_epoch_synchronize(&epoch_wr);

-	fprintf(stderr, " '- Summary: %u pending, %u peak, %lu reclamations -> "
-	    "%u pending, %u peak, %lu reclamations\n\n",
+	fprintf(stderr, " '- Summary: %u pending, %u peak, %u reclamations -> "
+	    "%u pending, %u peak, %u reclamations\n\n",
 	    epoch_temporary.n_pending, epoch_temporary.n_peak, epoch_temporary.n_dispatch,
 	    epoch_wr.n_pending, epoch_wr.n_peak, epoch_wr.n_dispatch);
 	return 0;
diff --git a/third_party/concurrency_kit/ck/regressions/ck_rhs/benchmark/parallel_bytestring.c b/third_party/concurrency_kit/ck/regressions/ck_rhs/benchmark/parallel_bytestring.c
index a95d940..017222d 100644
--- a/third_party/concurrency_kit/ck/regressions/ck_rhs/benchmark/parallel_bytestring.c
+++ b/third_party/concurrency_kit/ck/regressions/ck_rhs/benchmark/parallel_bytestring.c
@@ -144,7 +144,7 @@ set_init(void)

 	ck_epoch_init(&epoch_hs);
-	ck_epoch_register(&epoch_hs, &epoch_wr);
+	ck_epoch_register(&epoch_hs, &epoch_wr, NULL);
 	common_srand48((long int)time(NULL));
 	if (ck_rhs_init(&hs, mode, hs_hash, hs_compare,
 	    &my_allocator, 65536, common_lrand48()) == false) {
 		perror("ck_rhs_init");
@@ -231,7 +231,7 @@ reader(void *unused)
 		perror("WARNING: Failed to affine thread");

 	s = j = a = 0;
-	ck_epoch_register(&epoch_hs, &epoch_record);
+	ck_epoch_register(&epoch_hs, &epoch_record, NULL);
 	for (;;) {
 		j++;
 		ck_epoch_begin(&epoch_record, NULL);
@@ -451,8 +451,8 @@ main(int argc, char *argv[])
 	ck_epoch_record_t epoch_temporary = epoch_wr;
 	ck_epoch_synchronize(&epoch_wr);

-	fprintf(stderr, " '- Summary: %u pending, %u peak, %lu reclamations -> "
-	    "%u pending, %u peak, %lu reclamations\n\n",
+	fprintf(stderr, " '- Summary: %u pending, %u peak, %u reclamations -> "
+	    "%u pending, %u peak, %u reclamations\n\n",
 	    epoch_temporary.n_pending, epoch_temporary.n_peak, epoch_temporary.n_dispatch,
 	    epoch_wr.n_pending, epoch_wr.n_peak, epoch_wr.n_dispatch);

@@ -590,8 +590,8 @@ main(int argc, char *argv[])
 	epoch_temporary = epoch_wr;
 	ck_epoch_synchronize(&epoch_wr);

-	fprintf(stderr, " '- Summary: %u pending, %u peak, %lu reclamations -> "
-	    "%u pending, %u peak, %lu reclamations\n\n",
+	fprintf(stderr, " '- Summary: %u pending, %u peak, %u reclamations -> "
+	    "%u pending, %u peak, %u reclamations\n\n",
 	    epoch_temporary.n_pending, epoch_temporary.n_peak, epoch_temporary.n_dispatch,
 	    epoch_wr.n_pending, epoch_wr.n_peak, epoch_wr.n_dispatch);
 	return 0;
diff --git a/third_party/concurrency_kit/ck/src/ck_epoch.c b/third_party/concurrency_kit/ck/src/ck_epoch.c
index a0e9180..a3273b4 100644
--- a/third_party/concurrency_kit/ck/src/ck_epoch.c
+++ b/third_party/concurrency_kit/ck/src/ck_epoch.c
@@ -139,7 +139,7 @@ CK_STACK_CONTAINER(struct ck_epoch_entry, stack_entry,
 #define CK_EPOCH_SENSE_MASK	(CK_EPOCH_SENSE - 1)

-void
+bool
 _ck_epoch_delref(struct ck_epoch_record *record,
     struct ck_epoch_section *section)
 {
@@ -150,7 +150,7 @@ _ck_epoch_delref(struct ck_epoch_record *record,
 	current->count--;

 	if (current->count > 0)
-		return;
+		return false;

 	/*
 	 * If the current bucket no longer has any references, then
@@ -161,8 +161,7 @@ _ck_epoch_delref(struct ck_epoch_record *record,
 	 * If no other active bucket exists, then the record will go
 	 * inactive in order to allow for forward progress.
 	 */
-	other = &record->local.bucket[(i + 1) &
-	    CK_EPOCH_SENSE_MASK];
+	other = &record->local.bucket[(i + 1) & CK_EPOCH_SENSE_MASK];
 	if (other->count > 0 &&
 	    ((int)(current->epoch - other->epoch) < 0)) {
 		/*
@@ -172,7 +171,7 @@ _ck_epoch_delref(struct ck_epoch_record *record,
 		ck_pr_store_uint(&record->epoch, other->epoch);
 	}

-	return;
+	return true;
 }

 void
@@ -230,7 +229,7 @@ ck_epoch_init(struct ck_epoch *global)
 }

 struct ck_epoch_record *
-ck_epoch_recycle(struct ck_epoch *global)
+ck_epoch_recycle(struct ck_epoch *global, void *ct)
 {
 	struct ck_epoch_record *record;
 	ck_stack_entry_t *cursor;
@@ -249,6 +248,12 @@ ck_epoch_recycle(struct ck_epoch *global)
 		    CK_EPOCH_STATE_USED);
 		if (state == CK_EPOCH_STATE_FREE) {
 			ck_pr_dec_uint(&global->n_free);
+			ck_pr_store_ptr(&record->ct, ct);
+
+			/*
+			 * The context pointer is ordered by a
+			 * subsequent protected section.
+			 */
 			return record;
 		}
 	}
@@ -258,7 +263,8 @@ ck_epoch_recycle(struct ck_epoch *global)
 }

 void
-ck_epoch_register(struct ck_epoch *global, struct ck_epoch_record *record)
+ck_epoch_register(struct ck_epoch *global, struct ck_epoch_record *record,
+    void *ct)
 {
 	size_t i;

@@ -269,6 +275,7 @@ ck_epoch_register(struct ck_epoch *global, struct ck_epoch_record *record)
 	record->n_dispatch = 0;
 	record->n_peak = 0;
 	record->n_pending = 0;
+	record->ct = ct;
 	memset(&record->local, 0, sizeof record->local);

 	for (i = 0; i < CK_EPOCH_LENGTH; i++)
@@ -295,6 +302,7 @@ ck_epoch_unregister(struct ck_epoch_record *record)
 	for (i = 0; i < CK_EPOCH_LENGTH; i++)
 		ck_stack_init(&record->pending[i]);

+	ck_pr_store_ptr(&record->ct, NULL);
 	ck_pr_fence_store();
 	ck_pr_store_uint(&record->state, CK_EPOCH_STATE_FREE);
 	ck_pr_inc_uint(&global->n_free);
@@ -345,11 +353,10 @@ ck_epoch_dispatch(struct ck_epoch_record *record, unsigned int e)
 {
 	unsigned int epoch = e & (CK_EPOCH_LENGTH - 1);
 	ck_stack_entry_t *head, *next, *cursor;
+	unsigned int n_pending, n_peak;
 	unsigned int i = 0;

-	head = CK_STACK_FIRST(&record->pending[epoch]);
-	ck_stack_init(&record->pending[epoch]);
-
+	head = ck_stack_batch_pop_upmc(&record->pending[epoch]);
 	for (cursor = head; cursor != NULL; cursor = next) {
 		struct ck_epoch_entry *entry =
 		    ck_epoch_entry_container(cursor);
@@ -359,11 +366,18 @@ ck_epoch_dispatch(struct ck_epoch_record *record, unsigned int e)
 		i++;
 	}

-	if (record->n_pending > record->n_peak)
-		record->n_peak = record->n_pending;
+	n_peak = ck_pr_load_uint(&record->n_peak);
+	n_pending = ck_pr_load_uint(&record->n_pending);
+
+	/* We don't require accuracy around peak calculation. */
+	if (n_pending > n_peak)
+		ck_pr_store_uint(&record->n_peak, n_peak);
+
+	if (i > 0) {
+		ck_pr_add_uint(&record->n_dispatch, i);
+		ck_pr_sub_uint(&record->n_pending, i);
+	}

-	record->n_dispatch += i;
-	record->n_pending -= i;
 	return;
 }

@@ -381,13 +395,24 @@ ck_epoch_reclaim(struct ck_epoch_record *record)
 	return;
 }

+CK_CC_FORCE_INLINE static void
+epoch_block(struct ck_epoch *global, struct ck_epoch_record *cr,
+    ck_epoch_wait_cb_t *cb, void *ct)
+{
+
+	if (cb != NULL)
+		cb(global, cr, ct);
+
+	return;
+}
+
 /*
  * This function must not be called with-in read section.
  */
 void
-ck_epoch_synchronize(struct ck_epoch_record *record)
+ck_epoch_synchronize_wait(struct ck_epoch *global,
+    ck_epoch_wait_cb_t *cb, void *ct)
 {
-	struct ck_epoch *global = record->global;
 	struct ck_epoch_record *cr;
 	unsigned int delta, epoch, goal, i;
 	bool active;
@@ -424,10 +449,27 @@ ck_epoch_synchronize(struct ck_epoch_record *record)
 		 * period.
 		 */
 		e_d = ck_pr_load_uint(&global->epoch);
-		if (e_d != delta) {
-			delta = e_d;
-			goto reload;
+		if (e_d == delta) {
+			epoch_block(global, cr, cb, ct);
+			continue;
 		}
+
+		/*
+		 * If the epoch has been updated, we may have already
+		 * met our goal.
+		 */
+		delta = e_d;
+		if ((goal > epoch) & (delta >= goal))
+			goto leave;
+
+		epoch_block(global, cr, cb, ct);
+
+		/*
+		 * If the epoch has been updated, then a grace period
+		 * requires that all threads are observed idle at the
+		 * same epoch.
+		 */
+		cr = NULL;
 	}

 	/*
@@ -459,20 +501,6 @@ ck_epoch_synchronize(struct ck_epoch_record *record)
 	 * Otherwise, we have just acquired latest snapshot.
 	 */
 	delta = delta + r;
-	continue;
-
-reload:
-	if ((goal > epoch) & (delta >= goal)) {
-		/*
-		 * Right now, epoch overflow is handled as an edge
-		 * case. If we have already observed an epoch
-		 * generation, then we can be sure no hazardous
-		 * references exist to objects from this generation. We
-		 * can actually avoid an addtional scan step at this
-		 * point.
-		 */
-		break;
-	}
 }

 /*
@@ -480,8 +508,16 @@ ck_epoch_synchronize(struct ck_epoch_record *record)
 	 * However, if non-temporal instructions are used, full barrier
 	 * semantics are necessary.
 	 */
+leave:
 	ck_pr_fence_memory();
-	record->epoch = delta;
+	return;
+}
+
+void
+ck_epoch_synchronize(struct ck_epoch_record *record)
+{
+
+	ck_epoch_synchronize_wait(record->global, NULL, NULL);
 	return;
 }

@@ -494,6 +530,16 @@ ck_epoch_barrier(struct ck_epoch_record *record)
 	return;
 }

+void
+ck_epoch_barrier_wait(struct ck_epoch_record *record, ck_epoch_wait_cb_t *cb,
+    void *ct)
+{
+
+	ck_epoch_synchronize_wait(record->global, cb, ct);
+	ck_epoch_reclaim(record);
+	return;
+}
+
 /*
  * It may be worth it to actually apply these deferral semantics to an epoch
  * that was observed at ck_epoch_call time. The problem is that the latter
@@ -509,7 +555,6 @@ ck_epoch_poll(struct ck_epoch_record *record)
 {
 	bool active;
 	unsigned int epoch;
-	unsigned int snapshot;
 	struct ck_epoch_record *cr = NULL;
 	struct ck_epoch *global = record->global;
@@ -533,12 +578,7 @@ ck_epoch_poll(struct ck_epoch_record *record)
 	}

 	/* If an active thread exists, rely on epoch observation. */
-	if (ck_pr_cas_uint_value(&global->epoch, epoch, epoch + 1,
-	    &snapshot) == false) {
-		record->epoch = snapshot;
-	} else {
-		record->epoch = epoch + 1;
-	}
+	(void)ck_pr_cas_uint(&global->epoch, epoch, epoch + 1);

 	ck_epoch_dispatch(record, epoch + 1);
 	return true;
diff --git a/third_party/concurrency_kit/ck/src/ck_ht_hash.h b/third_party/concurrency_kit/ck/src/ck_ht_hash.h
index cd3d7a5..e0767f9 100644
--- a/third_party/concurrency_kit/ck/src/ck_ht_hash.h
+++ b/third_party/concurrency_kit/ck/src/ck_ht_hash.h
@@ -88,7 +88,15 @@ static inline uint64_t rotl64 ( uint64_t x, int8_t r )

 FORCE_INLINE static uint32_t getblock ( const uint32_t * p, int i )
 {
+#ifdef __s390x__
+	uint32_t res;
+
+	__asm__ ("	lrv	%0,%1\n"
+	    : "=r" (res) : "Q" (p[i]) : "cc", "memory");
+	return res;
+#else
 	return p[i];
+#endif /* !__s390x__ */
 }

 //-----------------------------------------------------------------------------
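
Note for reviewers of this merge: the ck_epoch interface changes above affect
every caller in sysbench. ck_epoch_register() and ck_epoch_recycle() now take
a context pointer (readable through ck_epoch_record_ct()), ck_epoch_end()
reports whether the caller left its last active section, ck_epoch_call_strict()
supports records shared between threads, and grace periods can block through a
caller-supplied ck_epoch_wait_cb_t via ck_epoch_synchronize_wait() and
ck_epoch_barrier_wait(). The following single-threaded sketch shows the updated
calling convention; it is not part of the merge, and the node type, the
yielding waiter, and the "writer" context string are illustrative assumptions
only.

/*
 * Minimal usage sketch of the post-merge ck_epoch API. "struct node" and
 * "waiter" are hypothetical; only the ck_epoch_* calls reflect the interface
 * introduced by this patch.
 */
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>

#include <ck_epoch.h>

static ck_epoch_t epoch;
static ck_epoch_record_t record;

struct node {
	int value;
	ck_epoch_entry_t epoch_entry;
};

/* Map a ck_epoch_entry_t back to its enclosing node. */
CK_EPOCH_CONTAINER(struct node, epoch_entry, node_container)

/* Deferred destructor, invoked once a grace period has elapsed. */
static void
node_destroy(ck_epoch_entry_t *e)
{

	free(node_container(e));
	return;
}

/* Custom blocking behavior for the new *_wait variants. */
static void
waiter(ck_epoch_t *g CK_CC_UNUSED, ck_epoch_record_t *cr CK_CC_UNUSED,
    void *ct CK_CC_UNUSED)
{

	/* Yield instead of spinning while the grace period advances. */
	sched_yield();
	return;
}

int
main(void)
{
	ck_epoch_section_t section;
	struct node *n = malloc(sizeof *n);

	if (n == NULL)
		return 1;

	ck_epoch_init(&epoch);

	/* Registration now takes an optional context pointer. */
	ck_epoch_register(&epoch, &record, "writer");
	printf("ct=%s\n", (const char *)ck_epoch_record_ct(&record));

	/*
	 * Read-side section; ck_epoch_end() now reports whether this was
	 * the caller's last active section.
	 */
	ck_epoch_begin(&record, &section);
	(void)ck_epoch_end(&record, &section);

	/*
	 * Defer reclamation of n; ck_epoch_call_strict() would be used
	 * instead if the record were shared between threads.
	 */
	n->value = 42;
	ck_epoch_call(&record, &n->epoch_entry, node_destroy);

	/*
	 * Wait out a grace period, invoking waiter() while blocked, then
	 * run the deferred callbacks (synchronize_wait + reclaim).
	 */
	ck_epoch_barrier_wait(&record, waiter, NULL);

	ck_epoch_unregister(&record);
	return 0;
}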