TurboPFor: Benchmark app

2023-03-18 21:18:51 +01:00
parent 1280058904
commit 918adacd5d
1 changed files with 50 additions and 48 deletions
--- a/icapp.c
+++ b/icapp.c
@ -435,7 +435,7 @@ enum { T_0, T_UINT8, T_UINT16, T_UINT24, T_UINT32, T_UINT40, T_UINT48, T_UINT56,
  ctou64(in+n*isize) = u; n++;\
 }

-int    mdelta, elog2[8];
+int    mdelta, elog2[8],nsd=3;
 char   *keysep;
 float  errlimf[] = { 0.1, 0.01, 0.001, 0.0001, 0.00001, 0.000001, 0.0000001, 0.00000001 };
 double errlima[] = { 0.1, 0.01, 0.001, 0.0001, 0.00001, 0.000001, 0.0000001, 0.00000001 };
@ -501,7 +501,7 @@ unsigned befgen(unsigned char **_in, unsigned n, int fmt, int isize, FILE *fi, i
 		      int e = t - q - 1;   	//if(e < 0) e = 0; //{ printf("[%s,%d] ", p, e); die(" FATAL"); 			 }
 			  if(e >= 0) {
 		        float ed = errlimf[e], dd = d;
-		        d = _fppad32(d, ed, elog2[e]);    	  //if(fabs(d-dd) > DBL_EPSILON) { printf("[%s,%d:%f ", p, e, d); printf("%f] ", d-dd);}
+		        d = _fprazor32(d, ed, elog2[e]);    	  //if(fabs(d-dd) > DBL_EPSILON) { printf("[%s,%d:%f ", p, e, d); printf("%f] ", d-dd);}
 			  }
 		    }
            uint32_t u;
@ -515,7 +515,7 @@ unsigned befgen(unsigned char **_in, unsigned n, int fmt, int isize, FILE *fi, i
 		      int e = t - q - 1;   				
 			  if(e >= 0) {
 		        double ed = errlima[e],dd=d;             //printf("[%f ", d);
-		        d = _fppad64(d, ed, elog2[e]);    	//printf("%f] ", d-dd);
+		        d = _fprazor64(d, ed, elog2[e]);    	//printf("%f] ", d-dd);
 			  }
 		    }
            uint64_t u;
@ -606,7 +606,7 @@ void libmemcpy(unsigned char *dst, unsigned char *src, int len) {
 }

 //---------------------------------------- IcApp: Benchmark --------------------------------------------------------------------------
-
+#include  "ext/gb.c"
 //----------- external libraries --------------
  #ifdef _STREAMVBYTE
 #include "streamvbyte/include/streamvbyte.h"
@ -1035,11 +1035,11 @@ unsigned char *bestr(unsigned id, unsigned b, unsigned char *s, char *prms, int
    "%3d:meshoptimizer    3D lz%s,%d          ",	
    "%3d:blosc            shuffle+%s,%d       ",   	
    "%3d:blosc            shuffle delta+%s,%d ",   	
-    "%3d:blosc            shuffle trunc_prec  ",   	
-
-    "%3d:fpadd            speed test          ", //150
-    "%3d:libdroundfast    speed test          ",
-    "%3d:bitgrooming      speed test          ",
+    "%3d:fprazor          Turbo razor         ", 
+	
+    "%3d:gb               granular bitgroom   ",  //150
+    "%3d:bitgrooming      bit grooming        ",
+    "%3d:libdroundfast    float fast round    ",
    "%3d:tpzenc           transp+zzag integrat",   	
    "%3d:tpz0enc          transpose+zigzag    ",   	
    "%3d:tpxenc           tranp+xor integrated",   	
@ -1080,7 +1080,7 @@ void fpstat(unsigned char *in, size_t n, unsigned char *out, int s) {
                     eamin = DBL_MAX, eamax = DBL_MIN, easum = 0, easumsqr = 0, //absolute error                    : abs(input-output)
                     ermin = DBL_MAX, ermax = DBL_MIN, ersum = 0, ersumsqr = 0, //relative error                    : abs(input-output)/abs(input)
                     osum  = 0;                                                 //transformed lossy data (output)   : sum
-  unsigned long long xtb = 0, xlb = 0, zlb = 0, tb = 0, lb = 0, elb=0, mtb=0;
+  long long          xtb = 0, xlb = 0, zlb = 0, tb = 0, lb = 0, elb=0, mtb=0, itb = 0;
  size_t             idn = 0;
  unsigned char      *ip, *op;
  unsigned           esize = s<0?-s:s,t=0, dup=0,zero=0;
@ -1108,8 +1108,8 @@ void fpstat(unsigned char *in, size_t n, unsigned char *out, int s) {
  #define EXPO64(u) ((u>>52 & 0x7ff) - 0x3fe)
  #define MANT32(u) (u & 0x807fffffu)
  #define MANT64(u) (u & 0x800fffffffffffffull)
-  #define U(s) T3(uint, s, _t) u = T2(ctou,s)(op);\
-                                        tb +=      u?T2(ctz,s)(u):s;       lb += u?T2(clz,s)(u):s;          AC(t<=s,"Fatal t=%d ", t); \
+  #define U(s) T3(uint, s, _t) u = T2(ctou,s)(op), v = T2(ctou,s)(ip);\
+    itb +=  v?T2(ctz,s)(v):s;           tb +=      u?T2(ctz,s)(u):s;       lb += u?T2(clz,s)(u):s;          AC(t<=s,"Fatal t=%d ", t); \
    xstart ^= u;                       xtb += xstart?T2(ctz,s)(xstart):s; xlb += xstart?T2(clz,s)(xstart):0; xstart = u;\
    zstart  = T2(zigzagenc,s)(u - zstart);                       zlb += zstart?T2(clz,s)(zstart):s; zstart = u

@ -1144,16 +1144,18 @@ void fpstat(unsigned char *in, size_t n, unsigned char *out, int s) {
  else if(s == -8) fb = (double)elb*100/((double)n*11);

  double mse = easumsqr/n, irange = imax - imin;
-  printf("\n");
+  if(verbose > 1) printf("\n");
  //printf("Leading/Trailing bits [%.2f%%,%.2f%%=%.2f%%]. XOR[%.2f%%,%.2f%%=%.2f%%] Zigzag[%.2f%%]\n", BR(lb), BR(tb), BR(lb+tb), BR(xlb), BR(xtb), BR(xlb+xtb), BR(zlb)/*BR(elb), BR(mtb), BR(elb+mtb)*/ );
-  printf("Range: [min=%g / max=%g] = %g. zeros=%.2f%%, Duplicate=%.2f%%\n", imin, imax, irange, (double)zero*100.0/(double)(n/esize), (double)dup*100.0/(double)(n/esize));
+  if(verbose > 1) printf("Range: [min=%g / max=%g] = %g. zeros=%.2f%%, Duplicate=%.2f%%\n", imin, imax, irange, (double)zero*100.0/(double)(n/esize), (double)dup*100.0/(double)(n/esize));
  //printf("Min error: Absolute = %.12f, Relative = %f, pointwise relative = %f\n", eamin, eamin/irange, eamax/irange, ermax);
  //printf("Avg error: Absolute = %.12f, Relative = %f, pointwise relative = %f\n", easum/idn, (easum/idn)/irange, ersum/idn);
-  printf("Max error: Absolute = %g, Relative = %g, pointwise relative = %g\n", eamax, eamax/irange, ermax);
-  printf("Peak Signal-to-Noise Ratio: PSNR         = %f\n", 20*log10(irange)-10*log10(mse));
-  printf("Normalized Root Mean Square Error: NRMSE = %g\n", sqrt(mse)/irange);
+  if(verbose > 1) printf("Max error: Absolute = %g, Relative = %g, pointwise relative = %g\n", eamax, eamax/irange, ermax); else printf("e=%g ", ermax);
+  double psnr=20*log10(irange)-10*log10(mse); 
+  if(verbose > 1) printf("Peak Signal-to-Noise Ratio: PSNR         = %f\n", psnr);            else printf("PSNR=%g ", psnr);
+  if(verbose > 1) printf("Normalized Root Mean Square Error: NRMSE = %g\n", sqrt(mse)/irange);else printf("NRMSE=%g ", sqrt(mse)/irange);
  double std1 = sqrt(isumpavg/n), std2 = sqrt(osumpavg/n), ee = iosumpavg/n, acEff = (iosumpavg/n)/sqrt(isumpavg/n)/sqrt(osumpavg/n);
-  printf("Pearson Correlation Coefficient          = %f\n",    (iosumpavg/n)/sqrt(isumpavg/n)/sqrt(osumpavg/n));
+  if(verbose > 1) printf("Pearson Correlation Coefficient          = %f\n",    (iosumpavg/n)/sqrt(isumpavg/n)/sqrt(osumpavg/n));
+  if(verbose == 1) printf("ctz=%.2f%% ", (double)((tb-itb)/8)* 100.0/ (double)(n*esize));
 }

 //---------------------------------------------------------------------------------------
@ -1750,18 +1752,15 @@ unsigned bench32(unsigned char *in, unsigned n, unsigned char *out, unsigned cha
    //case 149: if(zerrlim>DBL_EPSILON) truncate_precision(6, 4, n, in, out);
 	//  codlev = codid==ICC_ZSTD?((codlev+1)/2):codlev; TMBENCH("",l = blosccomp(in, n, out, ns, codid==ICC_LZ4?(codlev>9?BLOSC_LZ4HC:BLOSC_LZ4):BLOSC_ZSTD, codlev, 4, BLOSC_TRUNC_PREC, BLOSC_SHUFFLE, BLOSC_DELTA),n);    pr(l,n); TMBENCH2("148", bloscdecomp(out, l, cpy, n),n); break;
 	  #endif
-	// ----- speed test & lossy error bound analysis (with option -v1) -----------------------
-    case 150: if(verbose) { fppad32(  in, m, out,0.001); fpstat(in, m, out, -4); }
-	                 TMBENCH("", fppad32(  in, m, out,0.001),n); memcpy(cpy,in,n); l=n; pr(l,n); TMBENCH2("150", fppad32(in, m, out,0.001),n); break;
-	  #ifdef _LIBROUNDFAST
-    case 151: if(verbose) { fround32( in, m, out, 3);    fpstat(in, m, out, -4); } // digirounding algo
-	                 TMBENCH("", fround32( in, m, out, 3),n);    memcpy(cpy,in,n); l=n; pr(l,n); TMBENCH2("151", fround32(in, m, out,3),n); break;
-	  #endif
+	// ----- speed test & lossy error bound analysis (with option -v1) -----------------------	
+    case 149:            TMBENCH("", fprazor32(  in, m, out,zerrlim),n);                                          l=n; pr(l,n); memcpy(cpy,in,n); if(verbose) fpstat(in, m, out, -4); break;
 	  #ifdef _BITGROOMING	
-    case 152: if(verbose) {          BG_compress_args(BG_FLOAT, in, NULL, BITGROOM, BG_NSD, 3/*nsd*/, 3/*dsd*/, m, out);  fpstat(in, m, out, -4);  // bigrooming algo
-	                 TMBENCH("",     BG_compress_args(BG_FLOAT, in, NULL, BITGROOM, BG_NSD, 3/*nsd*/, 3/*dsd*/, m, out),n); memcpy(cpy,in,n); l=n; pr(l,n);
-	                 TMBENCH2("152", BG_compress_args(BG_FLOAT, in, NULL, BITGROOM, BG_NSD, 3/*nsd*/, 3/*dsd*/, m, out),n); } break;
+    case 150: ptr_unn p; TMBENCH("", memcpy(out,in,n); ccr_gbr(nsd, NC_FLOAT, m, 0, p, out),n);                   l=n; pr(l,n); memcpy(cpy,in,n); if(verbose) fpstat(in, m, out, -4); break;
+    case 151:            TMBENCH("", BG_compress_args(BG_FLOAT, in, NULL, BITGROOM, BG_NSD, nsd, nsd, m, out),n); l=n; pr(l,n); memcpy(cpy,in,n); if(verbose) fpstat(in, m, out, -4); break;
 	  #endif
+	  #ifdef _LIBROUNDFAST // digirounding algo
+    case 152:            TMBENCH("", fround32( in, m, out, nsd),n);                                               l=n; pr(l,n); memcpy(cpy,in,n); if(verbose) fpstat(in, m, out, -4); break;
+	  #endif
 	// ----- speed test transpose integrated -----------------------
    case 153: l = n; TMBENCH("", tpzenc(  in, n, out, USIZE),n);             pr(l,n); TMBENCH2("153", tpzdec(  out, n,cpy, USIZE),n); break;
    case 154: l = n; TMBENCH("", tpz0enc( in, n, out, USIZE, tmp),n);        pr(l,n); TMBENCH2("154", tpz0dec( out, n,cpy, USIZE),n); break;
@ -1925,18 +1924,15 @@ unsigned bench64(unsigned char *in, unsigned n, unsigned char *out, unsigned cha
 	  if(zerrlim > DBL_EPSILON) { if(verbose) fpstat(in, m, cpy, -8); memcpy(cpy,in,n); } //lossy compression irreversible
 	} break;
 	  #endif
-	// ----- speed test -----------------------
-    case 150: if(verbose) { fppad64(  in, m, out,0.001); fpstat(in, m, out, -8); }
-	                 TMBENCH("", fppad64(  in, m, out,0.001),n); memcpy(cpy,in,n); l=n; pr(l,n); TMBENCH2("141", fppad64(in, m, out,0.001),n);  break;
-	  #ifdef _LIBROUNDFAST
-    case 151: if(verbose) { fround64( in, m, out, 3);    fpstat(in, m, out, -8); } // digirounding algo
-	                 TMBENCH("", fround64( in, m, out, 3),n);    memcpy(cpy,in,n); l=n; pr(l,n); TMBENCH2("142", fround64(in, m, out,3),n); break;
+	// ----- speed test & lossy error bound analysis (with option -v1) -----------------------	
+    case 149:            TMBENCH("", fprazor64(  in, m, out,zerrlim),n);                                           l=n; pr(l,n); memcpy(cpy,in,n); if(verbose) fpstat(in, m, out, -8); break;
+	  #ifdef _BITGROOMING	
+    case 150: ptr_unn p; TMBENCH("", memcpy(out,in,n); ccr_gbr(nsd, NC_DOUBLE, m, 0, p, out),n);                   l=n; pr(l,n); memcpy(cpy,in,n); if(verbose) fpstat(in, m, out, -8); break;
+    case 151:            TMBENCH("", BG_compress_args(BG_DOUBLE, in, NULL, BITGROOM, BG_NSD, nsd, nsd, m, out),n); l=n; pr(l,n); memcpy(cpy,in,n); if(verbose) fpstat(in, m, out, -8); break;
+ 	  #endif 
+	  #ifdef _LIBROUNDFAST // digirounding algo
+    case 152:            TMBENCH("", fround64( in, m, out, nsd),n);                                                l=n; pr(l,n); memcpy(cpy,in,n); if(verbose) fpstat(in, m, out, -8); break;
 	  #endif
-	  #ifdef _BITGROOMING
-    case 152: if(verbose) {          BG_compress_args(BG_DOUBLE, in, NULL, BITGROOM, BG_NSD, 3/*nsd*/, 3/*dsd*/, m, out);  fpstat(in, m, out, -8);  // bigrooming algo
-	                 TMBENCH("",     BG_compress_args(BG_DOUBLE, in, NULL, BITGROOM, BG_NSD, 3/*nsd*/, 3/*dsd*/, m, out),n); memcpy(cpy,in,n); l=n; pr(l,n);
-	                 TMBENCH2("143", BG_compress_args(BG_DOUBLE, in, NULL, BITGROOM, BG_NSD, 3/*nsd*/, 3/*dsd*/, m, out),n); } break;
- 	  #endif
    case 153: l = n; TMBENCH("", tpzenc(  in, n, out, USIZE),n);             pr(l,n); TMBENCH2("153", tpzdec(  out, n,cpy, USIZE),n); break;
    case 154: l = n; TMBENCH("", tpz0enc( in, n, out, USIZE, tmp),n);        pr(l,n); TMBENCH2("154", tpz0dec( out, n,cpy, USIZE),n); break;
    case 155: l = n; TMBENCH("", tpxenc(  in, n, out, USIZE),n);             pr(l,n); TMBENCH2("155", tpxdec(  out, n,cpy, USIZE),n); break;
@ -2012,9 +2008,15 @@ void usage(char *pgm) {
  exit(0);
 }

-int main(int argc, char* argv[]) {
+testrazor() {   
+  double d = M_PI; uint64_t u = ctou64(&d); printf("pi=%e - ctz=%u\n", M_PI, ctz64(u)); 
+  for(int i = 0; i < 8; i++) { double e = errlima[i], d; int lg2e = -log(e)/log(2.0);  d = _fprazor64(M_PI, e, lg2e); uint64_t u = ctou64(&d); printf("%e:%e - %.8f ctz=%u\n", e, d, M_PI - d, u?ctz64(u):64 ); } 
+  exit(0);
+}
+
+int main(int argc, char* argv[]) { //testrazor(); 
  unsigned      b = 1 << 30, lz=0, fno,m=1000000, bsize = (unsigned)-1;
-  int           isize=4,dfmt = 0,kid=1,skiph=0,decs=0,divs=1,nsd=-1,dim0=0;
+  int           isize=4,dfmt = 0,kid=1,skiph=0,decs=0,divs=1,dim0=0;
  uint64_t      be_mindelta=0;
  unsigned char *in = NULL, *out, *cpy, *scmd = NULL, *icmd = NULL;
  double        mdelta=-10, errlim=-1.0;
@ -2174,19 +2176,19 @@ int main(int argc, char* argv[]) {
    //if(fno == optind)
    tm_init(tm_Rep, tm_verbose /* 2 print id */);

-    if(errlim > 0.0 || nsd >= 0) {  // convert input for lossy floating point compression
+    if(errlim > 0.0 /*|| nsd >= 0*/) {  // convert input for lossy floating point compression
      if(errlim < 0.0000009999) errlim = 0.000001;
      if(isize == -4) {
-                             fppad32(in,n/4,out,errlim); if(verbose>0) fpstat(in, n/4, out, -4); //if(nsd >= 0) fprnd32(in,n/4,out,nsd); else //
-                             fppad32(in,n/4, in,errlim);                                         /*if(nsd >= 0) fprnd32(in,n/4,in, nsd); else */
+                             fprazor32(in,n/4,out,errlim); if(verbose>0) fpstat(in, n/4, out, -4); //if(nsd >= 0) fprnd32(in,n/4,out,nsd); else //
+                             fprazor32(in,n/4, in,errlim);                                         /*if(nsd >= 0) fprnd32(in,n/4,in, nsd); else */
      } else if(isize == -8) {
-          fppad64(in,n/8,out,errlim); if(verbose>0) fpstat(in, n/8, out, -8);                    /*if(nsd>=0) fprnd64(in,n/8,out,nsd); else*/
-          fppad64(in,n/8, in,errlim);                                                            /*if(nsd>=0) fprnd64(in,n/8, in,nsd); else*/
+          fprazor64(in,n/8,out,errlim); if(verbose>0) fpstat(in, n/8, out, -8);                    /*if(nsd>=0) fprnd64(in,n/8,out,nsd); else*/
+          fprazor64(in,n/8, in,errlim);                                                            /*if(nsd>=0) fprnd64(in,n/8, in,nsd); else*/
      }
        #ifdef USE_FLOAT16
      else if(isize == -2) {
-          fppad16(in,n/2,out,errlim); if(verbose>0) fpstat(in, n/2, out, -2);                    /*if(nsd>=0) fprnd64(in,n/8,out,nsd); else*/
-          fppad16(in,n/2, in,errlim);                                                            /*if(nsd>=0) fprnd64(in,n/8, in,nsd); else*/
+          fprazor16(in,n/2,out,errlim); if(verbose>0) fpstat(in, n/2, out, -2);                    /*if(nsd>=0) fprnd64(in,n/8,out,nsd); else*/
+          fprazor16(in,n/2, in,errlim);                                                            /*if(nsd>=0) fprnd64(in,n/8, in,nsd); else*/
      }
        #endif
    }