ATLAS Offline Software
crc64.cxx
Go to the documentation of this file.
1 /*
2  Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration
3 */
4 /*
5  */
195 #include "CxxUtils/crc64.h"
197 #include <stdio.h>
198 
199 
200 #if ATH_CRC64_VEC
201 
203 // Two 64-bit integers.
204 typedef long long int v2di __attribute__ ((vector_size (16)));
205 
206 
207 // Two unsigned 64-bit integers.
208 typedef uint64_t v2du __attribute__ ((vector_size (16)));
209 
210 
211 // 16 signed characters.
212 typedef char v16qi __attribute__ ((vector_size (16)));
213 
214 
215 #endif
216 
217 
218 namespace {
219 
220 
221 //***************************************************************************
222 // Primitive functions and utilities.
223 //
224 
225 
226 #if ATH_CRC64_VEC
227 
232 inline
233 v2di load_unaligned (const char* x)
234 {
235  return (v2di)__builtin_ia32_loaddqu (x);
236 }
237 
238 
244 inline
245 v2di load_aligned (const char* x)
246 {
247  return *(v2di*)x;
248 }
249 
250 
256 // A macro, not a function, because the second argument is constrained
257 // to be an 8-bit constant. If this were a function, compilation might fail
258 // if it is not inlined.
259 #define byteshift_l(X, N) (__builtin_ia32_pslldqi128 ((X), (N)*8))
260 
261 
267 // A macro, not a function; see above.
268 #define byteshift_r(X, N) (__builtin_ia32_psrldqi128 ((X), (N)*8))
269 
270 
281 // A macro, not a function; see above.
282 #define clmul(A, B, WHICH) (__builtin_ia32_pclmulqdq128 ((A), (B), (WHICH)))
283 
284 
297 __attribute__ ((target ("sse4")))
298 inline
299 void byteshift_l256 (v2di in, size_t n, v2di& outHigh, v2di& outLow)
300 {
301  static const uint8_t shuffleMasks[] = {
302  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
303  0x8f, 0x8e, 0x8d, 0x8c, 0x8b, 0x8a, 0x89, 0x88, 0x87, 0x86, 0x85, 0x84, 0x83, 0x82, 0x81, 0x80,
304  };
305 
306  const v16qi mask = (v16qi)load_unaligned ((const char*)shuffleMasks + (16-n));
307  outLow = (v2di)__builtin_ia32_pshufb128 ((v16qi)in, ~mask);
308  outHigh = (v2di)__builtin_ia32_pshufb128 ((v16qi)in, mask);
309 }
310 
311 
319 inline
320 uint64_t hightest (uint64_t x, uint64_t y)
321 {
322  // Relies on sign-extension of right-shift of a signed int.
323  // This is strictly speaking implementation-defined behavior.
324  // Since this code is anyway enabled only on x86_64, that's ok.
325  // cppcheck-suppress shiftTooManyBitsSigned
326  return y & (static_cast<int64_t>(x)>>63);
327 }
328 
329 
339 uint64_t exp_mod (unsigned exp, uint64_t p)
340 {
341  // This is basically just doing binary long division without carry
342  // (so subtraction becomes xor).
343  uint64_t d = p;
344  for (unsigned i=0; i < exp-64; i++) {
345  d = (d<<1) ^ hightest (d, p);
346  }
347  return d;
348 }
349 
350 
359 uint64_t exp129_div (uint64_t p)
360 {
361  // Again, just binary long division without carry.
362  uint64_t q = 0;
363  uint64_t h = p;
364  for (unsigned i=0; i < 64; i++) {
365  q |= (h & (1ull << 63)) >> i;
366  h = (h << 1) ^ hightest (h, p);
367  }
368  return q;
369 }
370 
371 
372 #endif // ATH_CRC64_VEC
373 
374 
375 /*
376  * @brief Reflect the bits in a 64-bit word around the center.
377  * @param v Value to reflect.
378  *
379  * So, for example, 0x0120034005600780 becomes
380  * 0x01e006a002c00480
381  *
382  * Reference: https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
383  * Originally credited to E. Freed, Dr. Dobbs's Journal 8 no. 4, p. 24 (1983).
384  */
385 uint64_t bit_reflect (uint64_t v)
386 {
387  v = ((v >> 1) & 0x5555555555555555) | ((v & 0x5555555555555555) << 1);
388  v = ((v >> 2) & 0x3333333333333333) | ((v & 0x3333333333333333) << 2);
389  v = ((v >> 4) & 0x0F0F0F0F0F0F0F0F) | ((v & 0x0F0F0F0F0F0F0F0F) << 4);
390  v = ((v >> 8) & 0x00FF00FF00FF00FF) | ((v & 0x00FF00FF00FF00FF) << 8);
391  v = ((v >> 16) & 0x0000FFFF0000FFFF) | ((v & 0x0000FFFF0000FFFF) << 16);
392  v = (v >> 32) | (v << 32);
393  return v;
394 }
395 
396 
397 //****************************************************************************
398 // CRC helpers.
399 //
400 
401 
402 #if ATH_CRC64_VEC
403 
411 __attribute__ ((target ("pclmul")))
412 inline
413 v2di folding_round (v2di fold, v2di data, v2di k)
414 {
415  return data
416  ^ clmul (fold, k, 0x00)
417  ^ clmul (fold, k, 0x11);
418 }
419 
420 
426 __attribute__ ((target ("pclmul")))
427 inline
428 v2di fold_trailing_zeros (v2di data, v2di k)
429 {
430  return clmul (data, k, 0x10) ^ byteshift_r (data, 8);
431 }
432 
433 
439 __attribute__ ((target ("pclmul")))
440 inline
441 v2di barrett_reduce (v2di R, v2di k)
442 {
443  v2di T1 = clmul (R, k, 0x00);
444  return R
445  ^ clmul (T1, k, 0x10)
446  ^ byteshift_l (T1, 8);
447 }
448 #endif
449 
450 
451 } // anonymous namespace
452 
453 
454 namespace CxxUtils {
455 
456 
457 //***************************************************************************
458 // Public entry points.
459 //
460 
464 class CRCTable
465 {
466 public:
472  CRCTable (uint64_t p, uint64_t initial = 0xffffffffffffffff);
473 
476 
479 
480 #if ATH_CRC64_VEC
481  v2di m_fold_constants;
483 
485  v2di m_barrett_constants;
486 #endif
487 };
488 
489 
495 CRCTable::CRCTable (uint64_t p, uint64_t initial /* = 0xffffffffffffffff*/)
496 {
497  m_initial = initial;
498 
499  uint64_t prev = bit_reflect (p);
500  for (int i = 0; i < 256; i++)
501  {
502  uint64_t r = i;
503  for (int j = 0; j < 8; j++)
504  {
505  if (r & 1)
506  r = (r >> 1) ^ prev;
507  else
508  r >>= 1;
509  }
510  m_table[i] = r;
511  }
512 
513 #if ATH_CRC64_VEC
514  const uint64_t k1 = bit_reflect (exp_mod (128+64, p)) << 1;
515  const uint64_t k2 = bit_reflect (exp_mod (128, p)) << 1;
516  const uint64_t mu = (bit_reflect (exp129_div (p)) << 1) | 1;
517  const uint64_t prev65 = (bit_reflect (p) << 1) | 1;
518  v2du a = {k1, k2};
519  m_fold_constants = reinterpret_cast<v2di>(a);
520  v2du b = { mu, prev65 };
521  m_barrett_constants = reinterpret_cast<v2di>(b);
522 #endif
523 }
524 
525 
526 // Polynomial taken from code from David T. Jones (dtj@cs.ucl.ac.uk).
527 // (The form given here is reversed.)
528 // http://www0.cs.ucl.ac.uk/staff/D.Jones/crcnote.pdf
529 const CRCTable defaultCRCTable (0xad93d23594c935a9);
530 
531 
536 {
537  delete table;
538 }
539 
540 
547 std::unique_ptr<CRCTable> makeCRCTable (uint64_t p,
548  uint64_t initial /*= 0xffffffffffffffff*/)
549 {
550  return std::make_unique<CRCTable> (p, initial);
551 }
552 
553 
561  const char* data,
562  size_t data_len)
563 {
564  uint64_t crc = table.m_initial;
565  const char* seq = data;
566  const char* end = seq + data_len;
567  while (seq < end)
568  crc = table.m_table[(crc ^ *seq++) & 0xff] ^ (crc >> 8);
569  return crc;
570 }
571 
572 
581  size_t data_len)
582 {
583  return crc64_bytewise (defaultCRCTable, data, data_len);
584 }
585 
586 
593 uint64_t crc64_bytewise (const std::string& s)
594 {
595  return crc64_bytewise (defaultCRCTable, s.data(), s.size());
596 }
597 
598 
599 #if ATH_CRC64_VEC
600 
606 __attribute__ ((target ("pclmul")))
607 uint64_t crc64 (const CRCTable& table,
608  const char* data,
609  size_t data_len)
610 {
611  uint64_t crc = table.m_initial;
612 
613  // Early exit if the string is null.
614  if (ATH_UNLIKELY(!data_len)) return crc;
615 
616  // The main body assumes that the data are aligned to 128 bits.
617  // This should almost always be the case. But just in case the input
618  // string is not, consume the initial unaligned portion byte-by-byte
619  // until it is.
620  if (ATH_UNLIKELY (reinterpret_cast<unsigned long>(data) & 15)) {
621  // Number of unaligned bytes we need to read from the start of the string.
622  size_t leadin = std::min (16 - (reinterpret_cast<unsigned long>(data) & 15), data_len);
623  crc = crc64_bytewise (table, data, leadin);
624  data += leadin;
625  data_len -= leadin;
626 
627  if (ATH_UNLIKELY(!data_len)) return crc;
628  }
629 
630  // Accumulator for CRC value.
631  v2di fold = {static_cast<int64_t>(crc), 0};
632 
633  // Constants for the folding step.
634  v2di k = table.m_fold_constants;
635 
636  if (ATH_UNLIKELY (data_len < 16)) {
637  // Special case for less than 128 bits.
638  v2di temp2 = load_aligned (data);
639  v2di crc0, crc1;
640  byteshift_l256 (fold, 16-data_len, crc1, crc0);
641  v2di A, B;
642  byteshift_l256 (temp2, 16-data_len, B, A);
643 
644  fold = A ^ crc0;
645  fold = fold_trailing_zeros (fold, k);
646  fold ^= byteshift_l (crc1, 8);
647  }
648  else {
649  // We have 128 bits or more.
650 
651  // Load the first 128 bits.
652  fold ^= load_aligned (data);
653 
654  // Main folding loop. Fold in 128 bits at a time until there
655  // are fewer than 128 left.
656  size_t n = 16;
657  for (; n+16 <= data_len; n += 16) {
658  v2di temp2 = load_aligned (data + n);
659  fold = folding_round (fold, temp2, k);
660  }
661 
662  // Handle a partial block at the end of less than 128 bits.
663  if (ATH_LIKELY (n < data_len)) {
664  v2di remainder = load_aligned (data + n);
665  // Number of remaining bytes.
666  size_t nrem = data_len - n;
667  v2di A, B, C, D;
668  byteshift_l256 (fold, 16-nrem, B, A);
669  byteshift_l256 (remainder, 16-nrem, D, C);
670  fold = folding_round (A, B|C, k);
671  }
672  fold = fold_trailing_zeros (fold, k);
673  }
674 
676  fold = barrett_reduce (fold, table.m_barrett_constants);
677 
678  return fold[1];
679 }
680 
681 
682 #endif // ATH_CRC64_VEC
683 
684 
693 #if ATH_CRC64_VEC
694 __attribute__ ((target ("default")))
695 #endif
697  const char* data,
698  size_t data_len)
699 {
700  return crc64_bytewise (table, data, data_len);
701 }
702 
703 
709 uint64_t crc64 (const char* data,
710  size_t data_len)
711 {
712  return crc64 (defaultCRCTable, data, data_len);
713 }
714 
715 
720 uint64_t crc64 (const std::string& s)
721 {
722  return crc64 (defaultCRCTable, s.data(), s.size());
723 }
724 
725 
733 {
734  while (x > 0) {
735  crc = defaultCRCTable.m_table[(crc ^ x) & 0xff] ^ (crc >> 8);
736  x >>= 8;
737  }
738  return crc;
739 }
740 
741 
746 std::string crc64format (uint64_t crc)
747 {
748  char buf[64];
749  sprintf (buf, "%08X%08X",
750  (unsigned)((crc>>32)&0xffffffff), (unsigned)(crc&0xffffffff));
751  return buf;
752 }
753 
754 
761 std::string crc64digest (const std::string& str)
762 {
763  return crc64format (crc64 (str));
764 }
765 
766 
767 } // namespace CxxUtils
768 
769 
test_athena_ntuple_filter.seq
seq
filter configuration ## -> we use the special sequence 'AthMasterSeq' which is run before any other a...
Definition: test_athena_ntuple_filter.py:18
beamspotman.r
def r
Definition: beamspotman.py:676
data
char data[hepevt_bytes_allocation_ATLAS]
Definition: HepEvt.cxx:11
python.SystemOfUnits.s
int s
Definition: SystemOfUnits.py:131
CxxUtils::crc64_bytewise
uint64_t crc64_bytewise(const CRCTable &table, const char *data, size_t data_len)
Find the CRC-64 of a string, using a byte-by-byte algorithm.
Definition: crc64.cxx:560
xAOD::uint8_t
uint8_t
Definition: Muon_v1.cxx:557
hist_file_dump.d
d
Definition: hist_file_dump.py:137
DMTest::C
C_v1 C
Definition: C.h:26
ATH_UNLIKELY
#define ATH_UNLIKELY(x)
Definition: AthUnlikelyMacros.h:17
CxxUtils::deleteCRCTable
void deleteCRCTable(CxxUtils::CRCTable *table)
Delete a CRCTable object.
Definition: crc64.cxx:535
const
bool const RAWDATA *ch2 const
Definition: LArRodBlockPhysicsV0.cxx:560
CxxUtils::CRCTable::m_table
uint64_t m_table[256]
Lookup table for bytewise CRC calculation.
Definition: crc64.cxx:478
drawFromPickle.exp
exp
Definition: drawFromPickle.py:36
x
#define x
CxxUtils::CRCTable::CRCTable
CRCTable(uint64_t p, uint64_t initial=0xffffffffffffffff)
Initialize the CRC tables and constants.
Definition: crc64.cxx:495
python.utils.AtlRunQueryLookup.mask
string mask
Definition: AtlRunQueryLookup.py:460
AthUnlikelyMacros.h
CxxUtils::CRCTable
Precomputed tables and constants for the CRC calculation.
Definition: crc64.cxx:465
A
python.utils.AtlRunQueryDQUtils.p
p
Definition: AtlRunQueryDQUtils.py:210
lumiFormat.i
int i
Definition: lumiFormat.py:85
beamspotman.n
n
Definition: beamspotman.py:731
CxxUtils
Definition: aligned_vector.h:29
CxxUtils::crc64addint
uint64_t crc64addint(uint64_t crc, uint64_t x)
Extend a previously-calculated CRC to include an int.
Definition: crc64.cxx:732
xAOD::uint64_t
uint64_t
Definition: EventInfo_v1.cxx:123
AnalysisUtils::Delta::R
double R(const INavigable4Momentum *p1, const double v_eta, const double v_phi)
Definition: AnalysisMisc.h:49
CxxUtils::crc64
uint64_t crc64(const CRCTable &table, const char *data, size_t data_len)
Find the CRC-64 of a string,.
Definition: crc64.cxx:696
min
#define min(a, b)
Definition: cfImp.cxx:40
CxxUtils::end
auto end(range_with_at< T > &s)
Definition: range_with_at.h:68
remainder
std::vector< std::string > remainder(const std::vector< std::string > &v1, const std::vector< std::string > &v2)
list of entries in a vector that are not in another
Definition: compareFlatTrees.cxx:44
plotBeamSpotMon.b
b
Definition: plotBeamSpotMon.py:77
python.ext.table_printer.table
list table
Definition: table_printer.py:81
CxxUtils::defaultCRCTable
const CRCTable defaultCRCTable(0xad93d23594c935a9)
dqt_zlumi_alleff_HIST.B
B
Definition: dqt_zlumi_alleff_HIST.py:110
CxxUtils::makeCRCTable
std::unique_ptr< CRCTable > makeCRCTable(uint64_t p, uint64_t initial=0xffffffffffffffff)
Initialize CRC tables and constants.
Definition: crc64.cxx:547
python.PyAthena.v
v
Definition: PyAthena.py:154
ATH_LIKELY
#define ATH_LIKELY(x)
Definition: AthUnlikelyMacros.h:16
__attribute__
__attribute__((always_inline)) inline uint16_t TileCalibDrawerBase
Definition: TileCalibDrawerBase.h:190
a
TList * a
Definition: liststreamerinfos.cxx:10
y
#define y
h
CxxUtils::crc64format
std::string crc64format(uint64_t crc)
Format a CRC-64 as a string.
Definition: crc64.cxx:746
copySelective.target
string target
Definition: copySelective.py:37
CxxUtils::CRCTable::m_initial
uint64_t m_initial
Initial CRC value.
Definition: crc64.cxx:475
extractSporadic.q
list q
Definition: extractSporadic.py:98
str
Definition: BTagTrackIpAccessor.cxx:11
crc64.h
A crc-64 implementation, using pclmul where possible.
CxxUtils::crc64digest
std::string crc64digest(const std::string &str)
Return a CRC-64 digest of a string.
Definition: crc64.cxx:761
CaloNoise_fillDB.mu
mu
Definition: CaloNoise_fillDB.py:53
fitman.k
k
Definition: fitman.py:528