/* Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #define C_LUCY_BITVECTOR #include "Lucy/Util/ToolSet.h" #include "Lucy/Object/BitVector.h" #include "Lucy/Util/NumberUtils.h" // Shared subroutine for performing both OR and XOR ops. #define DO_OR 1 #define DO_XOR 2 static void S_do_or_or_xor(BitVector *self, const BitVector *other, int operation); // Number of 1 bits given a u8 value. static const uint32_t BYTE_COUNTS[256] = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 }; static CFISH_INLINE size_t SI_octet_size(size_t bit_size) { return (bit_size + 7) / 8; } BitVector* BitVec_new(size_t capacity) { BitVector *self = (BitVector*)Class_Make_Obj(BITVECTOR); return BitVec_init(self, capacity); } BitVector* BitVec_init(BitVector *self, size_t capacity) { BitVectorIVARS *const ivars = BitVec_IVARS(self); if (capacity > SIZE_MAX - 7) { THROW(ERR, "BitVector capacity too large"); } const size_t byte_size = SI_octet_size(capacity); // Derive. ivars->bits = capacity ? (uint8_t*)CALLOCATE(byte_size, sizeof(uint8_t)) : NULL; // Assign. ivars->cap = byte_size * 8; return self; } void BitVec_Destroy_IMP(BitVector* self) { BitVectorIVARS *const ivars = BitVec_IVARS(self); FREEMEM(ivars->bits); SUPER_DESTROY(self, BITVECTOR); } BitVector* BitVec_Clone_IMP(BitVector *self) { BitVectorIVARS *const ivars = BitVec_IVARS(self); BitVector *other = BitVec_new(ivars->cap); size_t byte_size = SI_octet_size(ivars->cap); BitVectorIVARS *const ovars = BitVec_IVARS(other); // Forbid inheritance. if (BitVec_get_class(self) != BITVECTOR) { THROW(ERR, "Attempt by %o to inherit BitVec_Clone", BitVec_get_class_name(self)); } memcpy(ovars->bits, ivars->bits, byte_size * sizeof(uint8_t)); return other; } uint8_t* BitVec_Get_Raw_Bits_IMP(BitVector *self) { return BitVec_IVARS(self)->bits; } size_t BitVec_Get_Capacity_IMP(BitVector *self) { return BitVec_IVARS(self)->cap; } void BitVec_Mimic_IMP(BitVector *self, Obj *other) { CERTIFY(other, BITVECTOR); BitVectorIVARS *const ivars = BitVec_IVARS(self); BitVectorIVARS *const ovars = BitVec_IVARS((BitVector*)other); const size_t my_byte_size = SI_octet_size(ivars->cap); const size_t other_byte_size = SI_octet_size(ovars->cap); if (my_byte_size > other_byte_size) { size_t space = my_byte_size - other_byte_size; memset(ivars->bits + other_byte_size, 0, space); } else if (my_byte_size < other_byte_size) { BitVec_Grow(self, ovars->cap - 1); } memcpy(ivars->bits, ovars->bits, other_byte_size); } void BitVec_Grow_IMP(BitVector *self, size_t capacity) { BitVectorIVARS *const ivars = BitVec_IVARS(self); if (capacity > ivars->cap) { if (capacity > SIZE_MAX - 7) { THROW(ERR, "BitVector capacity overflow"); } const size_t old_byte_cap = SI_octet_size(ivars->cap); const size_t new_byte_cap = SI_octet_size(capacity); const size_t num_new_bytes = new_byte_cap - old_byte_cap; ivars->bits = (uint8_t*)REALLOCATE(ivars->bits, new_byte_cap); memset(ivars->bits + old_byte_cap, 0, num_new_bytes); ivars->cap = new_byte_cap * 8; } } void BitVec_Set_IMP(BitVector *self, size_t tick) { BitVectorIVARS *const ivars = BitVec_IVARS(self); if (tick >= ivars->cap) { size_t new_cap = (size_t)Memory_oversize(tick + 1, 0); BitVec_Grow(self, new_cap); } NumUtil_u1set(ivars->bits, tick); } void BitVec_Clear_IMP(BitVector *self, size_t tick) { BitVectorIVARS *const ivars = BitVec_IVARS(self); if (tick >= ivars->cap) { return; } NumUtil_u1clear(ivars->bits, tick); } void BitVec_Clear_All_IMP(BitVector *self) { BitVectorIVARS *const ivars = BitVec_IVARS(self); const size_t byte_size = SI_octet_size(ivars->cap); memset(ivars->bits, 0, byte_size); } bool BitVec_Get_IMP(BitVector *self, size_t tick) { BitVectorIVARS *const ivars = BitVec_IVARS(self); if (tick >= ivars->cap) { return false; } return NumUtil_u1get(ivars->bits, tick); } static int32_t S_first_bit_in_nonzero_byte(uint8_t num) { int32_t first_bit = 0; if ((num & 0xF) == 0) { first_bit += 4; num >>= 4; } if ((num & 0x3) == 0) { first_bit += 2; num >>= 2; } if ((num & 0x1) == 0) { first_bit += 1; } return first_bit; } int32_t BitVec_Next_Hit_IMP(BitVector *self, size_t tick) { BitVectorIVARS *const ivars = BitVec_IVARS(self); if (ivars->cap > INT32_MAX) { THROW(ERR, "Capacity too large for Next_Hit: %u64", (uint64_t)ivars->cap); } if (tick >= ivars->cap) { return -1; } size_t byte_size = SI_octet_size(ivars->cap); uint8_t *const limit = ivars->bits + byte_size; uint8_t *ptr = ivars->bits + (tick >> 3); if (*ptr != 0) { // Special case the first byte. size_t min_sub_tick = tick & 0x7; uint8_t byte = (uint8_t)(*ptr >> min_sub_tick); if (byte) { return (int32_t)tick + S_first_bit_in_nonzero_byte(byte); } } for (ptr++ ; ptr < limit; ptr++) { if (*ptr != 0) { // There's a non-zero bit in this byte. int32_t base = (int32_t)((ptr - ivars->bits) * 8); return base + S_first_bit_in_nonzero_byte(*ptr); } } return -1; } void BitVec_And_IMP(BitVector *self, const BitVector *other) { BitVectorIVARS *const ivars = BitVec_IVARS(self); const BitVectorIVARS *const ovars = BitVec_IVARS((BitVector*)other); uint8_t *bits_a = ivars->bits; uint8_t *bits_b = ovars->bits; const size_t min_cap = ivars->cap < ovars->cap ? ivars->cap : ovars->cap; const size_t byte_size = SI_octet_size(min_cap); uint8_t *const limit = bits_a + byte_size; // Intersection. while (bits_a < limit) { *bits_a &= *bits_b; bits_a++, bits_b++; } // Set all remaining to zero. if (ivars->cap > min_cap) { const size_t self_byte_size = SI_octet_size(ivars->cap); memset(bits_a, 0, self_byte_size - byte_size); } } void BitVec_Or_IMP(BitVector *self, const BitVector *other) { S_do_or_or_xor(self, other, DO_OR); } void BitVec_Xor_IMP(BitVector *self, const BitVector *other) { S_do_or_or_xor(self, other, DO_XOR); } static void S_do_or_or_xor(BitVector *self, const BitVector *other, int operation) { BitVectorIVARS *const ivars = BitVec_IVARS(self); const BitVectorIVARS *const ovars = BitVec_IVARS((BitVector*)other); uint8_t *bits_a, *bits_b; size_t max_cap, min_cap; uint8_t *limit; size_t byte_size; // Sort out what the minimum and maximum caps are. if (ivars->cap < ovars->cap) { max_cap = ovars->cap; min_cap = ivars->cap; } else { max_cap = ivars->cap; min_cap = ovars->cap; } // Grow self if smaller than other, then calc pointers. if (max_cap > ivars->cap) { BitVec_Grow(self, max_cap); } bits_a = ivars->bits; bits_b = ovars->bits; byte_size = SI_octet_size(min_cap); limit = ivars->bits + byte_size; // Perform union of common bits. if (operation == DO_OR) { while (bits_a < limit) { *bits_a |= *bits_b; bits_a++, bits_b++; } } else if (operation == DO_XOR) { while (bits_a < limit) { *bits_a ^= *bits_b; bits_a++, bits_b++; } } else { THROW(ERR, "Unrecognized operation: %i32", (int32_t)operation); } // Copy remaining bits if other is bigger than self. if (ovars->cap > min_cap) { const size_t other_byte_size = SI_octet_size(ovars->cap); const size_t bytes_to_copy = other_byte_size - byte_size; memcpy(bits_a, bits_b, bytes_to_copy); } } void BitVec_And_Not_IMP(BitVector *self, const BitVector *other) { BitVectorIVARS *const ivars = BitVec_IVARS(self); const BitVectorIVARS *const ovars = BitVec_IVARS((BitVector*)other); uint8_t *bits_a = ivars->bits; uint8_t *bits_b = ovars->bits; const size_t min_cap = ivars->cap < ovars->cap ? ivars->cap : ovars->cap; const size_t byte_size = SI_octet_size(min_cap); uint8_t *const limit = bits_a + byte_size; // Clear bits set in other. while (bits_a < limit) { *bits_a &= ~(*bits_b); bits_a++, bits_b++; } } void BitVec_Flip_IMP(BitVector *self, size_t tick) { BitVectorIVARS *const ivars = BitVec_IVARS(self); if (tick >= ivars->cap) { size_t new_cap = Memory_oversize(tick + 1, 0); BitVec_Grow(self, new_cap); } NumUtil_u1flip(ivars->bits, tick); } void BitVec_Flip_Block_IMP(BitVector *self, size_t offset, size_t length) { BitVectorIVARS *const ivars = BitVec_IVARS(self); size_t first = offset; size_t last = offset + length - 1; // Bail if there's nothing to flip. if (!length) { return; } if (last >= ivars->cap) { BitVec_Grow(self, last + 1); } // Flip partial bytes. while (last % 8 != 0 && last > first) { NumUtil_u1flip(ivars->bits, last); last--; } while (first % 8 != 0 && first < last) { NumUtil_u1flip(ivars->bits, first); first++; } // Are first and last equal? if (first == last) { // There's only one bit left to flip. NumUtil_u1flip(ivars->bits, last); } // They must be multiples of 8, then. else { const size_t start_tick = first >> 3; const size_t limit_tick = last >> 3; uint8_t *bits = ivars->bits + start_tick; uint8_t *limit = ivars->bits + limit_tick; // Last actually belongs to the following byte (e.g. 8, in byte 2). NumUtil_u1flip(ivars->bits, last); // Flip whole bytes. for (; bits < limit; bits++) { *bits = ~(*bits); } } } size_t BitVec_Count_IMP(BitVector *self) { BitVectorIVARS *const ivars = BitVec_IVARS(self); size_t count = 0; const size_t byte_size = SI_octet_size(ivars->cap); uint8_t *ptr = ivars->bits; uint8_t *const limit = ptr + byte_size; for (; ptr < limit; ptr++) { count += BYTE_COUNTS[*ptr]; } return count; } I32Array* BitVec_To_Array_IMP(BitVector *self) { BitVectorIVARS *const ivars = BitVec_IVARS(self); size_t count = BitVec_Count(self); size_t num_left = count; const size_t capacity = ivars->cap; uint32_t *const array = (uint32_t*)CALLOCATE(count, sizeof(uint32_t)); const size_t byte_size = SI_octet_size(ivars->cap); uint8_t *const bits = ivars->bits; uint8_t *const limit = bits + byte_size; uint32_t num = 0; uint32_t i = 0; while (num_left) { uint8_t *ptr = bits + (num >> 3); while (ptr < limit && *ptr == 0) { num += 8; ptr++; } do { if (BitVec_Get(self, num)) { array[i++] = num; if (--num_left == 0) { break; } } if (num >= capacity) { THROW(ERR, "Exceeded capacity: %u32 %u32", num, capacity); } } while (++num % 8); } return I32Arr_new_steal((int32_t*)array, count); }